summaryrefslogtreecommitdiff
blob: 4752f448267832bd7e1d9a985d7f1773f9c7d24e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
commit 5efbf0d243984444cf352ad6f0d147e226c64498
Author: Sergei Trofimovich <slyfox@gentoo.org>
Date:   Thu Sep 1 17:34:58 2016 +0100

    restore -fmax-worker-args handling (Trac #11565)
    
    maxWorkerArgs handling was accidentally lost 3 years ago
    in a major update of demand analysis
        commit 0831a12ea2fc73c33652eeec1adc79fa19700578
    
    Old regression is noticeable as:
    - code bloat (requires stack reshuffling)
    - compilation slowdown (more code to optimise/generate)
    - and increased heap usage (DynFlags unboxing/reboxing?)
    
    On a simple compile benchmark this change causes heap
    allocation drop from 70G don to 67G (ghc perf build).
    
    Signed-off-by: Sergei Trofimovich <siarheit@google.com>
    
    Reviewers: simonpj, ezyang, goldfire, austin, bgamari
    
    Reviewed By: simonpj, ezyang
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D2503
    
    GHC Trac Issues: #11565

diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
index 10d5614..7166f57 100644
--- a/compiler/specialise/SpecConstr.hs
+++ b/compiler/specialise/SpecConstr.hs
@@ -29,7 +29,7 @@ import CoreFVs          ( exprsFreeVarsList )
 import CoreMonad
 import Literal          ( litIsLifted )
 import HscTypes         ( ModGuts(..) )
-import WwLib            ( mkWorkerArgs )
+import WwLib            ( isWorkerSmallEnough, mkWorkerArgs )
 import DataCon
 import Coercion         hiding( substCo )
 import Rules
@@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
 
   | Just all_calls <- lookupVarEnv bind_calls fn
   = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $
-    do  { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
-
+    do  { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls
                 -- Bale out if too many specialisations
-        ; let n_pats      = length pats
+        ; let pats = filter (is_small_enough . fst) all_pats
+              is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars
+                  -- We are about to construct w/w pair in 'spec_one'.
+                  -- Omit specialisation leading to high arity workers.
+                  -- See Note [Limit w/w arity]
+              n_pats      = length pats
               spec_count' = n_pats + spec_count
         ; case sc_count env of
             Just max | not (sc_force env) && spec_count' > max
diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
index 09bc204..d9460d9 100644
--- a/compiler/stranal/WwLib.hs
+++ b/compiler/stranal/WwLib.hs
@@ -8,6 +8,7 @@
 
 module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs
              , deepSplitProductType_maybe, findTypeShape
+             , isWorkerSmallEnough
  ) where
 
 #include "HsVersions.h"
@@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
               wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var
               worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args
 
-        ; if useful1 && not (only_one_void_argument) || useful2
+        ; if isWorkerSmallEnough dflags work_args
+             && (useful1 && not only_one_void_argument || useful2)
           then return (Just (worker_args_dmds, wrapper_body, worker_body))
           else return Nothing
         }
@@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
       | otherwise
       = False
 
+-- See Note [Limit w/w arity]
+isWorkerSmallEnough :: DynFlags -> [Var] -> Bool
+isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags
+    -- We count only Free variables (isId) to skip Type, Kind
+    -- variables which have no runtime representation.
+
 {-
 Note [Always do CPR w/w]
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR
 property, but now doesn't and there a cascade of disaster.  A good example
 is Trac #5920.
 
+Note [Limit w/w arity]
+~~~~~~~~~~~~~~~~~~~~~~~~
+Guard against high worker arity as it generates a lot of stack traffic.
+A simplified example is Trac #11565#comment:6
+
+Current strategy is very simple: don't perform w/w transformation at all
+if the result produces a wrapper with arity higher than -fmax-worker-args=.
+
+It is a bit all or nothing, consider
+
+        f (x,y) (a,b,c,d,e ... , z) = rhs
+
+Currently we will remove all w/w ness entirely. But actually we could
+w/w on the (x,y) pair... it's the huge product that is the problem.
+
+Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd
+solve f. But we can get a lot of args from deeply-nested products:
+
+        g (a, (b, (c, (d, ...)))) = rhs
+
+This is harder to spot on an arg-by-arg basis. Previously mkWwStr was
+given some "fuel" saying how many arguments it could add; when we ran
+out of fuel it would stop w/wing.
+Still not very clever because it had a left-right bias.
 
 ************************************************************************
 *                                                                      *