commit 5efbf0d243984444cf352ad6f0d147e226c64498 Author: Sergei Trofimovich Date: Thu Sep 1 17:34:58 2016 +0100 restore -fmax-worker-args handling (Trac #11565) maxWorkerArgs handling was accidentally lost 3 years ago in a major update of demand analysis commit 0831a12ea2fc73c33652eeec1adc79fa19700578 Old regression is noticeable as: - code bloat (requires stack reshuffling) - compilation slowdown (more code to optimise/generate) - and increased heap usage (DynFlags unboxing/reboxing?) On a simple compile benchmark this change causes heap allocation drop from 70G don to 67G (ghc perf build). Signed-off-by: Sergei Trofimovich Reviewers: simonpj, ezyang, goldfire, austin, bgamari Reviewed By: simonpj, ezyang Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2503 GHC Trac Issues: #11565 diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs index 10d5614..7166f57 100644 --- a/compiler/specialise/SpecConstr.hs +++ b/compiler/specialise/SpecConstr.hs @@ -29,7 +29,7 @@ import CoreFVs ( exprsFreeVarsList ) import CoreMonad import Literal ( litIsLifted ) import HscTypes ( ModGuts(..) ) -import WwLib ( mkWorkerArgs ) +import WwLib ( isWorkerSmallEnough, mkWorkerArgs ) import DataCon import Coercion hiding( substCo ) import Rules @@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs | Just all_calls <- lookupVarEnv bind_calls fn = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $ - do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls - + do { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls -- Bale out if too many specialisations - ; let n_pats = length pats + ; let pats = filter (is_small_enough . fst) all_pats + is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars + -- We are about to construct w/w pair in 'spec_one'. + -- Omit specialisation leading to high arity workers. + -- See Note [Limit w/w arity] + n_pats = length pats spec_count' = n_pats + spec_count ; case sc_count env of Just max | not (sc_force env) && spec_count' > max diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs index 09bc204..d9460d9 100644 --- a/compiler/stranal/WwLib.hs +++ b/compiler/stranal/WwLib.hs @@ -8,6 +8,7 @@ module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs , deepSplitProductType_maybe, findTypeShape + , isWorkerSmallEnough ) where #include "HsVersions.h" @@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args - ; if useful1 && not (only_one_void_argument) || useful2 + ; if isWorkerSmallEnough dflags work_args + && (useful1 && not only_one_void_argument || useful2) then return (Just (worker_args_dmds, wrapper_body, worker_body)) else return Nothing } @@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots | otherwise = False +-- See Note [Limit w/w arity] +isWorkerSmallEnough :: DynFlags -> [Var] -> Bool +isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags + -- We count only Free variables (isId) to skip Type, Kind + -- variables which have no runtime representation. + {- Note [Always do CPR w/w] ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR property, but now doesn't and there a cascade of disaster. A good example is Trac #5920. +Note [Limit w/w arity] +~~~~~~~~~~~~~~~~~~~~~~~~ +Guard against high worker arity as it generates a lot of stack traffic. +A simplified example is Trac #11565#comment:6 + +Current strategy is very simple: don't perform w/w transformation at all +if the result produces a wrapper with arity higher than -fmax-worker-args=. + +It is a bit all or nothing, consider + + f (x,y) (a,b,c,d,e ... , z) = rhs + +Currently we will remove all w/w ness entirely. But actually we could +w/w on the (x,y) pair... it's the huge product that is the problem. + +Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd +solve f. But we can get a lot of args from deeply-nested products: + + g (a, (b, (c, (d, ...)))) = rhs + +This is harder to spot on an arg-by-arg basis. Previously mkWwStr was +given some "fuel" saying how many arguments it could add; when we ran +out of fuel it would stop w/wing. +Still not very clever because it had a left-right bias. ************************************************************************ * *