{- (c) The University of Glasgow 2006 (c) The AQUA Project, Glasgow University, 1994-1998 This module contains inlining logic used by the simplifier. -} module GHC.Core.Opt.Simplify.Inline ( -- * Cheap and cheerful inlining checks. couldBeSmallEnoughToInline, smallEnoughToInline, activeUnfolding, -- * The smart inlining decisions are made by callSiteInline callSiteInline, CallCtxt(..), ) where import GHC.Prelude import GHC.Driver.Flags import GHC.Core.Opt.Simplify.Env import GHC.Core import GHC.Core.Unfold import GHC.Core.FVs( exprFreeIds ) import GHC.Types.Id import GHC.Types.Var.Env( InScopeSet, lookupInScope ) import GHC.Types.Var.Set import GHC.Types.Basic ( Arity, RecFlag(..), isActive ) import GHC.Utils.Logger import GHC.Utils.Misc import GHC.Utils.Outputable import GHC.Types.Name import Data.List (isPrefixOf) {- ************************************************************************ * * \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding} * * ************************************************************************ We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that we ``couldn't possibly use'' on the other side. Can be overridden w/ flaggery. Just the same as smallEnoughToInline, except that it has no actual arguments. -} couldBeSmallEnoughToInline :: UnfoldingOpts -> Int -> CoreExpr -> Bool couldBeSmallEnoughToInline opts threshold rhs = case sizeExpr opts threshold [] body of TooBig -> False _ -> True where (_, body) = collectBinders rhs ---------------- smallEnoughToInline :: UnfoldingOpts -> Unfolding -> Bool smallEnoughToInline opts (CoreUnfolding {uf_guidance = guidance}) = case guidance of UnfIfGoodArgs {ug_size = size} -> size <= unfoldingUseThreshold opts UnfWhen {} -> True UnfNever -> False smallEnoughToInline _ _ = False {- ************************************************************************ * * \subsection{callSiteInline} * * ************************************************************************ This is the key function. It decides whether to inline a variable at a call site callSiteInline is used at call sites, so it is a bit more generous. It's a very important function that embodies lots of heuristics. A non-WHNF can be inlined if it doesn't occur inside a lambda, and occurs exactly once or occurs once in each branch of a case and is small If the thing is in WHNF, there's no danger of duplicating work, so we can inline if it occurs once, or is small NOTE: we don't want to inline top-level functions that always diverge. It just makes the code bigger. Tt turns out that the convenient way to prevent them inlining is to give them a NOINLINE pragma, which we do in StrictAnal.addStrictnessInfoToTopId -} callSiteInline :: SimplEnv -> Logger -> Id -- The Id -> Bool -- True if there are no arguments at all (incl type args) -> [ArgSummary] -- One for each value arg; True if it is interesting -> CallCtxt -- True <=> continuation is interesting -> Maybe CoreExpr -- Unfolding, if any callSiteInline env logger id lone_variable arg_infos cont_info = case idUnfolding id of -- idUnfolding checks for loop-breakers, returning NoUnfolding -- Things with an INLINE pragma may have an unfolding *and* -- be a loop breaker (maybe the knot is not yet untied) CoreUnfolding { uf_tmpl = unf_template , uf_cache = unf_cache , uf_guidance = guidance } | active_unf -> tryUnfolding env logger id lone_variable arg_infos cont_info unf_template unf_cache guidance | otherwise -> traceInline logger uf_opts id "Inactive unfolding:" (ppr id) Nothing NoUnfolding -> Nothing BootUnfolding -> Nothing OtherCon {} -> Nothing DFunUnfolding {} -> Nothing -- Never unfold a DFun where uf_opts = seUnfoldingOpts env active_unf = activeUnfolding (seMode env) id activeUnfolding :: SimplMode -> Id -> Bool activeUnfolding mode id | isCompulsoryUnfolding (realIdUnfolding id) = True -- Even sm_inline can't override compulsory unfoldings | otherwise = isActive (sm_phase mode) (idInlineActivation id) && sm_inline mode -- `or` isStableUnfolding (realIdUnfolding id) -- Inline things when -- (a) they are active -- (b) sm_inline says so, except that for stable unfoldings -- (ie pragmas) we inline anyway -- | Report the inlining of an identifier's RHS to the user, if requested. traceInline :: Logger -> UnfoldingOpts -> Id -> String -> SDoc -> a -> a traceInline logger opts inline_id str doc result -- We take care to ensure that doc is used in only one branch, ensuring that -- the simplifier can push its allocation into the branch. See Note [INLINE -- conditional tracing utilities]. | enable = logTraceMsg logger str doc result | otherwise = result where enable | logHasDumpFlag logger Opt_D_dump_verbose_inlinings = True | Just prefix <- unfoldingReportPrefix opts = prefix `isPrefixOf` occNameString (getOccName inline_id) | otherwise = False {-# INLINE traceInline #-} -- see Note [INLINE conditional tracing utilities] {- Note [Avoid inlining into deeply nested cases] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Also called "exponential inlining". Consider a function f like this: (#18730) f arg1 arg2 = case ... ... -> g arg1 ... -> g arg2 This function is small. So should be safe to inline. However sometimes this doesn't quite work out like that. Consider this code: f1 arg1 arg2 ... = ... case _foo of alt1 -> ... f2 arg1 ... alt2 -> ... f2 arg2 ... f2 arg1 arg2 ... = ... case _foo of alt1 -> ... f3 arg1 ... alt2 -> ... f3 arg2 ... f3 arg1 arg2 ... = ... ... repeats up to n times. And then f1 is applied to some arguments: foo = ... f1 ... Initially f2..fn are not interesting to inline so we don't. However we see that f1 is applied to interesting args. So it's an obvious choice to inline those: foo = ... case _foo of alt1 -> ... f2 ... alt2 -> ... f2 ... As a result we go and inline f2 both mentions of f2 in turn are now applied to interesting arguments and f2 is small: foo = ... case _foo of alt1 -> ... case _foo of alt1 -> ... f3 ... alt2 -> ... f3 ... alt2 -> ... case _foo of alt1 -> ... f3 ... alt2 -> ... f3 ... The same thing happens for each binding up to f_n, duplicating the amount of inlining done in each step. Until at some point we are either done or run out of simplifier ticks/RAM. This pattern happened #18730. To combat this we introduce one more heuristic when weighing inlining decision. We keep track of a "case-depth". Which increases each time we look inside a case expression with more than one alternative. We then apply a penalty to inlinings based on the case-depth at which they would be inlined. Bounding the number of inlinings in such a scenario. The heuristic can be tuned in two ways: * We can ignore the first n levels of case nestings for inlining decisions using -funfolding-case-threshold. * The penalty grows linear with the depth. It's computed as size*(depth-threshold)/scaling. Scaling can be set with -funfolding-case-scaling. Reflections and wrinkles * See also Note [Do not add unfoldings to join points at birth] in GHC.Core.Opt.Simplify.Iteration * The total case depth is really the wrong thing; it will inhibit inlining of a local function, just because there is some giant case nest further out. What we want is the /difference/ in case-depth between the binding site and the call site. That could be done quite easily by adding the case-depth to the Unfolding of the function. * What matters more than /depth/ is total /width/; that is how many alternatives are in the tree. We could perhaps multiply depth by width at each case expression. * There might be a case nest with many alternatives, but the function is called in only a handful of them. So maybe we should ignore case-depth, and instead penalise funtions that are called many times -- after all, inlining them bloats code. But in the scenario above, we are simplifying an inlined fuction, without doing a global occurrence analysis each time. So if we based the penalty on multiple occurences, we should /also/ add a penalty when simplifying an already-simplified expression. We do track this (seInlineDepth) but currently we barely use it. An advantage of using occurrences+inline depth is that it'll work when no case expressions are involved. See #15488. * Test T18730 did not involve join points. But join points are very prone to the same kind of thing. For exampe in #13253, and several related tickets, we got an exponential blowup in code size from a program that looks like this. let j1a x = case f y of { True -> p; False -> q } j1b x = case f y of { True -> q; False -> p } j2a x = case f (y+1) of { True -> j1a x; False -> j1b x} j2b x = case f (y+1) of { True -> j1b x; False -> j1a x} ... in case f (y+10) of { True -> j10a 7; False -> j10b 8 } The first danger is this: in Simplifier iteration 1 postInlineUnconditionally inlines the last functions, j10a and j10b (they are both small). Now we have two calls to j9a and two to j9b. In the next Simplifer iteration, postInlineUnconditionally inlines all four of these calls, leaving four calls to j8a and j8b. Etc. Happily, this probably /won't/ happen because the Simplifier works top down, so it'll inline j1a/j1b into j2a/j2b, which will make the latter bigger; so the process will stop. But we still need to stop the inline cascade described at the head of this Note. Some guidance on setting these defaults: * A low threshold (<= 2) is needed to prevent exponential cases from spiraling out of control. We picked 2 for no particular reason. * Scaling the penalty by any more than 30 means the reproducer from T18730 won't compile even with reasonably small values of n. Instead it will run out of runs/ticks. This means to positively affect the reproducer a scaling <= 30 is required. * A scaling of >= 15 still causes a few very large regressions on some nofib benchmarks. (+80% for gc/fulsom, +90% for real/ben-raytrace, +20% for spectral/fibheaps) * A scaling of >= 25 showed no regressions on nofib. However it showed a number of (small) regression for compiler perf benchmarks. The end result is that we are settling for a scaling of 30, with a threshold of 2. This gives us minimal compiler perf regressions. No nofib runtime regressions and will still avoid this pattern sometimes. This is a "safe" default, where we err on the side of compiler blowup instead of risking runtime regressions. For cases where the default falls short the flag can be changed to allow more/less inlining as needed on a per-module basis. -} tryUnfolding :: SimplEnv -> Logger -> Id -> Bool -> [ArgSummary] -> CallCtxt -> CoreExpr -> UnfoldingCache -> UnfoldingGuidance -> Maybe CoreExpr tryUnfolding env logger id lone_variable arg_infos cont_info unf_template unf_cache guidance = case guidance of UnfNever -> traceInline logger opts id str (text "UnfNever") Nothing UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok } | enough_args && (boring_ok || some_benefit || unfoldingVeryAggressive opts) -- See Note [INLINE for small functions] (3) -> traceInline logger opts id str (mk_doc some_benefit empty True) (Just unf_template) | otherwise -> traceInline logger opts id str (mk_doc some_benefit empty False) Nothing where some_benefit = calc_some_benefit uf_arity True enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0) UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } | isJoinId id, small_enough -> inline_join_point | unfoldingVeryAggressive opts -> yes | is_wf, some_benefit, small_enough -> yes | otherwise -> no where yes = traceInline logger opts id str (mk_doc some_benefit extra_doc True) (Just unf_template) no = traceInline logger opts id str (mk_doc some_benefit extra_doc False) Nothing some_benefit = calc_some_benefit (length arg_discounts) False -- depth_penalty: see Note [Avoid inlining into deeply nested cases] depth_threshold = unfoldingCaseThreshold opts depth_scaling = unfoldingCaseScaling opts depth_penalty | case_depth <= depth_threshold = 0 | otherwise = (size * (case_depth - depth_threshold)) `div` depth_scaling adjusted_size = size + depth_penalty - discount small_enough = adjusted_size <= unfoldingUseThreshold opts discount = computeDiscount arg_discounts res_discount arg_infos cont_info extra_doc = vcat [ ppWhen (isJoinId id) $ text "join" <+> fsep [ ppr (v, hasCoreUnfolding (idUnfolding v) , fmap (isEvaldUnfolding . idUnfolding) (lookupInScope in_scope v) , is_more_evald in_scope v) | v <- vselems (exprFreeIds unf_template) ] , text "depth based penalty =" <+> int depth_penalty , text "adjusted size =" <+> int adjusted_size ] inline_join_point -- See Note [Inlining join points] | or (zipWith scrut_arg arg_discounts arg_infos) = yes | anyVarSet (is_more_evald in_scope) $ exprFreeIds unf_template = yes | otherwise = no -- scrut_arg is True if the function body has a discount and the arg is a value scrut_arg disc ValueArg = disc > 0 scrut_arg _ _ = False where opts = seUnfoldingOpts env case_depth = seCaseDepth env inline_depth = seInlineDepth env in_scope = seInScope env -- Unpack the UnfoldingCache lazily because it may not be needed, and all -- its fields are strict; so evaluating unf_cache at all forces all the -- isWorkFree etc computations to take place. That risks wasting effort for -- Ids that are never going to inline anyway. -- See Note [UnfoldingCache] in GHC.Core UnfoldingCache{ uf_is_work_free = is_wf, uf_expandable = is_exp } = unf_cache mk_doc some_benefit extra_doc yes_or_no = vcat [ text "arg infos" <+> ppr arg_infos , text "interesting continuation" <+> ppr cont_info , text "some_benefit" <+> ppr some_benefit , text "is exp:" <+> ppr is_exp , text "is work-free:" <+> ppr is_wf , text "guidance" <+> ppr guidance , text "case depth =" <+> int case_depth , text "inline depth =" <+> int inline_depth , extra_doc , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"] ctx = log_default_dump_context (logFlags logger) str = "Considering inlining: " ++ showSDocOneLine ctx (ppr id) n_val_args = length arg_infos -- some_benefit is used when the RHS is small enough -- and the call has enough (or too many) value -- arguments (ie n_val_args >= arity). But there must -- be *something* interesting about some argument, or the -- result context, to make it worth inlining calc_some_benefit :: Arity -> Bool -> Bool -- The Arity is the number of args -- expected by the unfolding calc_some_benefit uf_arity is_inline | not saturated = interesting_args -- Under-saturated -- Note [Unsaturated applications] | otherwise = interesting_args -- Saturated or over-saturated || interesting_call where saturated = n_val_args >= uf_arity over_saturated = n_val_args > uf_arity interesting_args = any nonTriv arg_infos -- NB: (any nonTriv arg_infos) looks at the -- over-saturated args too which is "wrong"; -- but if over-saturated we inline anyway. interesting_call | over_saturated = True | otherwise = case cont_info of CaseCtxt -> not (lone_variable && is_exp) -- Note [Lone variables] ValAppCtxt -> True -- Note [Cast then apply] RuleArgCtxt -> uf_arity > 0 -- See Note [RHS of lets] DiscArgCtxt -> uf_arity > 0 -- Note [Inlining in ArgCtxt] RhsCtxt NonRecursive | is_inline -> uf_arity > 0 -- See Note [RHS of lets] _other -> False -- See Note [Nested functions] vselems :: VarSet -> [Var] vselems s = nonDetStrictFoldVarSet (\v vs -> v : vs) [] s is_more_evald :: InScopeSet -> Id -> Bool -- See Note [Inlining join points] is_more_evald in_scope v | Just v1 <- lookupInScope in_scope v , idUnfolding v1 `isBetterUnfoldingThan` idUnfolding v = True | otherwise = False {- Note [RHS of lets] ~~~~~~~~~~~~~~~~~~~~~ When the call is the argument of a function with a RULE, or the RHS of a let, we are a little bit keener to inline (in tryUnfolding). For example f y = (y,y,y) g y = let x = f y in ...(case x of (a,b,c) -> ...) ... We'd inline 'f' if the call was in a case context, and it kind-of-is, only we can't see it. Also x = f v could be expensive whereas x = case v of (a,b) -> a is patently cheap and may allow more eta expansion. So, in `interesting_call` in `tryUnfolding`, we treat the RHS of a /non-recursive/ let as not-totally-boring. A /recursive/ let isn't going be inlined so there is much less point. Hence the (only reason for the) RecFlag in RhsCtxt We inline only if `f` has an `UnfWhen` guidance. I found that being more eager led to fruitless inlining. See Note [Seq is boring] wrinkle (SB1) in GHC.Core.Opt.Simplify.Utils. Note [Inlining join points] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ In general we /do not/ want to inline join points /even if they are small/. See Note [Duplicating join points] in GHC.Core.Opt.Simplify.Iteration. But, assuming it is small, there are various times when we /do/ want to inline a (non-recursive) join point. Namely, if either of these hold: (1) A /scrutinised/ argument (non-zero discount) has a /ValueArg/ info. Inlining will give some benefit. (2) A free variable of the RHS is * Is /not/ evaluated at the join point defn site * Is evaluated at the join point call site. This is the is_more_evald predicate. (1) is fairly obvious but (2) is less so. Here is the code for `integerGT` without (2): integerGt = \ (x :: Integer) (y :: Integer) -> join fail _ = case x of { IS x1 -> case y of { IS y1 -> case <# x1 y1 of _DEFAULT -> case ==# x1 y1 of DEFAULT -> True; 1# -> False 1# -> False IP ds1 -> False IN ds1 -> True IP x1 -> case y of { _DEFAULT -> True; IP y1 -> case bigNatCompare x1 y1 of _DEFAULT -> False; GT -> True IN x1 -> case y of { _DEFAULT -> False; IN y1 -> case bigNatCompare y1 x1 of _DEFAULT -> False; GT -> True in case x of { _DEFAULT -> jump fail GHC.Prim.(##); IS x1 -> case y of { _DEFAULT -> jump fail GHC.Prim.(##); IS y1 -> tagToEnum# @Bool (># x1 y1) If we inline `fail` we get /much/ better code. The only clue is that `x` and `y` (a) are not evaluated at the definition site, and (b) are evaluated at the call site. This predicate is `isBetterUnfoldingThan`. You might think that the variable should also be /scrutinised/ in the join-point RHS, but here are two reasons for not taking that into account. First, we see code somewhat like this in imaginary/wheel-sieve1: let x = in join $j = (x,y) in case z of A -> case x of P -> $j Q -> blah B -> (x,x) C -> True Here `x` can't be duplicated into the branches becuase it is used in both the join point and the A branch. But if we inline $j we get let x = in case z of A -> case x of x' P -> (x', y) Q -> blah B -> x C -> True and now we /can/ duplicate x into the branches, at which point: * it is used strictly in the A branch (evaluated, but no thunk) * it is used lazily in the B branch (still a thunk) * it is not used at all in the C branch (no thunk) Second, spectral/treejoin gets a big win from SpecConstr due to evaluated-ness. Something like this: join $j x = ...(foo fv)... in case fv of I# x -> ... jump $j True ... If we inline $j, SpecConstr sees a call (foo (I# x)) and specialises. Note [Unsaturated applications] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When a call is not saturated, we *still* inline if one of the arguments has interesting structure. That's sometimes very important. A good example is the Ord instance for Bool in Base: Rec { $fOrdBool =GHC.Classes.D:Ord @ Bool ... $cmin_ajX $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool } But the defn of GHC.Classes.$dmmin is: $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a {- Arity: 3, HasNoCafRefs, Strictness: SLL, Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a -> case @ a GHC.Classes.<= @ a $dOrd x y of wild { GHC.Types.False -> y GHC.Types.True -> x }) -} We *really* want to inline $dmmin, even though it has arity 3, in order to unravel the recursion. Note [Things to watch] ~~~~~~~~~~~~~~~~~~~~~~ * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } Assume x is exported, so not inlined unconditionally. Then we want x to inline unconditionally; no reason for it not to, and doing so avoids an indirection. * { x = I# 3; ....f x.... } Make sure that x does not inline unconditionally! Lest we get extra allocation. Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ At one time we treated a call of a non-top-level function as "interesting" (regardless of how boring the context) in the hope that inlining it would eliminate the binding, and its allocation. Specifically, in the default case of interesting_call we had _other -> not is_top && uf_arity > 0 But actually postInlineUnconditionally does some of this and overall it makes virtually no difference to nofib. So I simplified away this special case Note [Cast then apply] ~~~~~~~~~~~~~~~~~~~~~~ Consider myIndex = __inline_me ( (/\a. ) |> co ) co :: (forall a. a -> a) ~ (forall a. T a) ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... We need to inline myIndex to unravel this; but the actual call (myIndex a) has no value arguments. The ValAppCtxt gives it enough incentive to inline. Note [Inlining in ArgCtxt] ~~~~~~~~~~~~~~~~~~~~~~~~~~ The condition (arity > 0) here is very important, because otherwise we end up inlining top-level stuff into useless places; eg x = I# 3# f = \y. g x This can make a very big difference: it adds 16% to nofib 'integer' allocs, and 20% to 'power'. At one stage I replaced this condition by 'True' (leading to the above slow-down). The motivation was test eyeball/inline1.hs; but that seems to work ok now. NOTE: arguably, we should inline in ArgCtxt only if the result of the call is at least CONLIKE. At least for the cases where we use ArgCtxt for the RHS of a 'let', we only profit from the inlining if we get a CONLIKE thing (modulo lets). Note [Lone variables] ~~~~~~~~~~~~~~~~~~~~~ See also Note [Interaction of exprIsWorkFree and lone variables] which appears below The "lone-variable" case is important. I spent ages messing about with unsatisfactory variants, but this is nice. The idea is that if a variable appears all alone as an arg of lazy fn, or rhs BoringCtxt as scrutinee of a case CaseCtxt as arg of a fn ArgCtxt AND it is bound to a cheap expression then we should not inline it (unless there is some other reason, e.g. it is the sole occurrence). That is what is happening at the use of 'lone_variable' in 'interesting_call'. Why? At least in the case-scrutinee situation, turning let x = (a,b) in case x of y -> ... into let x = (a,b) in case (a,b) of y -> ... and thence to let x = (a,b) in let y = (a,b) in ... is bad if the binding for x will remain. Another example: I discovered that strings were getting inlined straight back into applications of 'error' because the latter is strict. s = "foo" f = \x -> ...(error s)... Fundamentally such contexts should not encourage inlining because, provided the RHS is "expandable" (see Note [exprIsExpandable] in GHC.Core.Utils) the context can ``see'' the unfolding of the variable (e.g. case or a RULE) so there's no gain. However, watch out: * Consider this: foo = \n. [n]) {-# INLINE foo #-} bar = foo 20 {-# INLINE bar #-} baz = \n. case bar of { (m:_) -> m + n } Here we really want to inline 'bar' so that we can inline 'foo' and the whole thing unravels as it should obviously do. This is important: in the NDP project, 'bar' generates a closure data structure rather than a list. So the non-inlining of lone_variables should only apply if the unfolding is regarded as expandable; because that is when exprIsConApp_maybe looks through the unfolding. Hence the "&& is_exp" in the CaseCtxt branch of interesting_call * Even a type application or coercion isn't a lone variable. Consider case $fMonadST @ RealWorld of { :DMonad a b c -> c } We had better inline that sucker! The case won't see through it. For now, I'm treating treating a variable applied to types in a *lazy* context "lone". The motivating example was f = /\a. \x. BIG g = /\a. \y. h (f a) There's no advantage in inlining f here, and perhaps a significant disadvantage. Hence some_val_args in the Stop case Note [Interaction of exprIsWorkFree and lone variables] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The lone-variable test says "don't inline if a case expression scrutinises a lone variable whose unfolding is cheap". It's very important that, under these circumstances, exprIsConApp_maybe can spot a constructor application. So, for example, we don't consider let x = e in (x,x) to be cheap, and that's good because exprIsConApp_maybe doesn't think that expression is a constructor application. In the 'not (lone_variable && is_wf)' test, I used to test is_value rather than is_wf, which was utterly wrong, because the above expression responds True to exprIsHNF, which is what sets is_value. This kind of thing can occur if you have {-# INLINE foo #-} foo = let x = e in (x,x) which Roman did. -} computeDiscount :: [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int computeDiscount arg_discounts res_discount arg_infos cont_info = 10 -- Discount of 10 because the result replaces the call -- so we count 10 for the function itself + 10 * length actual_arg_discounts -- Discount of 10 for each arg supplied, -- because the result replaces the call + total_arg_discount + res_discount' where actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos total_arg_discount = sum actual_arg_discounts mk_arg_discount _ TrivArg = 0 mk_arg_discount _ NonTrivArg = 10 mk_arg_discount discount ValueArg = discount res_discount' | LT <- arg_discounts `compareLength` arg_infos = res_discount -- Over-saturated | otherwise = case cont_info of BoringCtxt -> 0 CaseCtxt -> res_discount -- Presumably a constructor ValAppCtxt -> res_discount -- Presumably a function _ -> 40 `min` res_discount -- ToDo: this 40 `min` res_discount doesn't seem right -- for DiscArgCtxt it shouldn't matter because the function will -- get the arg discount for any non-triv arg -- for RuleArgCtxt we do want to be keener to inline; but not only -- constructor results -- for RhsCtxt I suppose that exposing a data con is good in general -- And 40 seems very arbitrary -- -- res_discount can be very large when a function returns -- constructors; but we only want to invoke that large discount -- when there's a case continuation. -- Otherwise we, rather arbitrarily, threshold it. Yuk. -- But we want to avoid inlining large functions that return -- constructors into contexts that are simply "interesting"