{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}

-- | Coverage checking step of the
-- [Lower Your Guards paper](https://dl.acm.org/doi/abs/10.1145/3408989).
--
-- Coverage check guard trees (like @'PmMatch' 'Pre'@) to get a
-- 'CheckResult', containing
--
--   1. The set of uncovered values, 'cr_uncov'
--   2. And an annotated tree variant (like @'PmMatch' 'Post'@) that captures
--      redundancy and inaccessibility information as 'RedSets' annotations
--
-- Basically the UA function from Section 5.1, which is an optimised
-- interleaving of U and A from Section 3.2 (Figure 5).
-- The Normalised Refinement Types 'Nablas' are maintained in
-- "GHC.HsToCore.Pmc.Solver".
module GHC.HsToCore.Pmc.Check (
        CheckAction(..),
        checkMatchGroup, checkGRHSs, checkPatBind, checkEmptyCase, checkRecSel
    ) where

import GHC.Prelude

import GHC.Builtin.Names ( hasKey, considerAccessibleIdKey, trueDataConKey )
import GHC.HsToCore.Monad ( DsM )
import GHC.HsToCore.Pmc.Types
import GHC.HsToCore.Pmc.Utils
import GHC.HsToCore.Pmc.Solver
import GHC.Driver.DynFlags
import GHC.Utils.Outputable
import GHC.Tc.Utils.TcType (evVarPred)
import GHC.Data.OrdList
import GHC.Data.Bag

import qualified Data.Semigroup as Semi
import Data.List.NonEmpty ( NonEmpty(..) )
import qualified Data.List.NonEmpty as NE
import Data.Coerce
import GHC.Types.Var
import GHC.Core
import GHC.Core.Utils

-- | Coverage checking action: a function from the incoming 'Nablas' to a
-- 'CheckResult' computed in 'DsM'.
-- Can be composed 'leftToRight' or 'topToBottom'.
newtype CheckAction a = CA { unCA :: Nablas -> DsM (CheckResult a) }
  deriving Functor

-- | The 'CheckAction' of a pattern-match that always succeeds: every incoming
-- value is covered, nothing falls through and the result is 'Precise'.
matchSucceeded :: CheckAction RedSets
matchSucceeded = CA (pure . succeed_with)
  where
    -- The whole incoming set ends up as the covered set.
    succeed_with incoming = CheckResult
      { cr_ret    = emptyRedSets { rs_cov = incoming }
      , cr_uncov  = mempty
      , cr_approx = Precise
      }

-- | Composes 'CheckAction's top-to-bottom:
-- If a value falls through the resulting action, then it must fall through the
-- first action and then through the second action.
-- If a value matches the resulting action, then it either matches the
-- first action or matches the second action.
-- Basically the semantics of the LYG branching construct.
topToBottom :: ((Nablas -> (Precision, Nablas)) -> top -> bot -> (Precision, ret))
            -> CheckAction top
            -> CheckAction bot
            -> CheckAction ret
topToBottom combine (CA run_top) (CA run_bot) = CA $ \incoming -> do
  top_res <- run_top incoming
  -- Only what fell through the top action is fed to the bottom action.
  bot_res <- run_bot (cr_uncov top_res)
  limit   <- maxPmCheckModels <$> getDynFlags
  -- The combining function receives a throttler that falls back to the
  -- incoming set. See Note [Countering exponential blowup]
  let (merge_prec, merged) =
        combine (throttle limit incoming) (cr_ret top_res) (cr_ret bot_res)
  pure CheckResult
    { cr_ret    = merged
    , cr_uncov  = cr_uncov bot_res
    , cr_approx = merge_prec Semi.<> cr_approx top_res Semi.<> cr_approx bot_res
    }

-- | Composes 'CheckAction's left-to-right:
-- If a value falls through the resulting action, then it either falls through the
-- first action or through the second action.
-- If a value matches the resulting action, then it must match the first action
-- and then match the second action.
-- Basically the semantics of the LYG guard construct.
leftToRight :: (RedSets -> right -> ret)
            -> CheckAction RedSets
            -> CheckAction right
            -> CheckAction ret
leftToRight combine (CA run_left) (CA run_right) = CA $ \incoming -> do
  left_res  <- run_left incoming
  -- Only what the left action covered is fed to the right action.
  right_res <- run_right (rs_cov (cr_ret left_res))
  limit     <- maxPmCheckModels <$> getDynFlags
  -- Unite both uncovered sets, falling back to the incoming set if the union
  -- grew too large. See Note [Countering exponential blowup]
  let (uncov_prec, uncov) =
        throttle limit incoming (cr_uncov left_res Semi.<> cr_uncov right_res)
  pure CheckResult
    { cr_ret    = combine (cr_ret left_res) (cr_ret right_res)
    , cr_uncov  = uncov
    , cr_approx = uncov_prec Semi.<> cr_approx left_res Semi.<> cr_approx right_res
    }

-- | @throttle limit old new@ gives back @old@, tagged 'Approximate', when
-- @new@ contains more 'Nabla's than both the given @limit@ and @old@ do.
-- Otherwise it gives back @new@, tagged 'Precise'.
-- See Note [Countering exponential blowup].
throttle :: Int -> Nablas -> Nablas -> (Precision, Nablas)
throttle limit old@(MkNablas old_ds) new@(MkNablas new_ds)
  --- | pprTrace "PmCheck:throttle" (ppr (length old_ds) <+> ppr (length new_ds) <+> ppr limit) False = undefined
  | grew_too_large = (Approximate, old)
  | otherwise      = (Precise, new)
  where
    grew_too_large = length new_ds > max limit (length old_ds)

-- | Checks a non-empty sequence of alternatives, chaining them with
-- 'topToBottom' and collecting the annotated trees in the original order.
checkAlternatives :: (grdtree -> CheckAction anntree) -> NonEmpty grdtree -> CheckAction (NonEmpty anntree)
-- The implementation is pretty similar to
-- @traverse1 :: Apply f => (a -> f b) -> NonEmpty a -> f (NonEmpty b)@
checkAlternatives act (t :| rest) = case rest of
  []        -> (:| []) <$> act t
  (t' : ts) -> topToBottom prepend (act t) (checkAlternatives act (t' :| ts))
  where
    -- Ignores the throttler: the annotated trees are consed together as-is.
    prepend _throttler ann anns = (Precise, ann NE.<| anns)

-- | A 'RedSets' whose three components are all 'mempty'.
emptyRedSets :: RedSets
-- Semigroup instance would be misleading!
emptyRedSets = RedSets
  { rs_cov   = mempty
  , rs_div   = mempty
  , rs_bangs = mempty
  }

-- | Coverage-checks a single guard against the incoming 'Nablas'.
checkGrd :: PmGrd -> CheckAction RedSets
checkGrd grd = CA $ \inc -> case grd of
  -- let x = e: Refine with x ~ e
  PmLet x e -> do
    refined <- addPhiCtNablas inc (PhiCoreCt x e)
    tracePm "check:Let" (ppr x <+> char '=' <+> ppr e)
    pure (nothing_falls_through (emptyRedSets { rs_cov = refined }))
  -- Bang x _: Diverge on x ~ ⊥, refine with x ≁ ⊥
  PmBang x mb_info -> do
    diverging <- addPhiCtNablas inc (PhiBotCt x)
    refined   <- addPhiCtNablas inc (PhiNotBotCt x)
    -- See Note [Dead bang patterns]
    -- mb_info = Just info <==> PmBang originates from bang pattern in source
    let bangs = case mb_info of
          Just info -> unitOL (diverging, info)
          _         -> NilOL
    tracePm "check:Bang" (ppr x <+> ppr diverging)
    pure (nothing_falls_through
            RedSets { rs_cov = refined, rs_div = diverging, rs_bangs = bangs })
  -- See point (3) of Note [considerAccessible]
  PmCon x (PmAltConLike con) _ _ _
    | x `hasKey` considerAccessibleIdKey
    , con `hasKey` trueDataConKey
    -> pure (nothing_falls_through (emptyRedSets { rs_cov = initNablas }))
  -- Con: Fall through on x ≁ K and refine with x ~ K ys and type info
  PmCon x con tvs dicts args -> do
    !diverging <- if isPmAltConMatchStrict con
                    then addPhiCtNablas inc (PhiBotCt x)
                    else pure mempty
    !refined   <- addPhiCtNablas inc (PhiConCt x con tvs (map evVarPred dicts) args)
    !fallen    <- addPhiCtNablas inc (PhiNotConCt x con)
    tracePm "check:Con" $ vcat
      [ ppr grd
      , ppr inc
      , hang (text "div") 2 (ppr diverging)
      , hang (text "matched") 2 (ppr refined)
      , hang (text "uncov") 2 (ppr fallen)
      ]
    pure CheckResult
      { cr_ret    = emptyRedSets { rs_cov = refined, rs_div = diverging }
      , cr_uncov  = fallen
      , cr_approx = Precise
      }
  where
    -- A precise result with an empty uncovered set.
    nothing_falls_through red = CheckResult
      { cr_ret    = red
      , cr_uncov  = mempty
      , cr_approx = Precise
      }

-- | Coverage-checks a whole 'GrdDag' by composing the checks of its parts.
checkGrdDag :: GrdDag -> CheckAction RedSets
checkGrdDag dag = case dag of
  GdOne g     -> checkGrd g
  GdEnd       -> matchSucceeded
  GdSeq dl dr -> leftToRight merge_seq (checkGrdDag dl) (checkGrdDag dr)
  GdAlt dt db -> topToBottom merge_alt (checkGrdDag dt) (checkGrdDag db)
  where
    -- Note that
    --   * the incoming set of dr is the covered set of dl
    --   * the covered set of dr is a subset of the incoming set of dr
    --   * this is so that the covered set of dr is the covered set of the
    --     entire sequence
    -- Hence we merge by returning @rs_cov ri_r@ as the covered set.
    merge_seq ri_l ri_r = merge_with (rs_cov ri_r) ri_l ri_r

    -- The intuition here: ri_b is disjoint with ri_t, because db only gets
    -- fed the "leftover" uncovered set of dt. But for the GrdDag that follows
    -- to the right of the GdAlt (say), we have to reunite the RedSets. Hence
    -- component-wise merge.
    -- After the GdAlt, we unite the covered sets. If they become too large, we
    -- throttle, continuing with the incoming set.
    merge_alt throttler ri_t ri_b =
      let (prec, cov) = throttler (rs_cov ri_t Semi.<> rs_cov ri_b)
      in (prec, merge_with cov ri_t ri_b)

    -- Use the given covered set; unite divergent sets and bangs of both sides.
    merge_with cov ri_1 ri_2 = RedSets
      { rs_cov   = cov
      , rs_div   = rs_div ri_1 Semi.<> rs_div ri_2
      , rs_bangs = rs_bangs ri_1 Semi.<> rs_bangs ri_2
      }

-- | Checks the matches of a match group as top-to-bottom alternatives.
checkMatchGroup :: PmMatchGroup Pre -> CheckAction (PmMatchGroup Post)
checkMatchGroup (PmMatchGroup alts) =
  fmap PmMatchGroup (checkAlternatives checkMatch alts)

-- | Checks the patterns of a match, then its guarded right-hand sides.
checkMatch :: PmMatch Pre -> CheckAction (PmMatch Post)
checkMatch (PmMatch { pm_pats = pats, pm_grhss = rhss }) =
  leftToRight PmMatch pats_action rhss_action
  where
    pats_action = checkGrdDag pats
    rhss_action = checkGRHSs rhss

-- | Checks the local guards first, then the alternatives that follow them.
checkGRHSs :: PmGRHSs Pre -> CheckAction (PmGRHSs Post)
checkGRHSs (PmGRHSs { pgs_lcls = locals, pgs_grhss = alts }) =
  leftToRight PmGRHSs lcls_action alts_action
  where
    lcls_action = checkGrdDag locals
    alts_action = checkAlternatives checkGRHS alts

-- | Checks the guards of a single guarded right-hand side, passing the
-- right-hand side info through unchanged.
checkGRHS :: PmGRHS Pre -> CheckAction (PmGRHS Post)
checkGRHS (PmGRHS { pg_grds = guards, pg_rhs = info }) =
  (\red -> PmGRHS red info) <$> checkGrdDag guards

-- | Computes the uncovered set of an empty case by adding @var ≁ ⊥@ to the
-- incoming 'Nablas'.
checkEmptyCase :: PmEmptyCase -> CheckAction PmEmptyCase
-- See Note [Checking EmptyCase]
checkEmptyCase pe@(PmEmptyCase { pe_var = var }) = CA $ \inc -> do
  uncov <- addPhiCtNablas inc (PhiNotBotCt var)
  pure CheckResult
    { cr_ret    = pe
    , cr_uncov  = uncov
    , cr_approx = mempty
    }

-- | Checks a pattern binding by coercing 'checkGRHS'.
checkPatBind :: PmPatBind Pre -> CheckAction (PmPatBind Post)
checkPatBind = coerce checkGRHS

-- | Computes the uncovered set of a record selector application: the argument
-- is constrained to differ from every constructor in 'pr_cons'.
checkRecSel :: PmRecSel () -> CheckAction (PmRecSel Id)
-- See Note [Detecting incomplete record selectors] in GHC.HsToCore.Pmc
checkRecSel pr@(PmRecSel { pr_arg = arg, pr_cons = cons }) = CA $ \inc -> do
  -- Reuse the variable if the argument already is one, else make a fresh one.
  arg_id <- case arg of
    Var v -> pure v
    _     -> mkPmId (exprType arg)
  let not_con_cts = [ PhiNotConCt arg_id (PmAltConLike con) | con <- cons ]
      all_cts     = listToBag (PhiCoreCt arg_id arg : not_con_cts)
  uncov <- addPhiCtsNablas inc all_cts
  pure CheckResult
    { cr_ret    = pr{ pr_arg_var = arg_id }
    , cr_uncov  = uncov
    , cr_approx = mempty
    }

{- Note [Checking EmptyCase]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-XEmptyCase is useful for matching on empty data types like 'Void'.
For example, the following is a complete match:

    f :: Void -> ()
    f x = case x of {}

Really, -XEmptyCase is the only way to write a program that at the same time is
safe (@f _ = error "boom"@ is not because of ⊥), doesn't trigger a warning
(@f !_ = error "inaccessible"@ has inaccessible RHS) and doesn't turn an
exception into divergence (@f x = f x@).

Semantically, unlike every other case expression, -XEmptyCase is strict in its
match var x, which rules out ⊥ as an inhabitant. So we add x ≁ ⊥ to the
initial Nabla and check if there are any values left to match on.

Note [Dead bang patterns]
~~~~~~~~~~~~~~~~~~~~~~~~~
Consider

  f :: Bool -> Int
  f True = 1
  f !x   = 2

Whenever we fall through to the second equation, we will already have evaluated
the argument. Thus, the bang pattern serves no purpose and should be warned
about. We call this kind of bang pattern "dead". Dead bangs are the ones
that under no circumstances can force a thunk that wasn't already forced.
Dead bangs are a form of redundant bangs; see below.

We can detect dead bang patterns by checking whether @x ~ ⊥@ is satisfiable
where the PmBang appears in 'checkGrd'. If not, then clearly the bang is
dead. So for a source bang, we add the refined Nabla and the source info to
the 'rs_bangs' field of the 'RedSets'. When collecting stuff to warn, we test
that Nabla for inhabitants. If it's empty, we'll warn that it's redundant.

Note that we don't want to warn for a dead bang that appears on a redundant
clause. That is because in that case, we recommend to delete the clause wholly,
including its leading pattern match.

Dead bang patterns are redundant. But there are bang patterns which are
redundant that aren't dead, for example

  f !() = 0

the bang still forces the match variable, before we attempt to match on (). But
it is redundant with the forcing done by the () match. We currently don't
detect redundant bangs that aren't dead.

Note [Countering exponential blowup]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Precise pattern match exhaustiveness checking is necessarily exponential in
the size of some input programs. We implement a counter-measure in the form of
the -fmax-pmcheck-models flag, limiting the number of Nablas we check against
each pattern by a constant.

How do we do that? Consider

  f True True = ()
  f True True = ()

And imagine we set our limit to 1 for the sake of the example. The first clause
will be checked against the initial Nabla, {}. Doing so will produce an
Uncovered set of size 2, containing the models {x≁True} and {x~True,y≁True}.
Also we find the first clause to cover the model {x~True,y~True}.

But the Uncovered set we get out of the match is too large! We somehow have to
ensure not to make things worse than they are already, so we continue checking
with a singleton Uncovered set of the initial Nabla {}. Why is this
sound (wrt. the notion in GADTs Meet Their Match)? Well, it basically amounts
to forgetting that we matched against the first clause. The values represented
by {} are a superset of those represented by its two refinements {x≁True} and
{x~True,y≁True}.

This forgetfulness becomes very apparent in the example above: By continuing
with {} we don't detect the second clause as redundant, as it again covers the
same non-empty subset of {}. So we no longer flag every redundant clause as
redundant, but we still will never flag something as redundant that isn't.

For exhaustiveness, the converse applies: We will report @f@ as non-exhaustive
and report @f _ _@ as missing, which is a superset of the actual missing
matches. But soundness means we will never fail to report a missing match.

This mechanism is implemented in 'throttle'.

Guards are an extreme example in this regard, with #11195 being a particularly
dreadful example: Since their RHS are often pretty much unique, we split on a
variable (the one representing the RHS) that doesn't occur anywhere else in the
program, so we don't actually get useful information out of that split!
We counter this by throttling *Uncovered* sets in `leftToRight`.

Another challenge is posed by or-patterns
(see also Note [Implementation of OrPatterns]):
Large matches such as `f (LT; GT) (LT; GT) .... True = 1` will desugar
into a long sequence of `GdAlt LT GT`. The careless desugaring of `GdAlt`
via `topToBottom` would cause ever-enlarging *Covered* sets. So we throttle
when merging Covered sets from LT and GT, by using the original incoming covered
set. The effect is very much like replacing (LT; GT) with a wildcard pattern _.

Note [considerAccessible]
~~~~~~~~~~~~~~~~~~~~~~~~~
Consider (T18610)

  f :: Bool -> Int
  f x = case (x, x) of
    (True,  True)  -> 1
    (False, False) -> 2
    (True,  False) -> 3 -- Warning: Redundant

The third case is detected as redundant. But it may be the intent of the
programmer to keep the dead code, in order for it not to bitrot or to support
debugging scenarios. But there is no way to communicate that to the
pattern-match checker! The only way is to deactivate pattern-match checking
wholesale, which is quite annoying. Hence, we define in "GHC.Exts":

  considerAccessible = True

'considerAccessible' is treated specially by the pattern-match checker in that a
guard with it as the scrutinee expression will keep its parent clause alive:

  g :: Bool -> Int
  g x = case (x, x) of
    (True,  True)  -> 1
    (False, False) -> 2
    (True,  False) | GHC.Exts.considerAccessible -> 3 -- No warning

The key bits of the implementation are:

  1. Its definition is recognised as known-key (see "GHC.Builtin.Names").
  2. After "GHC.HsToCore.Pmc.Desugar", the guard will end up as a 'PmCon', where
     the match var is the known-key 'considerAccessible' and the constructor
     against which it matches is 'True'.
  3. We recognise the 'PmCon' in 'GHC.HsToCore.Pmc.Check.checkGrd' and inflate
     the incoming set of values for all guards downstream to the unconstrained
     'initNablas' set, i.e. /all/ values.
     (The set of values that falls through that particular guard is empty, as
     matching 'considerAccessible' against 'True' can't fail.)

Note that 'considerAccessible' breaks the invariant that incoming sets of values
reaching syntactic children are subsets of that of the syntactic ancestor:
A whole match, like that of the third clause of the example, might have no
incoming value, but its single RHS has incoming values because of (3).

That means the 'is_covered' flag computed in 'GHC.HsToCore.Pmc.cirbsMatch'
is irrelevant and should not be used to flag all children as redundant (which is
what we used to do).

We achieve great benefits with a very simple implementation.
There are caveats, though:

  (A) Putting potentially failing guards /after/ the
      'considerAccessible' guard might lead to weird check results, e.g.,

        h :: Bool -> Int
        h x = case (x, x) of
          (True,  True)  -> 1
          (False, False) -> 2
          (True,  False) | GHC.Exts.considerAccessible, False <- x -> 3
          -- Warning: Not matched: (_, _)

      That *is* fixable, although we would pay with a much more complicated
      implementation.
  (B) If the programmer puts a 'considerAccessible' marker on an accessible
      clause, the checker doesn't warn about it. E.g.,

        f :: Bool -> Int
        f True | considerAccessible = 0
        f False = 1

      will not emit any warning whatsoever. We could implement code that warns
      here, but it wouldn't be as simple as it is now.
-}