{-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE TypeFamilies #-} ----------------------------------------------------------------------------- -- -- Machine-dependent assembly language -- -- (c) The University of Glasgow 1993-2004 -- ----------------------------------------------------------------------------- module GHC.CmmToAsm.X86.Instr ( Instr(..) , Operand(..) , PrefetchVariant(..) , FMAPermutation(..) , JumpDest(..) , getJumpDestBlockId , canShortcut , shortcutStatics , shortcutJump , allocMoreStack , maxSpillSlots , archWordFormat , takeRegRegMoveInstr , regUsageOfInstr , takeDeltaInstr , mkLoadInstr , mkJumpInstr , mkStackAllocInstr , mkStackDeallocInstr , mkSpillInstr , mkRegRegMoveInstr , movInstr , jumpDestsOfInstr , canFallthroughTo , patchRegsOfInstr , patchJumpInstr , isMetaInstr , isJumpishInstr , movdOutFormat , MinOrMax(..), MinMaxType(..) ) where import GHC.Prelude import GHC.Data.FastString import GHC.CmmToAsm.X86.Cond import GHC.CmmToAsm.X86.Regs import GHC.CmmToAsm.Format import GHC.CmmToAsm.Reg.Target (targetClassOfReg) import GHC.CmmToAsm.Types import GHC.CmmToAsm.Utils import GHC.CmmToAsm.Instr (RegUsage(..), noUsage) import GHC.Platform.Reg import GHC.Platform.Reg.Class.Unified import GHC.CmmToAsm.Config import GHC.Cmm.BlockId import GHC.Cmm.Dataflow.Label import GHC.Platform.Regs import GHC.Cmm import GHC.Utils.Constants ( debugIsOn ) import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Platform import GHC.Cmm.CLabel import GHC.Types.Unique.Set import GHC.Types.Unique import GHC.Types.Unique.DSM import GHC.Types.Basic (Alignment) import GHC.Cmm.DebugBlock (UnwindTable) import GHC.Utils.Misc ( HasDebugCallStack ) import GHC.Data.Maybe -- Format of an x86/x86_64 memory address, in bytes. -- archWordFormat :: Bool -> Format archWordFormat is32Bit | is32Bit = II32 | otherwise = II64 -- ----------------------------------------------------------------------------- -- Intel x86 instructions data Instr -- comment pseudo-op = COMMENT FastString -- location pseudo-op (file, line, col, name) | LOCATION Int Int Int String -- some static data spat out during code -- generation. Will be extracted before -- pretty-printing. | LDATA Section (Alignment, RawCmmStatics) -- start a new basic block. Useful during -- codegen, removed later. Preceding -- instruction should be a jump, as per the -- invariants for a BasicBlock (see Cmm). | NEWBLOCK BlockId -- unwinding information -- See Note [Unwinding information in the NCG]. | UNWIND CLabel UnwindTable -- specify current stack offset for benefit of subsequent passes. -- This carries a BlockId so it can be used in unwinding information. | DELTA Int -- | X86 scalar move instruction. -- -- When used at a vector format, only moves the lower 64 bits of data; -- the rest of the data in the destination may either be zeroed or -- preserved, depending on the specific format and operands. | MOV Format Operand Operand -- N.B. Due to AT&T assembler quirks, when used with 'II64' -- 'Format' immediate source and memory target operand, the source -- operand is interpreted to be a 32-bit sign-extended value. -- True 64-bit operands need to be either first moved to a register or moved -- with @MOVABS@; we currently do not use this instruction in GHC. -- See https://stackoverflow.com/questions/52434073/whats-the-difference-between-the-x86-64-att-instructions-movq-and-movabsq. | MOVD Format Operand Operand -- ^ MOVD/MOVQ SSE2 instructions -- (bitcast between a general purpose -- register and a float register). -- Format is input format, output format is -- calculated in the 'movdOutFormat' function. | CMOV Cond Format Operand Reg | MOVZxL Format Operand Operand -- ^ The format argument is the size of operand 1 (the number of bits we keep) -- We always zero *all* high bits, even though this isn't how the actual instruction -- works. The code generator also seems to rely on this behaviour and it's faster -- to execute on many cpus as well so for now I'm just documenting the fact. | MOVSxL Format Operand Operand -- format is the size of operand 1 -- x86_64 note: plain mov into a 32-bit register always zero-extends -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which -- don't affect the high bits of the register. -- Load effective address (also a very useful three-operand add instruction :-) | LEA Format Operand Operand -- Int Arithmetic. | ADD Format Operand Operand | ADC Format Operand Operand | SUB Format Operand Operand | SBB Format Operand Operand | MUL Format Operand Operand | MUL2 Format Operand -- %edx:%eax = operand * %rax | IMUL Format Operand Operand -- signed int mul | IMUL2 Format Operand -- %edx:%eax = operand * %eax | DIV Format Operand -- eax := eax:edx/op, edx := eax:edx%op | IDIV Format Operand -- ditto, but signed -- Int Arithmetic, where the effects on the condition register -- are important. Used in specialized sequences such as MO_Add2. -- Do not rewrite these instructions to "equivalent" ones that -- have different effect on the condition register! (See #9013.) | ADD_CC Format Operand Operand | SUB_CC Format Operand Operand -- Simple bit-twiddling. | AND Format Operand Operand | OR Format Operand Operand | XOR Format Operand Operand -- | AVX bitwise logical XOR operation | VXOR Format Operand Reg Reg | NOT Format Operand | NEGI Format Operand -- NEG instruction (name clash with Cond) | BSWAP Format Reg -- Shifts (amount may be immediate or %cl only) | SHL Format Operand{-amount-} Operand | SAR Format Operand{-amount-} Operand | SHR Format Operand{-amount-} Operand | SHRD Format Operand{-amount-} Operand Operand | SHLD Format Operand{-amount-} Operand Operand | BT Format Imm Operand | NOP -- We need to support the FSTP (x87 store and pop) instruction -- so that we can correctly read off the return value of an -- x86 CDECL C function call when its floating point. -- so we don't include a register argument, and just use st(0) -- this instruction is used ONLY for return values of C ffi calls -- in x86_32 abi | X87Store Format AddrMode -- st(0), dst -- SSE2 floating point: we use a restricted set of the available SSE2 -- instructions for floating-point. -- use MOV for moving (either movss or movsd (movlpd better?)) | CVTSS2SD Reg Reg -- F32 to F64 | CVTSD2SS Reg Reg -- F64 to F32 | CVTTSS2SIQ Format Operand Reg -- F32 to I32/I64 (with truncation) | CVTTSD2SIQ Format Operand Reg -- F64 to I32/I64 (with truncation) | CVTSI2SS Format Operand Reg -- I32/I64 to F32 | CVTSI2SD Format Operand Reg -- I32/I64 to F64 -- | FMA3 fused multiply-add operations. | FMA3 Format FMASign FMAPermutation Operand Reg Reg -- For the FMA213 permutation (the only one we use currently), -- this is: src3 (r/m), src2 (r), dst/src1 (r) -- (NB: this isexactly reversed from how Intel lists the arguments.) -- use ADD, SUB, and SQRT for arithmetic. In both cases, operands -- are Operand Reg. -- SSE2 floating-point division: | FDIV Format Operand Operand -- divisor, dividend(dst) -- use CMP for comparisons. ucomiss and ucomisd instructions -- compare single/double prec floating point respectively. | SQRT Format Operand Reg -- src, dst -- Comparison | TEST Format Operand Operand | CMP Format Operand Operand | SETCC Cond Operand -- Stack Operations. | PUSH Format Operand | POP Format Operand -- both unused (SDM): -- | PUSHA -- | POPA -- Jumping around. | JMP Operand [RegWithFormat] -- including live Regs at the call | JXX Cond BlockId -- includes unconditional branches | JXX_GBL Cond Imm -- non-local version of JXX -- Table jump | JMP_TBL Operand -- Address to jump to [Maybe JumpDest] -- Targets of the jump table Section -- Data section jump table should be put in CLabel -- Label of jump table -- | X86 call instruction | CALL (Either Imm Reg) -- ^ Jump target [RegWithFormat] -- ^ Arguments (required for register allocation) -- Other things. | CLTD Format -- sign extend %eax into %edx:%eax | FETCHGOT Reg -- pseudo-insn for ELF position-independent code -- pretty-prints as -- call 1f -- 1: popl %reg -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg | FETCHPC Reg -- pseudo-insn for Darwin position-independent code -- pretty-prints as -- call 1f -- 1: popl %reg -- bit counting instructions | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1 | LZCNT Format Operand Reg -- [BMI2] count number of leading zeros | TZCNT Format Operand Reg -- [BMI2] count number of trailing zeros | BSF Format Operand Reg -- bit scan forward | BSR Format Operand Reg -- bit scan reverse -- bit manipulation instructions | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask -- prefetch | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch -- variant can be NTA, Lvl0, Lvl1, or Lvl2 | LOCK Instr -- lock prefix | XADD Format Operand Operand -- src (r), dst (r/m) | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit | XCHG Format Operand Reg -- src (r/m), dst (r/m) | MFENCE -- Vector Instructions -- -- NOTE: Instructions follow the AT&T syntax -- Constructors and deconstructors | VBROADCAST Format Operand Reg | VEXTRACT Format Imm Reg Operand | INSERTPS Format Imm Operand Reg -- move operations -- | SSE2 unaligned move of floating-point vectors | MOVU Format Operand Operand -- | AVX unaligned move of floating-point vectors | VMOVU Format Operand Operand -- | SSE2 move between memory and low-part of an xmm register | MOVL Format Operand Operand -- | SSE move between memory and high-part of an xmm register | MOVH Format Operand Operand -- | SSE2 unaligned move of integer vectors | MOVDQU Format Operand Operand -- | AVX unaligned move of integer vectors | VMOVDQU Format Operand Operand -- logic operations | PXOR Format Operand Reg | VPXOR Format Reg Reg Reg -- Arithmetic | VADD Format Operand Reg Reg | VSUB Format Operand Reg Reg | VMUL Format Operand Reg Reg | VDIV Format Operand Reg Reg -- Shuffle | SHUF Format Imm Operand Reg | VSHUF Format Imm Operand Reg Reg | PSHUFD Format Imm Operand Reg | VPSHUFD Format Imm Operand Reg -- | Move two 32-bit floats from the high part of an xmm register -- to the low part of another xmm register. | MOVHLPS Format Reg Reg | UNPCKL Format Operand Reg | PUNPCKLQDQ Format Operand Reg -- Shift | PSLLDQ Format Operand Reg | PSRLDQ Format Operand Reg -- min/max | MINMAX MinOrMax MinMaxType Format Operand Operand | VMINMAX MinOrMax MinMaxType Format Operand Reg Reg data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 -- | 'MIN' or 'MAX' data MinOrMax = Min | Max deriving ( Eq, Show ) -- | What kind of min/max operation: signed or unsigned vector integer min/max, -- or (scalar or vector) floating point min/max? data MinMaxType = IntVecMinMax { minMaxSigned :: Bool } | FloatMinMax deriving ( Eq, Show ) data Operand = OpReg Reg -- register | OpImm Imm -- immediate value | OpAddr AddrMode -- memory reference -- NB: As of 2023 we only use the FMA213 permutation. data FMAPermutation = FMA132 | FMA213 | FMA231 -- | Returns which registers are read and written as a (read, written) -- pair. regUsageOfInstr :: Platform -> Instr -> RegUsage regUsageOfInstr platform instr = case instr of MOV fmt src dst -- MOVSS/MOVSD preserve the upper half of vector registers, -- but only for reg-2-reg moves | VecFormat _ sFmt <- fmt , isFloatScalarFormat sFmt , OpReg {} <- src , OpReg {} <- dst -> usageRM fmt src dst -- other MOV instructions zero any remaining upper part of the destination -- (largely to avoid partial register stalls) | otherwise -> usageRW fmt src dst MOVD fmt src dst -> -- NB: MOVD and MOVQ always zero any remaining upper part of destination, -- so the destination is "written" not "modified". usageRW' fmt (movdOutFormat fmt) src dst CMOV _ fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] MOVZxL fmt src dst -> usageRW fmt src dst MOVSxL fmt src dst -> usageRW fmt src dst LEA fmt src dst -> usageRW fmt src dst ADD fmt src dst -> usageRM fmt src dst ADC fmt src dst -> usageRM fmt src dst SUB fmt src dst -> usageRM fmt src dst SBB fmt src dst -> usageRM fmt src dst IMUL fmt src dst -> usageRM fmt src dst -- Result of IMULB will be in just in %ax IMUL2 II8 src -> mkRU (mk II8 eax:use_R II8 src []) [mk II8 eax] -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and -- %ax/%eax/%rax. IMUL2 fmt src -> mkRU (mk fmt eax:use_R fmt src []) [mk fmt eax,mk fmt edx] MUL fmt src dst -> usageRM fmt src dst MUL2 fmt src -> mkRU (mk fmt eax:use_R fmt src []) [mk fmt eax,mk fmt edx] DIV fmt op -> mkRU (mk fmt eax:mk fmt edx:use_R fmt op []) [mk fmt eax, mk fmt edx] IDIV fmt op -> mkRU (mk fmt eax:mk fmt edx:use_R fmt op []) [mk fmt eax, mk fmt edx] ADD_CC fmt src dst -> usageRM fmt src dst SUB_CC fmt src dst -> usageRM fmt src dst AND fmt src dst -> usageRM fmt src dst OR fmt src dst -> usageRM fmt src dst XOR fmt (OpReg src) (OpReg dst) | src == dst -> mkRU [] [mk fmt dst] XOR fmt src dst -> usageRM fmt src dst VXOR fmt (OpReg src1) src2 dst | src1 == src2, src1 == dst -> mkRU [] [mk fmt dst] VXOR fmt src1 src2 dst -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst] NOT fmt op -> usageM fmt op BSWAP fmt reg -> mkRU [mk fmt reg] [mk fmt reg] NEGI fmt op -> usageM fmt op SHL fmt imm dst -> usageRM fmt imm dst SAR fmt imm dst -> usageRM fmt imm dst SHR fmt imm dst -> usageRM fmt imm dst SHLD fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2 SHRD fmt imm dst1 dst2 -> usageRMM fmt imm dst1 dst2 BT fmt _ src -> mkRUR (use_R fmt src []) PUSH fmt op -> mkRUR (use_R fmt op []) POP fmt op -> mkRU [] (def_W fmt op) TEST fmt src dst -> mkRUR (use_R fmt src $! use_R fmt dst []) CMP fmt src dst -> mkRUR (use_R fmt src $! use_R fmt dst []) SETCC _ op -> mkRU [] (def_W II8 op) JXX _ _ -> mkRU [] [] JXX_GBL _ _ -> mkRU [] [] JMP op regs -> mkRU (use_R addrFmt op regs) [] JMP_TBL op _ _ _ -> mkRU (use_R addrFmt op []) [] CALL (Left _) params -> mkRU params (map mkFmt $ callClobberedRegs platform) CALL (Right reg) params -> mkRU (mk addrFmt reg:params) (map mkFmt $ callClobberedRegs platform) CLTD fmt -> mkRU [mk fmt eax] [mk fmt edx] NOP -> mkRU [] [] X87Store _fmt dst -> mkRUR (use_EA dst []) CVTSS2SD src dst -> mkRU [mk FF32 src] [mk FF64 dst] CVTSD2SS src dst -> mkRU [mk FF64 src] [mk FF32 dst] CVTTSS2SIQ fmt src dst -> mkRU (use_R FF32 src []) [mk fmt dst] CVTTSD2SIQ fmt src dst -> mkRU (use_R FF64 src []) [mk fmt dst] CVTSI2SS fmt src dst -> mkRU (use_R fmt src []) [mk FF32 dst] CVTSI2SD fmt src dst -> mkRU (use_R fmt src []) [mk FF64 dst] FDIV fmt src dst -> usageRM fmt src dst SQRT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] FETCHGOT reg -> mkRU [] [mk addrFmt reg] FETCHPC reg -> mkRU [] [mk addrFmt reg] COMMENT _ -> noUsage LOCATION{} -> noUsage UNWIND{} -> noUsage DELTA _ -> noUsage POPCNT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] LZCNT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] TZCNT fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] BSF fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] BSR fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] PDEP fmt src mask dst -> mkRU (use_R fmt src $ use_R fmt mask []) [mk fmt dst] PEXT fmt src mask dst -> mkRU (use_R fmt src $ use_R fmt mask []) [mk fmt dst] FMA3 fmt _ _ src3 src2 dst -> usageFMA fmt src3 src2 dst -- note: might be a better way to do this PREFETCH _ fmt src -> mkRU (use_R fmt src []) [] LOCK i -> regUsageOfInstr platform i XADD fmt src dst -> usageMM fmt src dst CMPXCHG fmt src dst -> usageRMM fmt src dst (OpReg eax) XCHG fmt src dst -> usageMM fmt src (OpReg dst) MFENCE -> noUsage -- vector instructions VBROADCAST fmt src dst -> mkRU (use_R fmt src []) [mk fmt dst] VEXTRACT fmt _off src dst -> usageRW fmt (OpReg src) dst INSERTPS fmt (ImmInt off) src dst -> mkRU ((use_R fmt src []) ++ [mk fmt dst | not doesNotReadDst]) [mk fmt dst] where -- Compute whether the instruction reads the destination register or not. -- Immediate bits: ss_dd_zzzz s = src pos, d = dst pos, z = zeroed components. doesNotReadDst = and [ testBit off i | i <- [0, 1, 2, 3], i /= pos ] -- Check whether the positions in which we are not inserting -- are being zeroed. where pos = ( off `shiftR` 4 ) .&. 0b11 INSERTPS fmt _off src dst -> mkRU ((use_R fmt src []) ++ [mk fmt dst]) [mk fmt dst] VMOVU fmt src dst -> usageRW fmt src dst MOVU fmt src dst -> usageRW fmt src dst MOVL fmt src dst -> usageRM fmt src dst MOVH fmt src dst -> usageRM fmt src dst MOVDQU fmt src dst -> usageRW fmt src dst VMOVDQU fmt src dst -> usageRW fmt src dst PXOR fmt (OpReg src) dst | src == dst -> mkRU [] [mk fmt dst] | otherwise -> mkRU [mk fmt src, mk fmt dst] [mk fmt dst] VPXOR fmt s1 s2 dst | s1 == s2, s1 == dst -> mkRU [] [mk fmt dst] | otherwise -> mkRU [mk fmt s1, mk fmt s2] [mk fmt dst] VADD fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] VSUB fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] VMUL fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] VDIV fmt s1 s2 dst -> mkRU ((use_R fmt s1 []) ++ [mk fmt s2]) [mk fmt dst] SHUF fmt _mask src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] VSHUF fmt _mask src1 src2 dst -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst] PSHUFD fmt _mask src dst -> mkRU (use_R fmt src []) [mk fmt dst] VPSHUFD fmt _mask src dst -> mkRU (use_R fmt src []) [mk fmt dst] PSLLDQ fmt off dst -> mkRU (use_R fmt off []) [mk fmt dst] MOVHLPS fmt src dst -> mkRU [mk fmt src] [mk fmt dst] UNPCKL fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] PUNPCKLQDQ fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst] MINMAX _ _ fmt src dst -> usageRM fmt src dst VMINMAX _ _ fmt src1 src2 dst -> mkRU (use_R fmt src1 [mk fmt src2]) [mk fmt dst] _other -> panic "regUsage: unrecognised instr" where -- # Definitions -- -- Written: If the operand is a register, it's written. If it's an -- address, registers mentioned in the address are read. -- -- Modified: If the operand is a register, it's both read and -- written. If it's an address, registers mentioned in the address -- are read. -- 2 operand form; first operand Read; second Written usageRW :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage usageRW fmt op (OpReg reg) = mkRU (use_R fmt op []) [mk fmt reg] usageRW fmt op (OpAddr ea) = mkRUR (use_R fmt op $! use_EA ea []) usageRW _ _ _ = panic "X86.RegInfo.usageRW: no match" usageRW' :: HasDebugCallStack => Format -> Format -> Operand -> Operand -> RegUsage usageRW' fmt1 fmt2 op (OpReg reg) = mkRU (use_R fmt1 op []) [mk fmt2 reg] usageRW' fmt1 _ op (OpAddr ea) = mkRUR (use_R fmt1 op $! use_EA ea []) usageRW' _ _ _ _ = panic "X86.RegInfo.usageRW: no match" -- 2 operand form; first operand Read; second Modified usageRM :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage usageRM fmt op (OpReg reg) = mkRU (use_R fmt op [mk fmt reg]) [mk fmt reg] usageRM fmt op (OpAddr ea) = mkRUR (use_R fmt op $! use_EA ea []) usageRM _ _ _ = panic "X86.RegInfo.usageRM: no match" -- 2 operand form; first operand Modified; second Modified usageMM :: HasDebugCallStack => Format -> Operand -> Operand -> RegUsage usageMM fmt (OpReg src) (OpReg dst) = mkRU [mk fmt src, mk fmt dst] [mk fmt src, mk fmt dst] usageMM fmt (OpReg src) (OpAddr ea) = mkRU (use_EA ea [mk fmt src]) [mk fmt src] usageMM fmt (OpAddr ea) (OpReg dst) = mkRU (use_EA ea [mk fmt dst]) [mk fmt dst] usageMM _ _ _ = panic "X86.RegInfo.usageMM: no match" -- 3 operand form; first operand Read; second Modified; third Modified usageRMM :: HasDebugCallStack => Format -> Operand -> Operand -> Operand -> RegUsage usageRMM fmt (OpReg src) (OpReg dst) (OpReg reg) = mkRU [mk fmt src, mk fmt dst, mk fmt reg] [mk fmt dst, mk fmt reg] usageRMM fmt (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [mk fmt src, mk fmt reg]) [mk fmt reg] usageRMM _ _ _ _ = panic "X86.RegInfo.usageRMM: no match" -- 3 operand form of FMA instructions. usageFMA :: HasDebugCallStack => Format -> Operand -> Reg -> Reg -> RegUsage usageFMA fmt (OpReg src1) src2 dst = mkRU [mk fmt src1, mk fmt src2, mk fmt dst] [mk fmt dst] usageFMA fmt (OpAddr ea1) src2 dst = mkRU (use_EA ea1 [mk fmt src2, mk fmt dst]) [mk fmt dst] usageFMA _ _ _ _ = panic "X86.RegInfo.usageFMA: no match" -- 1 operand form; operand Modified usageM :: HasDebugCallStack => Format -> Operand -> RegUsage usageM fmt (OpReg reg) = let r' = mk fmt reg in mkRU [r'] [r'] usageM _ (OpAddr ea) = mkRUR (use_EA ea []) usageM _ _ = panic "X86.RegInfo.usageM: no match" -- Registers defd when an operand is written. def_W fmt (OpReg reg) = [mk fmt reg] def_W _ (OpAddr _ ) = [] def_W _ _ = panic "X86.RegInfo.def_W: no match" -- Registers used when an operand is read. use_R :: HasDebugCallStack => Format -> Operand -> [RegWithFormat] -> [RegWithFormat] use_R fmt (OpReg reg) tl = mk fmt reg : tl use_R _ (OpImm _) tl = tl use_R _ (OpAddr ea) tl = use_EA ea tl -- Registers used to compute an effective address. use_EA (ImmAddr _ _) tl = tl use_EA (AddrBaseIndex base index _) tl = use_base base $! use_index index tl where use_base (EABaseReg r) tl = mk addrFmt r : tl use_base _ tl = tl use_index EAIndexNone tl = tl use_index (EAIndex i _) tl = mk addrFmt i : tl mkRUR :: [RegWithFormat] -> RegUsage mkRUR src = mkRU src [] mkRU :: [RegWithFormat] -> [RegWithFormat] -> RegUsage mkRU src dst = src' `seq` dst' `seq` RU src' dst' where src' = filter (interesting platform . regWithFormat_reg) src dst' = filter (interesting platform . regWithFormat_reg) dst addrFmt = archWordFormat (target32Bit platform) mk :: Format -> Reg -> RegWithFormat mk fmt r = RegWithFormat r fmt mkFmt :: Reg -> RegWithFormat mkFmt r = RegWithFormat r $ case targetClassOfReg platform r of RcInteger -> addrFmt RcFloatOrVector -> FF64 -- | Is this register interesting for the register allocator? interesting :: Platform -> Reg -> Bool interesting _ (RegVirtual _) = True interesting platform (RegReal (RealRegSingle i)) = freeReg platform i movdOutFormat :: Format -> Format movdOutFormat format = case format of II32 -> FF32 II64 -> FF64 FF32 -> II32 FF64 -> II64 _ -> pprPanic "X86: improper format for movd/movq" (ppr format) -- | Applies the supplied function to all registers in instructions. -- Typically used to change virtual registers to real registers. patchRegsOfInstr :: HasDebugCallStack => Platform -> Instr -> (Reg -> Reg) -> Instr patchRegsOfInstr platform instr env = case instr of MOV fmt src dst -> MOV fmt (patchOp src) (patchOp dst) MOVD fmt src dst -> patch2 (MOVD fmt) src dst CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst) MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst LEA fmt src dst -> patch2 (LEA fmt) src dst ADD fmt src dst -> patch2 (ADD fmt) src dst ADC fmt src dst -> patch2 (ADC fmt) src dst SUB fmt src dst -> patch2 (SUB fmt) src dst SBB fmt src dst -> patch2 (SBB fmt) src dst IMUL fmt src dst -> patch2 (IMUL fmt) src dst IMUL2 fmt src -> patch1 (IMUL2 fmt) src MUL fmt src dst -> patch2 (MUL fmt) src dst MUL2 fmt src -> patch1 (MUL2 fmt) src IDIV fmt op -> patch1 (IDIV fmt) op DIV fmt op -> patch1 (DIV fmt) op ADD_CC fmt src dst -> patch2 (ADD_CC fmt) src dst SUB_CC fmt src dst -> patch2 (SUB_CC fmt) src dst AND fmt src dst -> patch2 (AND fmt) src dst OR fmt src dst -> patch2 (OR fmt) src dst XOR fmt src dst -> patch2 (XOR fmt) src dst VXOR fmt src1 src2 dst -> VXOR fmt (patchOp src1) (env src2) (env dst) NOT fmt op -> patch1 (NOT fmt) op BSWAP fmt reg -> BSWAP fmt (env reg) NEGI fmt op -> patch1 (NEGI fmt) op SHL fmt imm dst -> patch1 (SHL fmt imm) dst SAR fmt imm dst -> patch1 (SAR fmt imm) dst SHR fmt imm dst -> patch1 (SHR fmt imm) dst SHLD fmt imm dst1 dst2 -> patch2 (SHLD fmt imm) dst1 dst2 SHRD fmt imm dst1 dst2 -> patch2 (SHRD fmt imm) dst1 dst2 BT fmt imm src -> patch1 (BT fmt imm) src TEST fmt src dst -> patch2 (TEST fmt) src dst CMP fmt src dst -> patch2 (CMP fmt) src dst PUSH fmt op -> patch1 (PUSH fmt) op POP fmt op -> patch1 (POP fmt) op SETCC cond op -> patch1 (SETCC cond) op JMP op regs -> JMP (patchOp op) regs JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl FMA3 fmt perm var x1 x2 x3 -> patch3 (FMA3 fmt perm var) x1 x2 x3 -- literally only support storing the top x87 stack value st(0) X87Store fmt dst -> X87Store fmt (lookupAddr dst) CVTSS2SD src dst -> CVTSS2SD (env src) (env dst) CVTSD2SS src dst -> CVTSD2SS (env src) (env dst) CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst) CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst) CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst) CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst) FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst) SQRT fmt src dst -> SQRT fmt (patchOp src) (env dst) CALL (Left _) _ -> instr CALL (Right reg) p -> CALL (Right (env reg)) p FETCHGOT reg -> FETCHGOT (env reg) FETCHPC reg -> FETCHPC (env reg) NOP -> instr COMMENT _ -> instr LOCATION {} -> instr UNWIND {} -> instr DELTA _ -> instr LDATA {} -> instr NEWBLOCK {} -> instr JXX _ _ -> instr JXX_GBL _ _ -> instr CLTD _ -> instr POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst) LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst) TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst) PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst) PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst) BSF fmt src dst -> BSF fmt (patchOp src) (env dst) BSR fmt src dst -> BSR fmt (patchOp src) (env dst) PREFETCH lvl format src -> PREFETCH lvl format (patchOp src) LOCK i -> LOCK (patchRegsOfInstr platform i env) XADD fmt src dst -> patch2 (XADD fmt) src dst CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst XCHG fmt src dst -> XCHG fmt (patchOp src) (env dst) MFENCE -> instr -- vector instructions VBROADCAST fmt src dst -> VBROADCAST fmt (patchOp src) (env dst) VEXTRACT fmt off src dst -> VEXTRACT fmt off (env src) (patchOp dst) INSERTPS fmt off src dst -> INSERTPS fmt off (patchOp src) (env dst) VMOVU fmt src dst -> VMOVU fmt (patchOp src) (patchOp dst) MOVU fmt src dst -> MOVU fmt (patchOp src) (patchOp dst) MOVL fmt src dst -> MOVL fmt (patchOp src) (patchOp dst) MOVH fmt src dst -> MOVH fmt (patchOp src) (patchOp dst) MOVDQU fmt src dst -> MOVDQU fmt (patchOp src) (patchOp dst) VMOVDQU fmt src dst -> VMOVDQU fmt (patchOp src) (patchOp dst) PXOR fmt src dst -> PXOR fmt (patchOp src) (env dst) VPXOR fmt s1 s2 dst -> VPXOR fmt (env s1) (env s2) (env dst) VADD fmt s1 s2 dst -> VADD fmt (patchOp s1) (env s2) (env dst) VSUB fmt s1 s2 dst -> VSUB fmt (patchOp s1) (env s2) (env dst) VMUL fmt s1 s2 dst -> VMUL fmt (patchOp s1) (env s2) (env dst) VDIV fmt s1 s2 dst -> VDIV fmt (patchOp s1) (env s2) (env dst) SHUF fmt off src dst -> SHUF fmt off (patchOp src) (env dst) VSHUF fmt off src1 src2 dst -> VSHUF fmt off (patchOp src1) (env src2) (env dst) PSHUFD fmt off src dst -> PSHUFD fmt off (patchOp src) (env dst) VPSHUFD fmt off src dst -> VPSHUFD fmt off (patchOp src) (env dst) PSLLDQ fmt off dst -> PSLLDQ fmt (patchOp off) (env dst) PSRLDQ fmt off dst -> PSRLDQ fmt (patchOp off) (env dst) MOVHLPS fmt src dst -> MOVHLPS fmt (env src) (env dst) UNPCKL fmt src dst -> UNPCKL fmt (patchOp src) (env dst) PUNPCKLQDQ fmt src dst -> PUNPCKLQDQ fmt (patchOp src) (env dst) MINMAX minMax ty fmt src dst -> MINMAX minMax ty fmt (patchOp src) (patchOp dst) VMINMAX minMax ty fmt src1 src2 dst -> VMINMAX minMax ty fmt (patchOp src1) (env src2) (env dst) where patch1 :: (Operand -> a) -> Operand -> a patch1 insn op = insn $! patchOp op patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a patch2 insn src dst = (insn $! patchOp src) $! patchOp dst patch3 :: (Operand -> Reg -> Reg -> a) -> Operand -> Reg -> Reg -> a patch3 insn src1 src2 dst = ((insn $! patchOp src1) $! env src2) $! env dst patchOp (OpReg reg) = OpReg $! env reg patchOp (OpImm imm) = OpImm imm patchOp (OpAddr ea) = OpAddr $! lookupAddr ea lookupAddr (ImmAddr imm off) = ImmAddr imm off lookupAddr (AddrBaseIndex base index disp) = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp where lookupBase EABaseNone = EABaseNone lookupBase EABaseRip = EABaseRip lookupBase (EABaseReg r) = EABaseReg $! env r lookupIndex EAIndexNone = EAIndexNone lookupIndex (EAIndex r i) = (EAIndex $! env r) i -------------------------------------------------------------------------------- isJumpishInstr :: Instr -> Bool isJumpishInstr instr = case instr of JMP{} -> True JXX{} -> True JXX_GBL{} -> True JMP_TBL{} -> True CALL{} -> True _ -> False canFallthroughTo :: Instr -> BlockId -> Bool canFallthroughTo insn bid = case insn of JXX _ target -> bid == target JMP_TBL _ targets _ _ -> all isTargetBid targets _ -> False where isTargetBid target = case target of Nothing -> True Just (DestBlockId target) -> target == bid _ -> False jumpDestsOfInstr :: Instr -> [BlockId] jumpDestsOfInstr insn = case insn of JXX _ id -> [id] JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids] _ -> [] patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr patchJumpInstr insn patchF = case insn of JXX cc id -> JXX cc (patchF id) JMP_TBL op ids section lbl -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl _ -> insn where patchJumpDest f (DestBlockId id) = DestBlockId (f id) patchJumpDest _ dest = dest -- ----------------------------------------------------------------------------- -- | Make a spill instruction. mkSpillInstr :: HasDebugCallStack => NCGConfig -> RegWithFormat -- register to spill -> Int -- current stack delta -> Int -- spill slot to use -> [Instr] mkSpillInstr config (RegWithFormat reg fmt) delta slot = [ movInstr config fmt' (OpReg reg) (OpAddr (spRel platform off)) ] where fmt' | isVecFormat fmt = fmt | otherwise = scalarMoveFormat platform fmt -- Spill the platform word size, at a minimum platform = ncgPlatform config off = spillSlotToOffset platform slot - delta -- | Make a spill reload instruction. mkLoadInstr :: HasDebugCallStack => NCGConfig -> RegWithFormat -- register to load -> Int -- current stack delta -> Int -- spill slot to use -> [Instr] mkLoadInstr config (RegWithFormat reg fmt) delta slot = [ movInstr config fmt' (OpAddr (spRel platform off)) (OpReg reg) ] where fmt' | isVecFormat fmt = fmt | otherwise = scalarMoveFormat platform fmt -- Load the platform word size, at a minimum platform = ncgPlatform config off = spillSlotToOffset platform slot - delta -- | A move instruction for moving the entire contents of an operand -- at the given 'Format'. movInstr :: HasDebugCallStack => NCGConfig -> Format -> (Operand -> Operand -> Instr) movInstr config fmt = case fmt of VecFormat _ sFmt -> case formatToWidth fmt of W512 -> if avx512f then avx_move sFmt else sorry "512-bit wide vectors require -mavx512f" W256 -> if avx2 then avx_move sFmt else sorry "256-bit wide vectors require -mavx2" W128 -> if avx -- Prefer AVX instructions over SSE when available -- (usually results in better performance). then avx_move sFmt else sse_move sFmt w -> sorry $ "Unhandled SIMD vector width: " ++ show w ++ " bits" _ -> MOV fmt where assertCompatibleRegs :: ( Operand -> Operand -> Instr ) -> Operand -> Operand -> Instr assertCompatibleRegs f | debugIsOn = \ op1 op2 -> if | OpReg r1 <- op1 , OpReg r2 <- op2 , targetClassOfReg plat r1 /= targetClassOfReg plat r2 -> assertPpr False ( vcat [ text "movInstr: move between incompatible registers" , text "fmt:" <+> ppr fmt , text "r1:" <+> ppr r1 , text "r2:" <+> ppr r2 ] ) f op1 op2 | otherwise -> f op1 op2 | otherwise = f plat = ncgPlatform config avx = ncgAvxEnabled config avx2 = ncgAvx2Enabled config avx512f = ncgAvx512fEnabled config avx_move sFmt = if isFloatScalarFormat sFmt then assertCompatibleRegs $ VMOVU fmt else VMOVDQU fmt sse_move sFmt = if isFloatScalarFormat sFmt then assertCompatibleRegs $ MOVU fmt else MOVDQU fmt -- NB: we are using {V}MOVU and not {V}MOVA, because we have no guarantees -- about the stack being sufficiently aligned (even for even numbered stack slots). -- -- (Ben Gamari told me that using MOVA instead of MOVU does not make a -- difference in practice when moving between registers.) spillSlotSize :: Platform -> Int spillSlotSize platform | target32Bit platform = 12 | otherwise = 8 maxSpillSlots :: NCGConfig -> Int maxSpillSlots config = ((ncgSpillPreallocSize config - 64) `div` spillSlotSize (ncgPlatform config)) - 1 -- = 0 -- useful for testing allocMoreStack -- number of bytes that the stack pointer should be aligned to stackAlign :: Int stackAlign = 16 -- convert a spill slot number to a *byte* offset, with no sign: -- decide on a per arch basis whether you are spilling above or below -- the C stack pointer. spillSlotToOffset :: Platform -> Int -> Int spillSlotToOffset platform slot = 64 + spillSlotSize platform * slot -------------------------------------------------------------------------------- -- | See if this instruction is telling us the current C stack delta takeDeltaInstr :: Instr -> Maybe Int takeDeltaInstr instr = case instr of DELTA i -> Just i _ -> Nothing isMetaInstr :: Instr -> Bool isMetaInstr instr = case instr of COMMENT{} -> True LOCATION{} -> True LDATA{} -> True NEWBLOCK{} -> True UNWIND{} -> True DELTA{} -> True _ -> False -- | Make a reg-reg move instruction. mkRegRegMoveInstr :: HasDebugCallStack => NCGConfig -> Format -> Reg -> Reg -> Instr mkRegRegMoveInstr config fmt src dst = movInstr config fmt' (OpReg src) (OpReg dst) -- Move the platform word size, at a minimum. -- -- This ensures the upper part of the register is properly cleared -- and avoids partial register stalls. -- -- See also the 'ArithInt8' and 'ArithWord8' tests, -- which fail without this logic. where platform = ncgPlatform config fmt' | isVecFormat fmt = fmt | otherwise = scalarMoveFormat platform fmt scalarMoveFormat :: Platform -> Format -> Format scalarMoveFormat platform fmt | isFloatFormat fmt = FF64 | II64 <- fmt = II64 | otherwise = archWordFormat (target32Bit platform) -- | Check whether an instruction represents a reg-reg move. -- The register allocator attempts to eliminate reg->reg moves whenever it can, -- by assigning the src and dest temporaries to the same real register. -- takeRegRegMoveInstr :: Platform -> Instr -> Maybe (Reg,Reg) takeRegRegMoveInstr platform = \case MOV fmt (OpReg r1) (OpReg r2) -- When used with vector registers, MOV only moves the lower part, -- so it is not a real move. For example, MOVSS/MOVSD between xmm registers -- preserves the upper half, and MOVQ between xmm registers zeroes the upper half. | not $ isVecFormat fmt -- Don't eliminate a move between e.g. RAX and XMM: -- even though we might be using XMM to store a scalar integer value, -- some instructions only support XMM registers. , targetClassOfReg platform r1 == targetClassOfReg platform r2 -> Just (r1, r2) MOVD {} -- MOVD moves between xmm registers and general-purpose registers, -- and we don't want to eliminate those moves (as noted for MOV). -> Nothing -- SSE2/AVX move instructions always move the full register. MOVU _ (OpReg r1) (OpReg r2) -> Just (r1, r2) VMOVU _ (OpReg r1) (OpReg r2) -> Just (r1, r2) MOVDQU _ (OpReg r1) (OpReg r2) -> Just (r1, r2) VMOVDQU _ (OpReg r1) (OpReg r2) -> Just (r1, r2) -- TODO: perhaps we can eliminate MOVZxL in certain situations? MOVZxL {} -> Nothing MOVSxL {} -> Nothing -- MOVL, MOVH and MOVHLPS preserve some part of the destination register, -- so are not simple moves. MOVL {} -> Nothing MOVH {} -> Nothing MOVHLPS {} -> Nothing -- Other instructions are not moves. _ -> Nothing -- | Make an unconditional branch instruction. mkJumpInstr :: BlockId -> [Instr] mkJumpInstr id = [JXX ALWAYS id] -- Note [Windows stack layout] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- | On most OSes the kernel will place a guard page after the current stack -- page. If you allocate larger than a page worth you may jump over this -- guard page. Not only is this a security issue, but on certain OSes such -- as Windows a new page won't be allocated if you don't hit the guard. This -- will cause a segfault or access fault. -- -- This function defines if the current allocation amount requires a probe. -- On Windows (for now) we emit a call to _chkstk for this. For other OSes -- this is not yet implemented. -- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk -- The Windows stack looks like this: -- -- +-------------------+ -- | SP | -- +-------------------+ -- | | -- | GUARD PAGE | -- | | -- +-------------------+ -- | | -- | | -- | UNMAPPED | -- | | -- | | -- +-------------------+ -- -- In essence each allocation larger than a page size needs to be chunked and -- a probe emitted after each page allocation. You have to hit the guard -- page so the kernel can map in the next page, otherwise you'll segfault. -- See Note [Windows stack allocations]. -- needs_probe_call :: Platform -> Int -> Bool needs_probe_call platform amount = case platformOS platform of OSMinGW32 -> case platformArch platform of ArchX86_64 -> amount > (4 * 1024) _ -> False _ -> False mkStackAllocInstr :: Platform -> Int -> [Instr] mkStackAllocInstr platform amount = case platformOS platform of OSMinGW32 -> -- These will clobber AX but this should be ok because -- -- 1. It is the first thing we do when entering the closure and AX is -- a caller saved registers on Windows both on x86_64 and x86. -- -- 2. The closures are only entered via a call or longjmp in which case -- there are no expectations for volatile registers. -- -- 3. When the target is a local branch point it is re-targeted -- after the dealloc, preserving #2. See Note [extra spill slots]. -- -- We emit a call because the stack probes are quite involved and -- would bloat code size a lot. GHC doesn't really have an -Os. -- ___chkstk is guaranteed to leave all nonvolatile registers and AX -- untouched. It's part of the standard prologue code for any Windows -- function dropping the stack more than a page. -- See Note [Windows stack layout] case platformArch platform of ArchX86_64 | needs_probe_call platform amount -> [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax) , CALL (Left $ strImmLit (fsLit "___chkstk_ms")) [RegWithFormat rax II64] , SUB II64 (OpReg rax) (OpReg rsp) ] | otherwise -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) , TEST II64 (OpReg rsp) (OpReg rsp) ] _ -> panic "X86.mkStackAllocInstr" _ -> case platformArch platform of ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ] ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ] _ -> panic "X86.mkStackAllocInstr" mkStackDeallocInstr :: Platform -> Int -> [Instr] mkStackDeallocInstr platform amount = case platformArch platform of ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)] ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)] _ -> panic "X86.mkStackDeallocInstr" -- Note [extra spill slots] -- ~~~~~~~~~~~~~~~~~~~~~~~~ -- If the register allocator used more spill slots than we have -- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more -- C stack space on entry and exit from this proc. Therefore we -- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp" -- before every non-local jump. -- -- This became necessary when the new codegen started bundling entire -- functions together into one proc, because the register allocator -- assigns a different stack slot to each virtual reg within a proc. -- To avoid using so many slots we could also: -- -- - split up the proc into connected components before code generator -- -- - rename the virtual regs, so that we re-use vreg names and hence -- stack slots for non-overlapping vregs. -- -- Note that when a block is both a non-local entry point (with an -- info table) and a local branch target, we have to split it into -- two, like so: -- -- -- L: -- -- -- becomes -- -- -- L: -- subl $rsp, N -- jmp Lnew -- Lnew: -- -- -- and all branches pointing to L are retargetted to point to Lnew. -- Otherwise, we would repeat the $rsp adjustment for each branch to -- L. -- -- Returns a list of (L,Lnew) pairs. -- allocMoreStack :: Platform -> Int -> NatCmmDecl statics GHC.CmmToAsm.X86.Instr.Instr -> UniqDSM (NatCmmDecl statics GHC.CmmToAsm.X86.Instr.Instr, [(BlockId,BlockId)]) allocMoreStack _ _ top@(CmmData _ _) = return (top,[]) allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do let entries = entryBlocks proc retargetList <- mapM (\e -> (e,) <$> newBlockId) entries let delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up where x = slots * spillSlotSize platform -- sp delta alloc = mkStackAllocInstr platform delta dealloc = mkStackDeallocInstr platform delta new_blockmap :: LabelMap BlockId new_blockmap = mapFromList retargetList insert_stack_insns (BasicBlock id insns) | Just new_blockid <- mapLookup id new_blockmap = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid] , BasicBlock new_blockid block' ] | otherwise = [ BasicBlock id block' ] where block' = foldr insert_dealloc [] insns insert_dealloc insn r = case insn of JMP _ _ -> dealloc ++ (insn : r) JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL" _other -> patchJumpInstr insn retarget : r where retarget b = fromMaybe b (mapLookup b new_blockmap) new_code = concatMap insert_stack_insns code -- in return (CmmProc info lbl live (ListGraph new_code), retargetList) data JumpDest = DestBlockId BlockId | DestImm Imm -- Debug Instance instance Outputable JumpDest where ppr (DestBlockId bid) = text "jd:" <> ppr bid ppr (DestImm _imm) = text "jd:noShow" -- Implementations of the methods of 'NgcImpl' getJumpDestBlockId :: JumpDest -> Maybe BlockId getJumpDestBlockId (DestBlockId bid) = Just bid getJumpDestBlockId _ = Nothing canShortcut :: Instr -> Maybe JumpDest canShortcut (JXX ALWAYS id) = Just (DestBlockId id) canShortcut (JMP (OpImm imm) _) = Just (DestImm imm) canShortcut _ = Nothing -- This helper shortcuts a sequence of branches. -- The blockset helps avoid following cycles. shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn where shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr shortcutJump' fn seen insn@(JXX cc id) = if setMember id seen then insn else case fn id of Nothing -> insn Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id') Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm) where seen' = setInsert id seen shortcutJump' fn _ (JMP_TBL addr blocks section tblId) = let updateBlock (Just (DestBlockId bid)) = case fn bid of Nothing -> Just (DestBlockId bid ) Just dest -> Just dest updateBlock dest = dest blocks' = map updateBlock blocks in JMP_TBL addr blocks' section tblId shortcutJump' _ _ other = other -- Here because it knows about JumpDest shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, RawCmmStatics) -> (Alignment, RawCmmStatics) shortcutStatics fn (align, CmmStaticsRaw lbl statics) = (align, CmmStaticsRaw lbl $ map (shortcutStatic fn) statics) -- we need to get the jump tables, so apply the mapping to the entries -- of a CmmData too. shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel shortcutLabel fn lab | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqueSet blkId | otherwise = lab shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic shortcutStatic fn (CmmStaticLit (CmmLabel lab)) = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) -- slightly dodgy, we're ignoring the second label, but this -- works with the way we use CmmLabelDiffOff for jump tables now. shortcutStatic _ other_static = other_static shortBlockId :: (BlockId -> Maybe JumpDest) -> UniqueSet -> BlockId -> CLabel shortBlockId fn seen blockid = case (memberUniqueSet uq seen, fn blockid) of (True, _) -> blockLbl blockid (_, Nothing) -> blockLbl blockid (_, Just (DestBlockId blockid')) -> shortBlockId fn (insertUniqueSet uq seen) blockid' (_, Just (DestImm (ImmCLbl lbl))) -> lbl (_, _other) -> panic "shortBlockId" where uq = getUnique blockid