commit fa31e8f4a0f853848d96549a429083941877bf8d Author: Sergei Trofimovich Date: Sun Dec 14 14:30:12 2014 +0000 powerpc: fix and enable shared libraries by default on linux Summary: And fix things all the way down to it. Namely: - remove 'r30' from free registers, it's an .LCTOC1 register for gcc. generated .plt stubs expect it to be initialised. - fix PicBase computation, which originally forgot to use 'tmp' reg in 'initializePicBase_ppc.fetchPC' - mark 'ForeignTarget's as implicitly using 'PicBase' register (see comment for details) - add 64-bit MO_Sub and test on alloclimit3/4 regtests - fix dynamic label offsets to match with .LCTOC1 offset Signed-off-by: Sergei Trofimovich Test Plan: validate passes equal amount of vanilla/dyn tests Reviewers: simonmar, erikd, austin Reviewed By: erikd, austin Subscribers: carter, thomie Differential Revision: https://phabricator.haskell.org/D560 GHC Trac Issues: #8024, #9831 diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 0f2c0ae..37b8ada 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -1170,7 +1170,8 @@ pprDynamicLinkerAsmLabel platform dllInfo lbl else if osElfTarget (platformOS platform) then if platformArch platform == ArchPPC then case dllInfo of - CodeStub -> ppr lbl <> text "@plt" + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] + ppr lbl <> text "+32768@plt" SymbolPtr -> text ".LC_" <> ppr lbl _ -> panic "pprDynamicLinkerAsmLabel" else if platformArch platform == ArchX86_64 diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs index 9b5c080..6326a8b 100644 --- a/compiler/nativeGen/PIC.hs +++ b/compiler/nativeGen/PIC.hs @@ -54,7 +54,6 @@ import qualified X86.Instr as X86 import Platform import Instruction -import Size import Reg import NCGMonad @@ -468,11 +467,8 @@ pprGotDeclaration dflags ArchX86 OSDarwin pprGotDeclaration _ _ OSDarwin = empty --- pprGotDeclaration +-- Emit GOT declaration -- Output whatever needs to be output once per .s file. 
--- The .LCTOC1 label is defined to point 32768 bytes into the table, --- to make the most of the PPC's 16-bit displacements. --- Only needed for PIC. pprGotDeclaration dflags arch os | osElfTarget os , arch /= ArchPPC_64 @@ -482,6 +478,7 @@ pprGotDeclaration dflags arch os | osElfTarget os , arch /= ArchPPC_64 = vcat [ + -- See Note [.LCTOC1 in PPC PIC code] ptext (sLit ".section \".got2\",\"aw\""), ptext (sLit ".LCTOC1 = .+32768") ] @@ -688,12 +685,7 @@ pprImportedSymbol _ _ _ -- Get a pointer to our own fake GOT, which is defined on a per-module basis. --- This is exactly how GCC does it, and it's quite horrible: --- We first fetch the address of a local label (mkPicBaseLabel). --- Then we add a 16-bit offset to that to get the address of a .long that we --- define in .text space right next to the proc. This .long literal contains --- the (32-bit) offset from our local label to our global offset table --- (.LCTOC1 aka gotOffLabel). +-- This is exactly how GCC does it in linux. initializePicBase_ppc :: Arch -> OS -> Reg @@ -704,18 +696,9 @@ initializePicBase_ppc ArchPPC os picReg (CmmProc info lab live (ListGraph blocks) : statics) | osElfTarget os = do - dflags <- getDynFlags - gotOffLabel <- getNewLabelNat - tmp <- getNewRegNat $ intSize (wordWidth dflags) let - gotOffset = CmmData Text $ Statics gotOffLabel [ - CmmStaticLit (CmmLabelDiffOff gotLabel - mkPicBaseLabel - 0) - ] - offsetToOffset - = PPC.ImmConstantDiff - (PPC.ImmCLbl gotOffLabel) + gotOffset = PPC.ImmConstantDiff + (PPC.ImmCLbl gotLabel) (PPC.ImmCLbl mkPicBaseLabel) blocks' = case blocks of @@ -726,15 +709,23 @@ initializePicBase_ppc ArchPPC os picReg | bID `mapMember` info = fetchPC b | otherwise = b + -- GCC does PIC prologs thusly: + -- bcl 20,31,.L1 + -- .L1: + -- mflr 30 + -- addis 30,30,.LCTOC1-.L1@ha + -- addi 30,30,.LCTOC1-.L1@l + -- TODO: below we use it over temporary register, + -- it can and should be optimised by picking + -- correct PIC reg. 
fetchPC (BasicBlock bID insns) = BasicBlock bID (PPC.FETCHPC picReg - : PPC.ADDIS tmp picReg (PPC.HI offsetToOffset) - : PPC.LD PPC.archWordSize tmp - (PPC.AddrRegImm tmp (PPC.LO offsetToOffset)) - : PPC.ADD picReg picReg (PPC.RIReg picReg) + : PPC.ADDIS picReg picReg (PPC.HA gotOffset) + : PPC.ADDI picReg picReg (PPC.LO gotOffset) + : PPC.MR PPC.r30 picReg : insns) - return (CmmProc info lab live (ListGraph blocks') : gotOffset : statics) + return (CmmProc info lab live (ListGraph blocks') : statics) initializePicBase_ppc ArchPPC OSDarwin picReg diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index ddf483a..c1c4a74 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -54,7 +54,7 @@ import Outputable import Unique import DynFlags -import Control.Monad ( mapAndUnzipM ) +import Control.Monad ( mapAndUnzipM, when ) import Data.Bits import Data.Word @@ -355,6 +355,19 @@ iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do ADDE rhi r1hi r2hi ] return (ChildCode64 code rlo) +iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do + ChildCode64 code1 r1lo <- iselExpr64 e1 + ChildCode64 code2 r2lo <- iselExpr64 e2 + (rlo,rhi) <- getNewRegPairNat II32 + let + r1hi = getHiVRegFromLo r1lo + r2hi = getHiVRegFromLo r2lo + code = code1 `appOL` + code2 `appOL` + toOL [ SUBFC rlo r2lo r1lo, + SUBFE rhi r2hi r1hi ] + return (ChildCode64 code rlo) + iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do (expr_reg,expr_code) <- getSomeReg expr (rlo, rhi) <- getNewRegPairNat II32 @@ -918,8 +931,12 @@ genCCall' dflags gcp target dest_regs args0 (toOL []) [] (labelOrExpr, reduceToFF32) <- case target of - ForeignTarget (CmmLit (CmmLabel lbl)) _ -> return (Left lbl, False) - ForeignTarget expr _ -> return (Right expr, False) + ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do + uses_pic_base_implicitly + return (Left lbl, False) + ForeignTarget expr _ -> do + uses_pic_base_implicitly + return (Right expr, False) PrimTarget mop -> 
outOfLineMachOp mop let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode @@ -940,6 +957,13 @@ genCCall' dflags gcp target dest_regs args0 where platform = targetPlatform dflags + uses_pic_base_implicitly = do + -- See Note [implicit register in PPC PIC code] + -- on why we claim to use PIC register here + when (gopt Opt_PIC dflags) $ do + _ <- getPicBaseNat archWordSize + return () + initialStackOffset = case gcp of GCPDarwin -> 24 GCPLinux -> 8 @@ -1431,3 +1455,21 @@ coerceFP2Int _ toRep x = do -- read low word of value (high word is undefined) LD II32 dst (spRel dflags 3)] return (Any (intSize toRep) code') + +-- Note [.LCTOC1 in PPC PIC code] +-- The .LCTOC1 label is defined to point 32768 bytes into the GOT table +-- to make the most of the PPC's 16-bit displacements. +-- As 16-bit signed offset is used (usually via addi/lwz instructions) +-- first element will have '-32768' offset against .LCTOC1. + +-- Note [implicit register in PPC PIC code] +-- PPC generates calls by labels in assembly +-- in form of: +-- bl puts+32768@plt +-- in this form it's not seen directly (by GHC NCG) +-- that r30 (PicBaseReg) is used, +-- but r30 is a required part of PLT code setup: +-- puts+32768@plt: +-- lwz r11,-30484(r30) ; offset in .LCTOC1 +-- mtctr r11 +-- bctr diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index f5b9506..b7081f9 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -205,8 +205,11 @@ data Instr | ADD Reg Reg RI -- dst, src1, src2 | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 | ADDE Reg Reg Reg -- (extend) dst, src1, src2 + | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2 | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 + | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1 + | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1 | MULLW Reg Reg RI | DIVW Reg Reg Reg | DIVWU Reg 
Reg Reg @@ -284,8 +287,11 @@ ppc_regUsageOfInstr platform instr ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + ADDI reg1 reg2 _ -> usage ([reg2], [reg1]) ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) @@ -358,8 +364,11 @@ ppc_patchRegsOfInstr instr env ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3) ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3) + ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) + SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3) + SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3) DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3) diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 6851769..f59d51f 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -525,6 +525,16 @@ pprInstr (BCTRL _) = hcat [ ptext (sLit "bctrl") ] pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri +pprInstr (ADDI reg1 reg2 imm) = hcat [ + char '\t', + ptext (sLit "addi"), + char '\t', + pprReg reg1, + ptext (sLit ", "), + pprReg reg2, + ptext (sLit ", "), + pprImm imm + ] pprInstr (ADDIS reg1 reg2 imm) = hcat [ char '\t', ptext (sLit "addis"), @@ -539,6 +549,8 @@ pprInstr (ADDIS reg1 reg2 imm) = hcat [ pprInstr (ADDC reg1 reg2 reg3) = 
pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) +pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3) +pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3) diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index 0f636bf..69e69c0 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -37,7 +37,8 @@ module PPC.Regs ( fits16Bits, makeImmediate, fReg, - sp, r3, r4, r27, r28, f1, f20, f21, + sp, r3, r4, r27, r28, r30, + f1, f20, f21, allocatableRegs @@ -295,12 +296,13 @@ point registers. fReg :: Int -> RegNo fReg x = (32 + x) -sp, r3, r4, r27, r28, f1, f20, f21 :: Reg +sp, r3, r4, r27, r28, r30, f1, f20, f21 :: Reg sp = regSingle 1 r3 = regSingle 3 r4 = regSingle 4 r27 = regSingle 27 r28 = regSingle 28 +r30 = regSingle 30 f1 = regSingle $ fReg 1 f20 = regSingle $ fReg 20 f21 = regSingle $ fReg 21 diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index 9916e0e..1d46a01 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -881,6 +881,8 @@ freeReg 1 = fastBool False -- The Stack Pointer # if !MACHREGS_darwin -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that freeReg 2 = fastBool False +-- at least linux in -fPIC relies on r30 in PLT stubs +freeReg 30 = fastBool False # endif # ifdef REG_Base freeReg REG_Base = fastBool False diff --git a/mk/config.mk.in b/mk/config.mk.in index 0f5820f..8f134bc 100644 --- a/mk/config.mk.in +++ b/mk/config.mk.in @@ -95,7 +95,7 @@ TargetElf = YES endif # Some platforms 
don't support shared libraries -NoSharedLibsPlatformList = powerpc-unknown-linux \ +NoSharedLibsPlatformList = \ x86_64-unknown-mingw32 \ i386-unknown-mingw32