From 1f638bdc3fee95e7aaf094262c32f4d33a12a244 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Thu, 8 May 2025 22:27:47 +0800 Subject: [PATCH 1/7] [X86][APX] Fix issues of suppressing APX for relocation 1. There is an ADD64rm_ND instruction emitted with GOTPCREL relocation. Handled it in the "Suppress APX for relocation" pass and transformed it to ADD64rm with a register operand in a non-rex2 register class. The relocation type R_X86_64_CODE_6_GOTPCRELX will be added later for APX enabled with relocation. 2. The register class for operands in an instruction with relocation is updated to a non-rex2 one in the "Suppress APX for relocation" pass, but it may be updated/recomputed to a larger register class (like GR64_NOREX2RegClass to GR64RegClass). Fixed by not updating the register class if it's a non-rex2 register class and APX support for relocation is disabled. 3. After the "Suppress APX for relocation" pass, the instruction with relocation may be folded with an add NDD instruction into an add NDD instruction with relocation. The latter will be emitted as an instruction with an APX relocation type, which breaks backward compatibility. Fixed by not folding an instruction with GOTPCREL relocation with an NDD instruction. 
--- llvm/lib/Target/X86/X86InstrInfo.cpp | 8 + llvm/lib/Target/X86/X86InstrInfo.h | 34 ++++ llvm/lib/Target/X86/X86RegisterInfo.cpp | 22 +++ llvm/lib/Target/X86/X86RegisterInfo.h | 2 + .../lib/Target/X86/X86SuppressAPXForReloc.cpp | 3 +- llvm/test/CodeGen/X86/apx/reloc-regclass.ll | 187 ++++++++++++++++++ llvm/test/CodeGen/X86/apx/reloc.mir | 30 ++- 7 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/apx/reloc-regclass.ll diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 5220ae2e67bb6..963a2bb84e185 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8122,6 +8122,14 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( shouldPreventUndefRegUpdateMemFold(MF, MI))) return nullptr; + // Do not fold a NDD instruction and a memory instruction with relocation to + // avoid emit APX relocation when the flag is disabled for backward + // compatibility. + uint64_t TSFlags = MI.getDesc().TSFlags; + if (!X86EnableAPXForRelocation && isMemInstrWithGOTPCREL(LoadMI) && + X86II::hasNewDataDest(TSFlags)) + return nullptr; + // Determine the alignment of the load. 
Align Alignment; unsigned LoadOpc = LoadMI.getOpcode(); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 2a9f567689ecb..e53f2566dd892 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -187,6 +187,40 @@ inline static bool isAddMemInstrWithRelocation(const MachineInstr &MI) { return false; } +inline static bool isMemInstrWithGOTPCREL(const MachineInstr &MI) { + unsigned Op = MI.getOpcode(); + switch (Op) { + case X86::TEST32mr: + case X86::TEST64mr: + case X86::CMP32rm: + case X86::CMP64rm: + case X86::MOV32rm: + case X86::MOV64rm: + case X86::ADC32rm: + case X86::ADD32rm: + case X86::AND32rm: + case X86::OR32rm: + case X86::SBB32rm: + case X86::SUB32rm: + case X86::XOR32rm: + case X86::ADC64rm: + case X86::ADD64rm: + case X86::AND64rm: + case X86::OR64rm: + case X86::SBB64rm: + case X86::SUB64rm: + case X86::XOR64rm: { + int MemOpNo = X86II::getMemoryOperandNo(MI.getDesc().TSFlags) + + X86II::getOperandBias(MI.getDesc()); + const MachineOperand &MO = MI.getOperand(X86::AddrDisp + MemOpNo); + if (MO.getTargetFlags() == X86II::MO_GOTPCREL) + return true; + break; + } + } + return false; +} + class X86InstrInfo final : public X86GenInstrInfo { X86Subtarget &Subtarget; const X86RegisterInfo RI; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index ef58c7619b243..c192e8892995b 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -50,6 +50,8 @@ static cl::opt cl::desc("Disable two address hints for register " "allocation")); +extern cl::opt X86EnableAPXForRelocation; + X86RegisterInfo::X86RegisterInfo(const Triple &TT) : X86GenRegisterInfo((TT.isArch64Bit() ? 
X86::RIP : X86::EIP), X86_MC::getDwarfRegFlavour(TT, false), @@ -121,6 +123,11 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, if (RC == &X86::GR8_NOREXRegClass) return RC; + // Keep using non-rex2 register class when APX feature (EGPR/NDD/NF) is not + // enabled for relocation. + if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC)) + return RC; + const X86Subtarget &Subtarget = MF.getSubtarget(); const TargetRegisterClass *Super = RC; @@ -1258,3 +1265,18 @@ const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2( return &X86::GR64_NOREX2_NOSPRegClass; } } + +bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const { + switch (RC->getID()) { + default: + return false; + case X86::GR8_NOREX2RegClassID: + case X86::GR16_NOREX2RegClassID: + case X86::GR32_NOREX2RegClassID: + case X86::GR64_NOREX2RegClassID: + case X86::GR32_NOREX2_NOSPRegClassID: + case X86::GR64_NOREX2_NOSPRegClassID: + case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID: + return true; + } +} \ No newline at end of file diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 13a5fbf16e981..19b409ae619d2 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -178,6 +178,8 @@ class X86RegisterInfo final : public X86GenRegisterInfo { const TargetRegisterClass * constrainRegClassToNonRex2(const TargetRegisterClass *RC) const; + + bool isNonRex2RegClass(const TargetRegisterClass *RC) const; }; } // End llvm namespace diff --git a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp index d40995cb1786d..a263fd39bc324 100644 --- a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp +++ b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp @@ -167,7 +167,8 @@ static bool handleNDDOrNFInstructions(MachineFunction &MF, int MemOpNo = X86II::getMemoryOperandNo(MI.getDesc().TSFlags) + X86II::getOperandBias(MI.getDesc()); const 
MachineOperand &MO = MI.getOperand(X86::AddrDisp + MemOpNo); - if (MO.getTargetFlags() == X86II::MO_GOTTPOFF) { + if (MO.getTargetFlags() == X86II::MO_GOTTPOFF || + MO.getTargetFlags() == X86II::MO_GOTPCREL) { LLVM_DEBUG(dbgs() << "Transform instruction with relocation type:\n " << MI); Register Reg = MRI->createVirtualRegister(&X86::GR64_NOREX2RegClass); diff --git a/llvm/test/CodeGen/X86/apx/reloc-regclass.ll b/llvm/test/CodeGen/X86/apx/reloc-regclass.ll new file mode 100644 index 0000000000000..685a64cee2b47 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/reloc-regclass.ll @@ -0,0 +1,187 @@ +; RUN: llc -mcpu=diamondrapids %s -mtriple=x86_64 -filetype=obj -o %t.o +; RUN: llvm-objdump --no-print-imm-hex -dr %t.o | FileCheck %s --check-prefixes=NOAPXREL,CHECK + +; RUN: llc -mcpu=diamondrapids %s -mtriple=x86_64 -filetype=obj -o %t.o -x86-enable-apx-for-relocation=true +; RUN: llvm-objdump --no-print-imm-hex -dr %t.o | FileCheck %s --check-prefixes=APXREL,CHECK + + +; The first 2 tests are used to check if the register class is not +; updated/recomputed by register allocator. It's originally updated to non-rex2 +; register class by "Suppress APX for relocation" pass. 
+ + +; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1 +; APXREL: movq (%rip), %r16 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4 +; NOAPXREL: movq (%rip), %rdi +; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX + +@gvar = external global [20000 x i8] + +define void @test_regclass_not_updated_by_regalloc_1(ptr %ptr1, ptr %0, i32 %int1, i64 %int_sext, i64 %mul.447, i64 %int_sext3, i32 %fetch.2508, i32 %fetch.2513, i32 %mul.442, i64 %int_sext6, i64 %int_sext7, i64 %int_sext8, i1 %cond1, i1 %cond2) { +alloca_38: + %int_sext4 = sext i32 %int1 to i64 + tail call void @llvm.memset.p0.i64(ptr @gvar, i8 0, i64 20000, i1 false) + %div.161 = sdiv i64 %int_sext3, %int_sext + %cmp.2 = icmp sgt i64 %div.161, 0 + %1 = sub i64 %int_sext7, %mul.447 + br label %loop.41 + +loop.41: ; preds = %ifmerge.2, %alloca_38 + br i1 %cmp.2, label %L.53, label %ifmerge.2 + +L.53: ; preds = %loop.41 + %2 = getelementptr i8, ptr %ptr1, i64 %int_sext8 + br label %loop.83 + +loop.83: ; preds = %loop.83, %L.53 + %i2.i64.1 = phi i64 [ 0, %L.53 ], [ %nextloop.83, %loop.83 ] + %3 = mul i64 %i2.i64.1, %int_sext4 + %.r275 = add i64 %3, %1 + %4 = getelementptr float, ptr getelementptr ([20000 x i8], ptr @gvar, i64 0, i64 8000), i64 %.r275 + %gepload = load float, ptr %2, align 1 + store float %gepload, ptr %4, align 4 + %nextloop.83 = add i64 %i2.i64.1, 1 + br i1 %cond1, label %ifmerge.2, label %loop.83 + +ifmerge.2: ; preds = %loop.83, %loop.41 + br i1 %cond2, label %afterloop.41, label %loop.41 + +afterloop.41: ; preds = %ifmerge.2 + %mul.469 = mul i32 %mul.442, %fetch.2508 + %div.172 = mul i32 %fetch.2513, %mul.469 + %mul.471 = mul i32 %int1, %div.172 + %int_sext39 = sext i32 %mul.471 to i64 + %5 = mul i64 %int_sext6, %int_sext39 + %6 = getelementptr i8, ptr %ptr1, i64 %5 + %7 = load float, ptr %6, align 1 + store float %7, ptr null, align 4 + ret void +} + +declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) + +; Will 
update after R_X86_64_CODE_6_GOTPCRELX is supported. +; CHECK-LABEL: test_regclass_not_updated_by_regalloc_2 +; APXREL: {nf} addq (%rip), %r16, %rcx +; APXREL-NEXT: R_X86_64_GOTPCREL gvar2-0x4 +; NOAPXREL: addq (%rip), %rbx +; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar2-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX + +@gvar2 = external constant [8 x [8 x i32]] + +define void @test_regclass_not_updated_by_regalloc_2(ptr %pSrc1, i32 %srcStep1, ptr %pSrc2, i32 %srcStep2, i32 %width, i32 %0, i1 %cmp71.not783, i1 %cmp11.i, ptr %pSrc2.addr.0535.i) { +entry: + %1 = ashr i32 %srcStep2, 1 + %conv.i = sext i32 %width to i64 + %conv6.i = and i32 %srcStep1, 1 + %cmp.i = icmp sgt i32 %srcStep1, 0 + %idx.ext.i = zext i32 %conv6.i to i64 + %2 = getelementptr <4 x i64>, ptr @gvar2, i64 %idx.ext.i + %idx.ext183.i = sext i32 %1 to i64 + br i1 %cmp71.not783, label %for.end, label %for.body73.lr.ph + +for.body73.lr.ph: ; preds = %entry + %3 = load <4 x i64>, ptr %2, align 32 + %..i = select i1 %cmp11.i, <4 x i64> zeroinitializer, <4 x i64> splat (i64 1) + %4 = bitcast <4 x i64> %..i to <8 x i32> + %5 = bitcast <4 x i64> %3 to <8 x i32> + %. = select i1 %cmp.i, <8 x i32> splat (i32 1), <8 x i32> %4 + %.833 = select i1 %cmp.i, <8 x i32> %5, <8 x i32> zeroinitializer + br i1 %cmp11.i, label %for.end.i, label %for.end + +for.end.i: ; preds = %if.end153.i, %for.body73.lr.ph + %pSrc2.addr.0535.i5 = phi ptr [ %add.ptr184.i, %if.end153.i ], [ %pSrc2, %for.body73.lr.ph ] + %eSum0.0531.i = phi <4 x i64> [ %add.i452.i, %if.end153.i ], [ zeroinitializer, %for.body73.lr.ph ] + br i1 %cmp71.not783, label %if.end153.i, label %if.then90.i + +if.then90.i: ; preds = %for.end.i + %6 = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr null, <8 x i32> %.) 
+ %add.i464.i = or <4 x i64> %eSum0.0531.i, zeroinitializer + %7 = bitcast <8 x i32> %.833 to <4 x i64> + %add.ptr152.i = getelementptr i16, ptr %pSrc2.addr.0535.i5, i64 %conv.i + br label %if.end153.i + +if.end153.i: ; preds = %if.then90.i, %for.end.i + %eSum0.2.i = phi <4 x i64> [ %7, %if.then90.i ], [ %eSum0.0531.i, %for.end.i ] + %pLocSrc2.1.i = phi ptr [ %add.ptr152.i, %if.then90.i ], [ %pSrc1, %for.end.i ] + %8 = load i16, ptr %pLocSrc2.1.i, align 2 + %conv165.i = zext i16 %8 to i32 + %vecinit3.i.i = insertelement <4 x i32> zeroinitializer, i32 %conv165.i, i64 0 + %9 = bitcast <4 x i32> %vecinit3.i.i to <2 x i64> + %shuffle.i503.i = shufflevector <2 x i64> %9, <2 x i64> zeroinitializer, <4 x i32> + %add.i452.i = or <4 x i64> %eSum0.2.i, %shuffle.i503.i + %add.ptr184.i = getelementptr i16, ptr %pSrc2.addr.0535.i, i64 %idx.ext183.i + br label %for.end.i + +for.end: ; preds = %for.body73.lr.ph, %entry + br label %for.cond29.preheader.i227 + +for.cond29.preheader.i227: ; preds = %for.end + br label %for.body32.i328 + +for.body32.i328: ; preds = %for.body32.i328, %for.cond29.preheader.i227 + %w.0524.i329 = phi i32 [ %sub.i381, %for.body32.i328 ], [ 0, %for.cond29.preheader.i227 ] + %sub.i381 = or i32 %w.0524.i329, 0 + %cmp30.i384 = icmp sgt i32 %w.0524.i329, 0 + br label %for.body32.i328 +} + +declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) + + +; The test is used to check MOV64rm instruction with relocation and ADD64rr_ND +; instruction are not folded to ADD64rm_ND with relocation. The later will emit +; APX relocation which is not recognized by the builtin linker on released OS. 
+ +; CHECK-LABEL: test_no_mem_fold +; NOAPXREL: movq (%rip), %rbx +; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar3-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX + +@gvar3 = external global [40000 x i8] + +define void @test_no_mem_fold(i32 %fetch.1644, i32 %sub.1142, i32 %mul.455, ptr %dval1, ptr %j1, ptr %j2, <4 x i1> %0, i1 %condloop.41.not, i32 %fetch.1646, i32 %fetch.1647, i32 %sub.1108, i64 %int_sext16, i64 %sub.1114, i1 %condloop.45.not.not, <4 x i1> %1) { +alloca_28: + br label %ifmerge.52 + +do.body903: ; preds = %ifmerge.2 + %mul.453 = mul i32 %sub.1108, %fetch.1647 + %sub.1144.neg = or i32 %mul.455, %fetch.1646 + %mul.454.neg = mul i32 %sub.1144.neg, %fetch.1644 + %sub.1147 = sub i32 0, %sub.1142 + %int_sext36 = sext i32 %mul.453 to i64 + %int_sext38 = sext i32 %mul.454.neg to i64 + %add.974 = or i64 %int_sext36, %int_sext38 + %div.98 = sdiv i64 %add.974, %int_sext16 + br label %do.body907 + +do.body907: ; preds = %do.body907, %do.body903 + %do.count41.0 = phi i64 [ %sub.1173, %do.body907 ], [ %div.98, %do.body903 ] + %gvar3.load = load double, ptr @gvar3, align 8 + store double %gvar3.load, ptr null, align 8 + call void (...) 
null(ptr null, ptr null, ptr null, ptr null, ptr %dval1, ptr null, ptr %j1, ptr %j2, ptr null, ptr null, ptr null, ptr null, ptr null, i64 0) + store i32 %sub.1147, ptr null, align 4 + %sub.1173 = or i64 %do.count41.0, 1 + %rel.314 = icmp sgt i64 %do.count41.0, 0 + br label %do.body907 + +ifmerge.52: ; preds = %ifmerge.2, %alloca_28 + %i1.i64.012 = phi i64 [ 0, %alloca_28 ], [ %sub.1114, %ifmerge.2 ] + %2 = getelementptr double, ptr @gvar3, i64 %i1.i64.012 + br label %loop.45 + +loop.45: ; preds = %loop.45, %ifmerge.52 + %3 = getelementptr double, ptr %2, <4 x i64> zeroinitializer + %4 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %3, i32 0, <4 x i1> %0, <4 x double> zeroinitializer) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %4, <4 x ptr> zeroinitializer, i32 0, <4 x i1> %0) + br i1 %condloop.45.not.not, label %loop.45, label %ifmerge.2 + +ifmerge.2: ; preds = %loop.45 + br i1 %condloop.41.not, label %do.body903, label %ifmerge.52 +} + +declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>) +declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>) diff --git a/llvm/test/CodeGen/X86/apx/reloc.mir b/llvm/test/CodeGen/X86/apx/reloc.mir index 9009f5b1a669c..877549b4322d1 100644 --- a/llvm/test/CodeGen/X86/apx/reloc.mir +++ b/llvm/test/CodeGen/X86/apx/reloc.mir @@ -57,7 +57,12 @@ ret i32 undef } - define i32 @add64rm_nd() { + define i32 @add64rm_nd_gotpcrel() { + entry: + ret i32 undef + } + + define i32 @add64rm_nd_gottpoff() { entry: ret i32 undef } @@ -253,7 +258,28 @@ body: | # NOAPXREL: %1:gr64_norex2 = XOR64rm %0, $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg, implicit-def $eflags :: (load (s64)) ... 
--- -name: add64rm_nd +name: add64rm_nd_gotpcrel +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr32 } +body: | + bb.0.entry: + %0:gr64 = MOV64rm $rip, 1, $noreg, @x, $noreg :: (load (s64)) + %1:gr64 = ADD64rm_ND %0, $rip, 1, $noreg, target-flags(x86-gotpcrel) @i, $noreg, implicit-def dead $eflags :: (load (s64) from got) + %2:gr32 = MOV32rm killed %1, 1, $noreg, 0, $fs :: (load (s32)) + $eax = COPY %2 + RET 0, $eax + +# CHECK: name: add64rm_nd_gotpcrel +# APXREL: %1:gr64 = ADD64rm_ND %0, $rip, 1, $noreg, target-flags(x86-gotpcrel) @i, $noreg, implicit-def dead $eflags :: (load (s64) from got) +# NOAPXREL: %3:gr64_norex2 = COPY %0 +# NOAPXREL: %1:gr64_norex2 = ADD64rm %3, $rip, 1, $noreg, target-flags(x86-gotpcrel) @i, $noreg, implicit-def dead $eflags +... +--- +name: add64rm_nd_gottpoff alignment: 16 tracksRegLiveness: true registers: From a078f6ef507790339bb83783c1a9ae568aa833e1 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Sat, 10 May 2025 23:11:18 +0800 Subject: [PATCH 2/7] Add fix for another issue --- .../lib/Target/X86/X86SuppressAPXForReloc.cpp | 16 ++++ llvm/test/CodeGen/X86/apx/reloc-regclass.ll | 95 +++++++++++++++++-- 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp index a263fd39bc324..55fee2fcfc86e 100644 --- a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp +++ b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp @@ -77,6 +77,22 @@ static void suppressEGPRRegClass(MachineFunction &MF, MachineInstr &MI, const X86RegisterInfo *RI = ST.getRegisterInfo(); const TargetRegisterClass *NewRC = RI->constrainRegClassToNonRex2(RC); MRI->setRegClass(Reg, NewRC); + + for (MachineInstr &Use : MRI->use_instructions(Reg)) { + switch (Use.getOpcode()) { + case X86::PHI: { + Register DstReg = Use.getOperand(0).getReg(); + if (!DstReg.isVirtual()) { + 
assert(!X86II::isApxExtendedReg(DstReg) && "APX EGPR is used unexpectedly."); + return; + } + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + const TargetRegisterClass *NewDstRC = RI->constrainRegClassToNonRex2(DstRC); + MRI->setRegClass(DstReg, NewDstRC); + break; + } + } + } } static bool handleInstructionWithEGPR(MachineFunction &MF, diff --git a/llvm/test/CodeGen/X86/apx/reloc-regclass.ll b/llvm/test/CodeGen/X86/apx/reloc-regclass.ll index 685a64cee2b47..39921945adc9a 100644 --- a/llvm/test/CodeGen/X86/apx/reloc-regclass.ll +++ b/llvm/test/CodeGen/X86/apx/reloc-regclass.ll @@ -5,6 +5,8 @@ ; RUN: llvm-objdump --no-print-imm-hex -dr %t.o | FileCheck %s --check-prefixes=APXREL,CHECK +; All tests are used to check no R_X86_64_CODE_4_GOTPCRELX relocation type +; emitted if APX features is disabled for relocation. ; The first 2 tests are used to check if the register class is not ; updated/recomputed by register allocator. It's originally updated to non-rex2 ; register class by "Suppress APX for relocation" pass. @@ -15,7 +17,7 @@ ; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4 ; NOAPXREL: movq (%rip), %rdi ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4 -; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4 @gvar = external global [20000 x i8] @@ -62,13 +64,13 @@ afterloop.41: ; preds = %ifmerge.2 declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) -; Will update after R_X86_64_CODE_6_GOTPCRELX is supported. +; TODO: update after R_X86_64_CODE_6_GOTPCRELX is supported. 
; CHECK-LABEL: test_regclass_not_updated_by_regalloc_2 ; APXREL: {nf} addq (%rip), %r16, %rcx ; APXREL-NEXT: R_X86_64_GOTPCREL gvar2-0x4 ; NOAPXREL: addq (%rip), %rbx ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar2-0x4 -; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar2-0x4 @gvar2 = external constant [8 x [8 x i32]] @@ -136,14 +138,14 @@ declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) ; instruction are not folded to ADD64rm_ND with relocation. The later will emit ; APX relocation which is not recognized by the builtin linker on released OS. -; CHECK-LABEL: test_no_mem_fold -; NOAPXREL: movq (%rip), %rbx +; CHECK-LABEL: test_mem_fold +; NOAPXREL: movq (%rip), %r12 ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar3-0x4 -; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar3-0x4 @gvar3 = external global [40000 x i8] -define void @test_no_mem_fold(i32 %fetch.1644, i32 %sub.1142, i32 %mul.455, ptr %dval1, ptr %j1, ptr %j2, <4 x i1> %0, i1 %condloop.41.not, i32 %fetch.1646, i32 %fetch.1647, i32 %sub.1108, i64 %int_sext16, i64 %sub.1114, i1 %condloop.45.not.not, <4 x i1> %1) { +define void @test_mem_fold(i32 %fetch.1644, i32 %sub.1142, i32 %mul.455, ptr %dval1, ptr %j1, ptr %j2, <4 x i1> %0, i1 %condloop.41.not, i32 %fetch.1646, i32 %fetch.1647, i32 %sub.1108, i64 %int_sext16, i64 %sub.1114, i1 %condloop.45.not.not, <4 x i1> %1) { alloca_28: br label %ifmerge.52 @@ -185,3 +187,82 @@ ifmerge.2: ; preds = %loop.45 declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>) declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>) + + +; The test is to check no R_X86_64_CODE_4_GOTPCRELX relocation emitted when the +; register in operand 0 of instruction with relocation is used in the PHI +; instruction. In PHI elimination pass, PHI instruction is eliminated by +; inserting COPY instruction. 
And in the late pass (Machine Copy Propagation +; pass), the COPY instruction may be optimized and the register in operand 0 of +; instruction with relocation may be replaced with EGPR. + + +; CHECK-LABEL: test_phi_uses +; APXREL: addq (%rip), %r16 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar4-0x4 +; APXREL: movq (%rip), %r17 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar5-0x4 +; APXREL: movq (%rip), %r18 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar6-0x4 +; APXREL: movq (%rip), %r19 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar7-0x4 +; APXREL: movq (%rip), %r22 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar8-0x4 +; APXREL: movq (%rip), %r23 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar9-0x4 +; APXREL: movq (%rip), %r24 +; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar10-0x4 +; NOAPXREL: movq (%rip), %r15 +; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar5-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar5-0x4 + +@gvar4 = external global [33 x [33 x double]] +@gvar5 = external global [33 x [33 x float]] +@gvar6 = external global [33 x [33 x float]] +@gvar7 = external global [33 x [33 x float]] +@gvar8 = external global [33 x [33 x float]] +@gvar9 = external global [33 x [33 x float]] +@gvar10 = external global [33 x [33 x float]] + +define void @test_phi_uses(i64 %i1.i64.0, ptr %0, ptr %1, ptr %2, ptr %3, ptr %in0, ptr %4, ptr %5, i1 %cmp.144) #0 { +alloca_15: + br label %loop.253 + +loop.253: ; preds = %loop.1500, %alloca_15 + %i1.i64.01 = phi i64 [ 0, %alloca_15 ], [ %6, %loop.1500 ] + %6 = add i64 %i1.i64.01, 1 + br label %loop.254 + +loop.254: ; preds = %loop.254, %loop.253 + %i2.i64.02 = phi i64 [ %13, %loop.254 ], [ 0, %loop.253 ] + %7 = getelementptr [33 x [33 x float]], ptr @gvar10, i64 0, i64 %i2.i64.02, i64 %i1.i64.01 + %gepload368 = load float, ptr %7, align 4 + store double 0.000000e+00, ptr %0, align 8 + %8 = getelementptr [33 x [33 x float]], ptr @gvar9, i64 0, i64 %i2.i64.02, i64 %i1.i64.01 + %gepload369 = load float, ptr %8, align 4 + store double 
0.000000e+00, ptr %1, align 8 + %9 = getelementptr [33 x [33 x float]], ptr @gvar8, i64 0, i64 %i2.i64.02, i64 %i1.i64.01 + %gepload371 = load float, ptr %9, align 4 + store double 0.000000e+00, ptr %2, align 8 + %10 = getelementptr [33 x [33 x float]], ptr @gvar7, i64 0, i64 %i2.i64.02, i64 %i1.i64.01 + %gepload373 = load float, ptr %10, align 4 + %11 = getelementptr [33 x [33 x double]], ptr @gvar4, i64 0, i64 %i2.i64.02, i64 %i1.i64.0 + store double 0.000000e+00, ptr %11, align 8 + %12 = getelementptr [33 x [33 x float]], ptr @gvar6, i64 0, i64 %i2.i64.02, i64 %i1.i64.01 + %gepload375 = load float, ptr %12, align 4 + store double 0.000000e+00, ptr %3, align 8 + store double 0.000000e+00, ptr %5, align 8 + %13 = add i64 %i2.i64.02, 1 + store double 0.000000e+00, ptr %in0, align 8 + store double 0.000000e+00, ptr %4, align 8 + %14 = getelementptr [33 x [33 x float]], ptr @gvar5, i64 0, i64 %i2.i64.02, i64 %i1.i64.01 + %gepload392 = load float, ptr %14, align 4 + br i1 %cmp.144, label %loop.1500, label %loop.254 + +loop.1500: ; preds = %loop.254 + %15 = getelementptr [33 x [33 x float]], ptr @gvar5, i64 0, i64 0, i64 %i1.i64.0 + %gepload444 = load float, ptr %15, align 4 + %16 = fpext float %gepload444 to double + store double %16, ptr null, align 8 + br label %loop.253 +} \ No newline at end of file From b22127b71d37f9e9a2eff8ff8ea248f1aedcf8f8 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Sun, 11 May 2025 11:51:42 +0800 Subject: [PATCH 3/7] Refine the code and update test --- .../lib/Target/X86/X86SuppressAPXForReloc.cpp | 36 +++++++++---------- llvm/test/CodeGen/X86/apx/reloc-regclass.ll | 2 +- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp index 55fee2fcfc86e..4a321e85ad9cc 100644 --- a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp +++ b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp @@ -65,9 +65,8 @@ FunctionPass 
*llvm::createX86SuppressAPXForRelocationPass() { return new X86SuppressAPXForRelocationPass(); } -static void suppressEGPRRegClass(MachineFunction &MF, MachineInstr &MI, +static void suppressEGPRRegClass(MachineRegisterInfo *MRI, MachineInstr &MI, const X86Subtarget &ST, unsigned int OpNum) { - MachineRegisterInfo *MRI = &MF.getRegInfo(); Register Reg = MI.getOperand(OpNum).getReg(); if (!Reg.isVirtual()) { assert(!X86II::isApxExtendedReg(Reg) && "APX EGPR is used unexpectedly."); @@ -77,22 +76,14 @@ static void suppressEGPRRegClass(MachineFunction &MF, MachineInstr &MI, const X86RegisterInfo *RI = ST.getRegisterInfo(); const TargetRegisterClass *NewRC = RI->constrainRegClassToNonRex2(RC); MRI->setRegClass(Reg, NewRC); +} - for (MachineInstr &Use : MRI->use_instructions(Reg)) { - switch (Use.getOpcode()) { - case X86::PHI: { - Register DstReg = Use.getOperand(0).getReg(); - if (!DstReg.isVirtual()) { - assert(!X86II::isApxExtendedReg(DstReg) && "APX EGPR is used unexpectedly."); - return; - } - const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - const TargetRegisterClass *NewDstRC = RI->constrainRegClassToNonRex2(DstRC); - MRI->setRegClass(DstReg, NewDstRC); - break; - } - } - } +static void suppressEGPRRegClassInRegUses(MachineRegisterInfo *MRI, + const X86Subtarget &ST, + Register Reg) { + for (MachineInstr &Use : MRI->use_instructions(Reg)) + if (Use.getOpcode() == X86::PHI) + suppressEGPRRegClass(MRI, Use, ST, 0); } static bool handleInstructionWithEGPR(MachineFunction &MF, @@ -100,6 +91,7 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, if (!ST.hasEGPR()) return false; + MachineRegisterInfo *MRI = &MF.getRegInfo(); auto suppressEGPRInInstrWithReloc = [&](MachineInstr &MI, ArrayRef OpNoArray) { int MemOpNo = X86II::getMemoryOperandNo(MI.getDesc().TSFlags) + @@ -110,7 +102,7 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Transform instruction with relocation type:\n " << MI); for (unsigned OpNo : 
OpNoArray) - suppressEGPRRegClass(MF, MI, ST, OpNo); + suppressEGPRRegClass(MRI, MI, ST, OpNo); LLVM_DEBUG(dbgs() << "to:\n " << MI << "\n"); } }; @@ -133,6 +125,7 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, case X86::MOV32rm: case X86::MOV64rm: { suppressEGPRInInstrWithReloc(MI, {0}); + suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); break; } case X86::ADC32rm: @@ -150,6 +143,7 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, case X86::SUB64rm: case X86::XOR64rm: { suppressEGPRInInstrWithReloc(MI, {0, 1}); + suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); break; } } @@ -195,7 +189,8 @@ static bool handleNDDOrNFInstructions(MachineFunction &MF, MI.getOperand(1).setReg(Reg); const MCInstrDesc &NewDesc = TII->get(X86::ADD64rm); MI.setDesc(NewDesc); - suppressEGPRRegClass(MF, MI, ST, 0); + suppressEGPRRegClass(MRI, MI, ST, 0); + suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); MI.tieOperands(0, 1); LLVM_DEBUG(dbgs() << "to:\n " << *CopyMIB << "\n"); LLVM_DEBUG(dbgs() << " " << MI << "\n"); @@ -208,7 +203,8 @@ static bool handleNDDOrNFInstructions(MachineFunction &MF, if (MO.getTargetFlags() == X86II::MO_GOTTPOFF) { LLVM_DEBUG(dbgs() << "Transform instruction with relocation type:\n " << MI); - suppressEGPRRegClass(MF, MI, ST, 0); + suppressEGPRRegClass(MRI, MI, ST, 0); + suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); Register Reg = MRI->createVirtualRegister(&X86::GR64_NOREX2RegClass); [[maybe_unused]] MachineInstrBuilder CopyMIB = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), diff --git a/llvm/test/CodeGen/X86/apx/reloc-regclass.ll b/llvm/test/CodeGen/X86/apx/reloc-regclass.ll index 39921945adc9a..625bcd3332a4b 100644 --- a/llvm/test/CodeGen/X86/apx/reloc-regclass.ll +++ b/llvm/test/CodeGen/X86/apx/reloc-regclass.ll @@ -139,7 +139,7 @@ declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) ; APX relocation which is not recognized by 
the builtin linker on released OS. ; CHECK-LABEL: test_mem_fold -; NOAPXREL: movq (%rip), %r12 +; NOAPXREL: movq (%rip), %rbx ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar3-0x4 ; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar3-0x4 From fe18e76be46e2fe62508e28bbf9e88755ccb35ef Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Sun, 11 May 2025 16:12:57 +0800 Subject: [PATCH 4/7] Add comments --- llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp index 4a321e85ad9cc..fced381392c11 100644 --- a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp +++ b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp @@ -78,6 +78,12 @@ static void suppressEGPRRegClass(MachineRegisterInfo *MRI, MachineInstr &MI, MRI->setRegClass(Reg, NewRC); } +// Suppress EGPR in operand 0 of PHI instruction to avoid APX relocation types +// emitted. If the register in operand 0 of instruction with relocation is used +// in the PHI instruction, it may be replaced with operand 0 of PHI instruction +// (maybe EGPR) after PHI elimination and Machine Copy Propagation pass. That +// may lead to emit APX relocation types which may break the backward +// compatibility with builtin linkers on existing OS. 
static void suppressEGPRRegClassInRegUses(MachineRegisterInfo *MRI, const X86Subtarget &ST, Register Reg) { From 682707a84afc916c3ecbae5c9530a9642e57b3bb Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Sun, 11 May 2025 16:53:31 +0800 Subject: [PATCH 5/7] Rename test --- llvm/test/CodeGen/X86/apx/{reloc-regclass.ll => reloc-opt.ll} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/X86/apx/{reloc-regclass.ll => reloc-opt.ll} (100%) diff --git a/llvm/test/CodeGen/X86/apx/reloc-regclass.ll b/llvm/test/CodeGen/X86/apx/reloc-opt.ll similarity index 100% rename from llvm/test/CodeGen/X86/apx/reloc-regclass.ll rename to llvm/test/CodeGen/X86/apx/reloc-opt.ll From b7352381f1a0a7fd8d1b6d5bb1729e7e955fc3df Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Sun, 11 May 2025 22:51:27 +0800 Subject: [PATCH 6/7] Suppress EGPR in all of register uses. --- .../lib/Target/X86/X86SuppressAPXForReloc.cpp | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp index fced381392c11..c6a7340a3826c 100644 --- a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp +++ b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp @@ -78,18 +78,24 @@ static void suppressEGPRRegClass(MachineRegisterInfo *MRI, MachineInstr &MI, MRI->setRegClass(Reg, NewRC); } -// Suppress EGPR in operand 0 of PHI instruction to avoid APX relocation types -// emitted. If the register in operand 0 of instruction with relocation is used -// in the PHI instruction, it may be replaced with operand 0 of PHI instruction -// (maybe EGPR) after PHI elimination and Machine Copy Propagation pass. That -// may lead to emit APX relocation types which may break the backward -// compatibility with builtin linkers on existing OS. 
-static void suppressEGPRRegClassInRegUses(MachineRegisterInfo *MRI, - const X86Subtarget &ST, - Register Reg) { - for (MachineInstr &Use : MRI->use_instructions(Reg)) - if (Use.getOpcode() == X86::PHI) - suppressEGPRRegClass(MRI, Use, ST, 0) +// Suppress EGPR in operand 0 of uses to avoid emitting APX relocation types. +// The register in operand 0 of an instruction with relocation may be replaced +// with operand 0 of its uses, which may be an EGPR, leading to emission of +// APX relocation types that break backward compatibility with builtin linkers +// on existing OSes. For example, the register in operand 0 of an instruction +// with relocation used in a PHI instruction may be replaced with operand 0 of +// the PHI after PHI elimination and the Machine Copy Propagation pass. +static void suppressEGPRRegClassInRegAndUses(MachineRegisterInfo *MRI, + MachineInstr &MI, + const X86Subtarget &ST, + unsigned int OpNum) { + suppressEGPRRegClass(MRI, MI, ST, OpNum); + Register Reg = MI.getOperand(OpNum).getReg(); + for (MachineInstr &Use : MRI->use_instructions(Reg)) { + const unsigned UseOpNum = 0; + if (Use.getOperand(UseOpNum).isReg()) + suppressEGPRRegClass(MRI, Use, ST, UseOpNum); + } } static bool handleInstructionWithEGPR(MachineFunction &MF, @@ -108,7 +114,7 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Transform instruction with relocation type:\n " << MI); for (unsigned OpNo : OpNoArray) - suppressEGPRRegClass(MRI, MI, ST, OpNo); + suppressEGPRRegClassInRegAndUses(MRI, MI, ST, OpNo); LLVM_DEBUG(dbgs() << "to:\n " << MI << "\n"); } }; @@ -131,7 +137,6 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, case X86::MOV32rm: case X86::MOV64rm: { suppressEGPRInInstrWithReloc(MI, {0}); - suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); break; } case X86::ADC32rm: @@ -149,7 +154,6 @@ static bool handleInstructionWithEGPR(MachineFunction &MF, case X86::SUB64rm: case X86::XOR64rm: {
suppressEGPRInInstrWithReloc(MI, {0, 1}); - suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); break; } } @@ -195,8 +199,7 @@ static bool handleNDDOrNFInstructions(MachineFunction &MF, MI.getOperand(1).setReg(Reg); const MCInstrDesc &NewDesc = TII->get(X86::ADD64rm); MI.setDesc(NewDesc); - suppressEGPRRegClass(MRI, MI, ST, 0); - suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); + suppressEGPRRegClassInRegAndUses(MRI, MI, ST, 0); MI.tieOperands(0, 1); LLVM_DEBUG(dbgs() << "to:\n " << *CopyMIB << "\n"); LLVM_DEBUG(dbgs() << " " << MI << "\n"); @@ -209,8 +212,7 @@ static bool handleNDDOrNFInstructions(MachineFunction &MF, if (MO.getTargetFlags() == X86II::MO_GOTTPOFF) { LLVM_DEBUG(dbgs() << "Transform instruction with relocation type:\n " << MI); - suppressEGPRRegClass(MRI, MI, ST, 0); - suppressEGPRRegClassInRegUses(MRI, ST, MI.getOperand(0).getReg()); + suppressEGPRRegClassInRegAndUses(MRI, MI, ST, 0); Register Reg = MRI->createVirtualRegister(&X86::GR64_NOREX2RegClass); [[maybe_unused]] MachineInstrBuilder CopyMIB = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), From 088a1354dd74e55eeeedf0f38bb1601b890ed063 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Mon, 12 May 2025 16:12:51 +0800 Subject: [PATCH 7/7] Limit updating reg uses for PHI instruction and update tests. 
--- llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp | 8 +++----- llvm/test/CodeGen/X86/apx/reloc-opt.ll | 9 +++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp index c6a7340a3826c..68b6ddeb0b15a 100644 --- a/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp +++ b/llvm/lib/Target/X86/X86SuppressAPXForReloc.cpp @@ -91,11 +91,9 @@ static void suppressEGPRRegClassInRegAndUses(MachineRegisterInfo *MRI, unsigned int OpNum) { suppressEGPRRegClass(MRI, MI, ST, OpNum); Register Reg = MI.getOperand(OpNum).getReg(); - for (MachineInstr &Use : MRI->use_instructions(Reg)) { - const unsigned UseOpNum = 0; - if (Use.getOperand(UseOpNum).isReg()) - suppressEGPRRegClass(MRI, Use, ST, UseOpNum); - } + for (MachineInstr &Use : MRI->use_instructions(Reg)) + if (Use.getOpcode() == X86::PHI) + suppressEGPRRegClass(MRI, Use, ST, 0); } static bool handleInstructionWithEGPR(MachineFunction &MF, diff --git a/llvm/test/CodeGen/X86/apx/reloc-opt.ll b/llvm/test/CodeGen/X86/apx/reloc-opt.ll index 625bcd3332a4b..a5ab94b00d64b 100644 --- a/llvm/test/CodeGen/X86/apx/reloc-opt.ll +++ b/llvm/test/CodeGen/X86/apx/reloc-opt.ll @@ -15,9 +15,9 @@ ; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1 ; APXREL: movq (%rip), %r16 ; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4 ; NOAPXREL: movq (%rip), %rdi ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4 -; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4 @gvar = external global [20000 x i8] @@ -68,9 +68,9 @@ declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immar ; CHECK-LABEL: test_regclass_not_updated_by_regalloc_2 ; APXREL: {nf} addq (%rip), %r16, %rcx ; APXREL-NEXT: R_X86_64_GOTPCREL gvar2-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar2-0x4 ; NOAPXREL: addq (%rip), %rbx ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar2-0x4 -; NOAPXREL-NOT: 
R_X86_64_CODE_4_GOTPCRELX gvar2-0x4 @gvar2 = external constant [8 x [8 x i32]] @@ -139,9 +139,9 @@ declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) ; APX relocation which is not recognized by the builtin linker on released OS. ; CHECK-LABEL: test_mem_fold +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar3-0x4 ; NOAPXREL: movq (%rip), %rbx ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar3-0x4 -; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar3-0x4 @gvar3 = external global [40000 x i8] @@ -212,9 +212,10 @@ declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg ; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar9-0x4 ; APXREL: movq (%rip), %r24 ; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar10-0x4 +; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar5-0x4 ; NOAPXREL: movq (%rip), %r15 ; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar5-0x4 -; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar5-0x4 + @gvar4 = external global [33 x [33 x double]] @gvar5 = external global [33 x [33 x float]]