From ea7c0db16f61c98b2bb9c3c7e99c838cb4fa8933 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 15 May 2025 18:11:44 -0400 Subject: [PATCH 1/5] [DirectX] replace byte splitting via vector bitcast with scalar instructions - instead of bitcasting and extract element lets use trunc or trunc and logical shift right to split. - fixes #139020 --- llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 46 +++++++++++++++++++ .../legalize-i64-high-low-vec-spilt.ll | 18 ++++++++ 2 files changed, 64 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 3e21f3c109456..543f76e753ba6 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -8,6 +8,8 @@ #include "DXILLegalizePass.h" #include "DirectX.h" +#include "llvm/ADT/APInt.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -510,6 +512,49 @@ static void updateFnegToFsub(Instruction &I, ToRemove.push_back(&I); } +static void +legalizeGetHighLowi64Bytes(Instruction &I, + SmallVectorImpl &ToRemove, + DenseMap &ReplacedValues) { + if (auto *BitCast = dyn_cast(&I)) { + if (BitCast->getDestTy() == + FixedVectorType::get(Type::getInt32Ty(I.getContext()), 2) && + BitCast->getSrcTy()->isIntegerTy(64)) { + ToRemove.push_back(BitCast); + ReplacedValues[BitCast] = BitCast->getOperand(0); + } + } + + if (auto *Extract = dyn_cast(&I)) { + auto *VecTy = dyn_cast(Extract->getVectorOperandType()); + if (VecTy && VecTy->getElementType()->isIntegerTy(32) && + VecTy->getNumElements() == 2) { + if (auto *Index = dyn_cast(Extract->getIndexOperand())) { + unsigned Idx = Index->getZExtValue(); + IRBuilder<> Builder(&I); + assert(dyn_cast(Extract->getVectorOperand())); + auto *Replacement = ReplacedValues[Extract->getVectorOperand()]; + if (Idx == 0) { + Value *LowBytes = Builder.CreateTrunc( + Replacement, Type::getInt32Ty(I.getContext())); + ReplacedValues[Extract] = LowBytes; + } else { + assert(Idx == 1); + Value *LogicalShiftRight = Builder.CreateLShr( + Replacement, + ConstantInt::get( + Replacement->getType(), + APInt(Replacement->getType()->getIntegerBitWidth(), 32))); + Value *HighBytes = Builder.CreateTrunc( + LogicalShiftRight, Type::getInt32Ty(I.getContext())); + ReplacedValues[Extract] = HighBytes; + } + ToRemove.push_back(Extract); + } + } + } +} + namespace { class DXILLegalizationPipeline { @@ -544,6 +589,7 @@ class DXILLegalizationPipeline { LegalizationPipeline.push_back(legalizeMemCpy); LegalizationPipeline.push_back(removeMemSet); LegalizationPipeline.push_back(updateFnegToFsub); + LegalizationPipeline.push_back(legalizeGetHighLowi64Bytes); } }; diff --git a/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll b/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll new file mode 100644 index 0000000000000..17fd3bf54acda --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +define void @split_via_extract(i64 noundef %a) { +; CHECK-LABEL: define void @split_via_extract( +; CHECK-SAME: i64 noundef [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[A]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: ret void +; +entry: + %vecA = bitcast i64 %a to <2 x i32> + %low = extractelement <2 x i32> %vecA, i32 0 ; low 32 bits + %high = extractelement <2 x i32> %vecA, i32 1 ; high 32 bits + ret void +} From b63502c0ec98f64e9cbf4773d5da90c872518b92 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 16 May 2025 12:34:24 -0400 Subject: [PATCH 2/5] address pr comment --- llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 543f76e753ba6..2b4ac6e8f6e61 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -522,6 +522,7 @@ legalizeGetHighLowi64Bytes(Instruction &I, BitCast->getSrcTy()->isIntegerTy(64)) { ToRemove.push_back(BitCast); ReplacedValues[BitCast] = BitCast->getOperand(0); + return; } } @@ -534,6 +535,8 @@ legalizeGetHighLowi64Bytes(Instruction &I, IRBuilder<> Builder(&I); assert(dyn_cast(Extract->getVectorOperand())); auto *Replacement = ReplacedValues[Extract->getVectorOperand()]; + assert(Replacement && "The BitCast replacement should have been set " + "before working on ExtractElementInst."); if (Idx == 0) { Value *LowBytes = Builder.CreateTrunc( Replacement, Type::getInt32Ty(I.getContext())); From fb6ef483c944a5123edba4b10c39c6220d8cea65 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 16 May 2025 14:50:30 -0700 Subject: [PATCH 3/5] Add Legalization stages. Passes that change the same instructions need to be staggered. --- llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 49 ++++++++++++-------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 2b4ac6e8f6e61..28805e201947a 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -553,6 +553,7 @@ legalizeGetHighLowi64Bytes(Instruction &I, ReplacedValues[Extract] = HighBytes; } ToRemove.push_back(Extract); + Extract->replaceAllUsesWith(ReplacedValues[Extract]); } } } @@ -565,34 +566,42 @@ class DXILLegalizationPipeline { DXILLegalizationPipeline() { initializeLegalizationPipeline(); } bool runLegalizationPipeline(Function &F) { - SmallVector ToRemove; - DenseMap ReplacedValues; - for (auto &I : instructions(F)) { - for (auto &LegalizationFn : LegalizationPipeline) - LegalizationFn(I, ToRemove, ReplacedValues); - } + bool MadeChange = false; + for (int Stage = 0; Stage < NumStages; ++Stage) { + SmallVector ToRemove; + DenseMap ReplacedValues; + for (auto &I : instructions(F)) { + for (auto &LegalizationFn : LegalizationPipeline[Stage]) + LegalizationFn(I, ToRemove, ReplacedValues); + } - for (auto *Inst : reverse(ToRemove)) - Inst->eraseFromParent(); + for (auto *Inst : reverse(ToRemove)) + Inst->eraseFromParent(); - return !ToRemove.empty(); + MadeChange |= !ToRemove.empty(); + } + return MadeChange; } private: - SmallVector< + enum LegalizationStage { Stage1 = 0, Stage2 = 1, NumStages }; + + using LegalizationFnTy = std::function &, - DenseMap &)>> - LegalizationPipeline; + DenseMap &)>; + + SmallVector LegalizationPipeline[NumStages]; void initializeLegalizationPipeline() { - LegalizationPipeline.push_back(upcastI8AllocasAndUses); - LegalizationPipeline.push_back(fixI8UseChain); - LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements); - LegalizationPipeline.push_back(legalizeFreeze); - LegalizationPipeline.push_back(legalizeMemCpy); - LegalizationPipeline.push_back(removeMemSet); - LegalizationPipeline.push_back(updateFnegToFsub); - LegalizationPipeline.push_back(legalizeGetHighLowi64Bytes); + LegalizationPipeline[Stage1].push_back(upcastI8AllocasAndUses); + LegalizationPipeline[Stage1].push_back(fixI8UseChain); + LegalizationPipeline[Stage1].push_back(legalizeGetHighLowi64Bytes); + LegalizationPipeline[Stage1].push_back(legalizeFreeze); + LegalizationPipeline[Stage1].push_back(legalizeMemCpy); + LegalizationPipeline[Stage1].push_back(removeMemSet); + LegalizationPipeline[Stage1].push_back(updateFnegToFsub); + LegalizationPipeline[Stage2].push_back( + downcastI64toI32InsertExtractElements); } }; From 2002521568aae185f23ec431bc70fd8e1ce0effa Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 3 Jun 2025 12:34:20 -0400 Subject: [PATCH 4/5] address pr feedback --- llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 9 ++++++++- ...w-vec-spilt.ll => legalize-i64-high-low-vec-split.ll} | 0 2 files changed, 8 insertions(+), 1 deletion(-) rename llvm/test/CodeGen/DirectX/{legalize-i64-high-low-vec-spilt.ll => legalize-i64-high-low-vec-split.ll} (100%) diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 28805e201947a..4a31e3e470ee0 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -527,13 +527,15 @@ legalizeGetHighLowi64Bytes(Instruction &I, } if (auto *Extract = dyn_cast(&I)) { + if (!dyn_cast(Extract->getVectorOperand())) + return; auto *VecTy = dyn_cast(Extract->getVectorOperandType()); if (VecTy && VecTy->getElementType()->isIntegerTy(32) && VecTy->getNumElements() == 2) { if (auto *Index = dyn_cast(Extract->getIndexOperand())) { unsigned Idx = Index->getZExtValue(); IRBuilder<> Builder(&I); - assert(dyn_cast(Extract->getVectorOperand())); + auto *Replacement = ReplacedValues[Extract->getVectorOperand()]; assert(Replacement && "The BitCast replacement should have been set " "before working on ExtractElementInst."); @@ -600,6 +602,11 @@ class DXILLegalizationPipeline { LegalizationPipeline[Stage1].push_back(legalizeMemCpy); LegalizationPipeline[Stage1].push_back(removeMemSet); LegalizationPipeline[Stage1].push_back(updateFnegToFsub); + // Note: legalizeGetHighLowi64Bytes and + // downcastI64toI32InsertExtractElements both modify extractelement, so they + // must run staggered stages. legalizeGetHighLowi64Bytes runs first b\c it + // removes extractelements, reducing the number that + // downcastI64toI32InsertExtractElements needs to handle. LegalizationPipeline[Stage2].push_back( downcastI64toI32InsertExtractElements); } diff --git a/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll b/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-split.ll similarity index 100% rename from llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll rename to llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-split.ll From ccbe953bb6026b7c782f9be17717e15f4ed05a1d Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Wed, 4 Jun 2025 21:09:46 -0400 Subject: [PATCH 5/5] address PR comment to hoist data structures and clear them in loops instead --- llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 4a31e3e470ee0..79b0cf261ba31 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -569,9 +569,11 @@ class DXILLegalizationPipeline { bool runLegalizationPipeline(Function &F) { bool MadeChange = false; + SmallVector ToRemove; + DenseMap ReplacedValues; for (int Stage = 0; Stage < NumStages; ++Stage) { - SmallVector ToRemove; - DenseMap ReplacedValues; + ToRemove.clear(); + ReplacedValues.clear(); for (auto &I : instructions(F)) { for (auto &LegalizationFn : LegalizationPipeline[Stage]) LegalizationFn(I, ToRemove, ReplacedValues);