-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[DirectX] replace byte splitting via vector bitcast with scalar instructions #140167
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
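@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changes: use scalar instructions - instead of bitcasting and extracting an element, use trunc, or a logical shift right followed by trunc, to split. - fixes llvm#139020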
Full diff: https://github.com/llvm/llvm-project/pull/140167.diff 2 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index be77a70fa46ba..a99f706763c0f 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -8,6 +8,8 @@
#include "DXILLegalizePass.h"
#include "DirectX.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -317,6 +319,48 @@ static void removeMemSet(Instruction &I,
ToRemove.push_back(CI);
}
+/// Replaces the "bitcast i64 to <2 x i32>, then extractelement" idiom with
+/// scalar instructions: element 0 becomes a trunc of the i64 and element 1
+/// becomes a logical shift right by 32 followed by a trunc.
+///
+/// \param I              The instruction currently being visited.
+/// \param ToRemove       Receives every instruction this stage replaces.
+/// \param ReplacedValues Maps a replaced value to its replacement. The bitcast
+///                       is mapped to its i64 source so that extracts visited
+///                       afterwards can find the scalar to split.
+static void
+legalizeGetHighLowi64Bytes(Instruction &I,
+                           SmallVectorImpl<Instruction *> &ToRemove,
+                           DenseMap<Value *, Value *> &ReplacedValues) {
+  Type *I32Ty = Type::getInt32Ty(I.getContext());
+
+  if (auto *BitCast = dyn_cast<BitCastInst>(&I)) {
+    if (BitCast->getDestTy() == FixedVectorType::get(I32Ty, 2) &&
+        BitCast->getSrcTy()->isIntegerTy(64)) {
+      ToRemove.push_back(BitCast);
+      ReplacedValues[BitCast] = BitCast->getOperand(0);
+    }
+    // A bitcast can never also be an extractelement.
+    return;
+  }
+
+  auto *Extract = dyn_cast<ExtractElementInst>(&I);
+  if (!Extract)
+    return;
+
+  // Only extracts fed directly by a bitcast are candidates. Any other
+  // <2 x i32> extractelement must be left untouched rather than asserted on.
+  Value *VecOp = Extract->getVectorOperand();
+  if (!isa<BitCastInst>(VecOp))
+    return;
+
+  auto *VecTy = dyn_cast<FixedVectorType>(Extract->getVectorOperandType());
+  if (!VecTy || !VecTy->getElementType()->isIntegerTy(32) ||
+      VecTy->getNumElements() != 2)
+    return;
+
+  auto *Index = dyn_cast<ConstantInt>(Extract->getIndexOperand());
+  if (!Index)
+    return;
+
+  // Use find() instead of operator[] so an untracked bitcast (for example one
+  // whose source is not i64) does not insert a null entry that would then be
+  // handed to the IRBuilder.
+  auto It = ReplacedValues.find(VecOp);
+  if (It == ReplacedValues.end())
+    return;
+  Value *Replacement = It->second;
+  if (!Replacement || !Replacement->getType()->isIntegerTy(64))
+    return;
+
+  IRBuilder<> Builder(&I);
+  if (Index->isZero()) {
+    // Low 32 bits: plain truncation of the i64.
+    ReplacedValues[Extract] = Builder.CreateTrunc(Replacement, I32Ty);
+  } else {
+    assert(Index->isOne() && "expected index 0 or 1 into a <2 x i32>");
+    // High 32 bits: shift them down into the low half, then truncate.
+    Value *LogicalShiftRight = Builder.CreateLShr(Replacement, 32);
+    ReplacedValues[Extract] = Builder.CreateTrunc(LogicalShiftRight, I32Ty);
+  }
+  ToRemove.push_back(Extract);
+}
namespace {
class DXILLegalizationPipeline {
@@ -349,6 +393,7 @@ class DXILLegalizationPipeline {
LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
LegalizationPipeline.push_back(legalizeFreeze);
LegalizationPipeline.push_back(removeMemSet);
+ LegalizationPipeline.push_back(legalizeGetHighLowi64Bytes);
}
};
diff --git a/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll b/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll
new file mode 100644
index 0000000000000..17fd3bf54acda
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/legalize-i64-high-low-vec-spilt.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; The i64 -> <2 x i32> bitcast and both extractelements are expected to be
+; replaced by scalar code: a trunc for element 0 and an lshr by 32 followed by
+; a trunc for element 1, as the CHECK lines below require.
+define void @split_via_extract(i64 noundef %a) {
+; CHECK-LABEL: define void @split_via_extract(
+; CHECK-SAME: i64 noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[A]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT: ret void
+;
+entry:
+ %vecA = bitcast i64 %a to <2 x i32>
+ %low = extractelement <2 x i32> %vecA, i32 0 ; low 32 bits
+ %high = extractelement <2 x i32> %vecA, i32 1 ; high 32 bits
+ ret void
+}
|
0f83129
to
ba88ccb
Compare
ba88ccb
to
f7116fc
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks reasonable. I think we'd want to avoid adding too many stages, but ordering like this is simple enough.
SmallVector<Instruction *> ToRemove; | ||
DenseMap<Value *, Value *> ReplacedValues; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to hoist the vector and the map outside of the loop and reset
them here.
instructions - instead of bitcasting and extracting an element, let's use trunc, or a logical shift right followed by trunc, to split. - fixes llvm#139020
…d to be staggered.
f7116fc
to
ccbe953
Compare
…#140167) instructions - instead of bitcasting and extracting an element, let's use trunc, or a logical shift right followed by trunc, to split. - fixes llvm#139020
…#140167) instructions - instead of bitcasting and extracting an element, let's use trunc, or a logical shift right followed by trunc, to split. - fixes llvm#139020
instructions