-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[DAGCombiner] Fix the "subtraction if above a constant threshold" miscompile #140042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag Author: Piotr Fusik (pfusik) ChangesThis fixes #135194 incorrectly leaving Full diff: https://github.com/llvm/llvm-project/pull/140042.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d6e288a59b2ee..fc7295c0bc1db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12136,15 +12136,19 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
return NewSel;
- // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
+ // (select (ugt x, C), (add x, ~C), x) -> (umin x, (add x, ~C))
// (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))
APInt C;
if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) {
+ APInt AddC;
if ((CC == ISD::SETUGT && Cond0 == N2 &&
- sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) ||
+ sd_match(N1, m_Add(m_Specific(N2), m_ConstInt(AddC))) &&
+ AddC == ~C) ||
(CC == ISD::SETULT && Cond0 == N1 &&
- sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C)))))
- return DAG.getNode(ISD::UMIN, DL, VT, N1, N2);
+ sd_match(N2, m_Add(m_Specific(N1), m_ConstInt(AddC))) && AddC == -C))
+ return DAG.getNode(ISD::UMIN, DL, VT, Cond0,
+ DAG.getNode(ISD::ADD, DL, VT, Cond0,
+ DAG.getConstant(AddC, DL, VT)));
}
}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 0f2284637ca6a..0a3729b3de6b6 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -1742,7 +1742,7 @@ define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: addi a1, a0, -13
; RV32ZBB-NEXT: zext.b a1, a1
-; RV32ZBB-NEXT: minu a0, a1, a0
+; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
%cmp = icmp ugt i8 %x, 12
%sub = add i8 %x, -13
@@ -1763,7 +1763,7 @@ define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: addi a1, a0, -251
; RV32ZBB-NEXT: zext.h a1, a1
-; RV32ZBB-NEXT: minu a0, a1, a0
+; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
%cmp = icmp ugt i16 %x, 250
%sub = add i16 %x, -251
@@ -1789,7 +1789,7 @@ define i32 @sub_if_uge_C_i32(i32 signext %x) {
; RV32ZBB-NEXT: lui a1, 1048560
; RV32ZBB-NEXT: addi a1, a1, 15
; RV32ZBB-NEXT: add a1, a0, a1
-; RV32ZBB-NEXT: minu a0, a1, a0
+; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
%cmp = icmp ugt i32 %x, 65520
%sub = add i32 %x, -65521
@@ -1850,7 +1850,7 @@ define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
; RV32ZBB-NEXT: addi a3, a3, 15
; RV32ZBB-NEXT: sltu a2, a2, a0
; RV32ZBB-NEXT: add a3, a0, a3
-; RV32ZBB-NEXT: minu a0, a3, a0
+; RV32ZBB-NEXT: minu a0, a0, a3
; RV32ZBB-NEXT: sw a2, 0(a1)
; RV32ZBB-NEXT: ret
%cmp = icmp ugt i32 %x, 65520
@@ -1882,7 +1882,7 @@ define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
; RV32ZBB-NEXT: lui a2, 1048560
; RV32ZBB-NEXT: addi a2, a2, 15
; RV32ZBB-NEXT: add a2, a0, a2
-; RV32ZBB-NEXT: minu a0, a2, a0
+; RV32ZBB-NEXT: minu a0, a0, a2
; RV32ZBB-NEXT: sw a2, 0(a1)
; RV32ZBB-NEXT: ret
%sub = add i32 %x, -65521
@@ -1917,3 +1917,24 @@ define i32 @sub_if_uge_C_swapped_i32(i32 %x) {
%cond = select i1 %cmp, i32 %x, i32 %sub
ret i32 %cond
}
+
+define noundef i8 @sub_if_uge_C_nsw_i8(i8 zeroext %x) {
+; RV32I-LABEL: sub_if_uge_C_nsw_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltiu a1, a0, 13
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: andi a1, a1, -13
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: sub_if_uge_C_nsw_i8:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: addi a1, a0, -13
+; RV32ZBB-NEXT: zext.b a1, a1
+; RV32ZBB-NEXT: minu a0, a0, a1
+; RV32ZBB-NEXT: ret
+ %cmp = icmp ugt i8 %x, 12
+ %sub = add nsw i8 %x, -13
+ %conv4 = select i1 %cmp, i8 %sub, i8 %x
+ ret i8 %conv4
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 17eb0817d548a..1faafbe8b8187 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1898,7 +1898,7 @@ define i8 @sub_if_uge_C_i8(i8 zeroext %x) {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: addi a1, a0, -13
; RV64ZBB-NEXT: zext.b a1, a1
-; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
; RV64ZBB-NEXT: ret
%cmp = icmp ugt i8 %x, 12
%sub = add i8 %x, -13
@@ -1919,7 +1919,7 @@ define i16 @sub_if_uge_C_i16(i16 zeroext %x) {
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: addi a1, a0, -251
; RV64ZBB-NEXT: zext.h a1, a1
-; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
; RV64ZBB-NEXT: ret
%cmp = icmp ugt i16 %x, 250
%sub = add i16 %x, -251
@@ -1945,7 +1945,7 @@ define i32 @sub_if_uge_C_i32(i32 signext %x) {
; RV64ZBB-NEXT: lui a1, 1048560
; RV64ZBB-NEXT: addi a1, a1, 15
; RV64ZBB-NEXT: addw a1, a0, a1
-; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
; RV64ZBB-NEXT: ret
%cmp = icmp ugt i32 %x, 65520
%sub = add i32 %x, -65521
@@ -1975,7 +1975,7 @@ define i64 @sub_if_uge_C_i64(i64 %x) {
; RV64ZBB-NEXT: addiw a1, a1, -761
; RV64ZBB-NEXT: slli a1, a1, 9
; RV64ZBB-NEXT: add a1, a0, a1
-; RV64ZBB-NEXT: minu a0, a1, a0
+; RV64ZBB-NEXT: minu a0, a0, a1
; RV64ZBB-NEXT: ret
%cmp = icmp ugt i64 %x, 4999999999
%sub = add i64 %x, -5000000000
@@ -2005,7 +2005,7 @@ define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
; RV64ZBB-NEXT: addi a3, a3, 15
; RV64ZBB-NEXT: sltu a2, a2, a0
; RV64ZBB-NEXT: addw a3, a0, a3
-; RV64ZBB-NEXT: minu a0, a3, a0
+; RV64ZBB-NEXT: minu a0, a0, a3
; RV64ZBB-NEXT: sw a2, 0(a1)
; RV64ZBB-NEXT: ret
%cmp = icmp ugt i32 %x, 65520
@@ -2037,7 +2037,7 @@ define i32 @sub_if_uge_C_multiuse_sub_i32(i32 signext %x, ptr %z) {
; RV64ZBB-NEXT: lui a2, 1048560
; RV64ZBB-NEXT: addi a2, a2, 15
; RV64ZBB-NEXT: addw a2, a0, a2
-; RV64ZBB-NEXT: minu a0, a2, a0
+; RV64ZBB-NEXT: minu a0, a0, a2
; RV64ZBB-NEXT: sw a2, 0(a1)
; RV64ZBB-NEXT: ret
%sub = add i32 %x, -65521
|
llvm/test/CodeGen/RISCV/rv32zbb.ll
Outdated
define noundef i8 @sub_if_uge_C_nsw_i8(i8 zeroext %x) { | ||
; RV32I-LABEL: sub_if_uge_C_nsw_i8: | ||
; RV32I: # %bb.0: | ||
; RV32I-NEXT: sltiu a1, a0, 13 | ||
; RV32I-NEXT: addi a1, a1, -1 | ||
; RV32I-NEXT: andi a1, a1, -13 | ||
; RV32I-NEXT: add a0, a0, a1 | ||
; RV32I-NEXT: ret | ||
; | ||
; RV32ZBB-LABEL: sub_if_uge_C_nsw_i8: | ||
; RV32ZBB: # %bb.0: | ||
; RV32ZBB-NEXT: addi a1, a0, -13 | ||
; RV32ZBB-NEXT: zext.b a1, a1 | ||
; RV32ZBB-NEXT: minu a0, a0, a1 | ||
; RV32ZBB-NEXT: ret | ||
%cmp = icmp ugt i8 %x, 12 | ||
%sub = add nsw i8 %x, -13 | ||
%conv4 = select i1 %cmp, i8 %sub, i8 %x | ||
ret i8 %conv4 | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't test this fix. How to test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this might do the trick (at least for the ult variant):
define i7 @src(i7 %a) {
%x = or i7 %a, 51
%c = icmp ult i7 %x, -17
%add = add nsw i7 %x, 17
%s = select i1 %c, i7 %x, i7 %add
ret i7 %s
}
sd_match(N2, m_Add(m_Specific(N1), m_ConstInt(AddC))) && AddC == -C)) | ||
return DAG.getNode(ISD::UMIN, DL, VT, Cond0, | ||
DAG.getNode(ISD::ADD, DL, VT, Cond0, | ||
DAG.getConstant(AddC, DL, VT))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be cleaner just to split these and then do:
if (CC == ISD::SETULT && Cond0 == N1 &&
sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C))))
return DAG.getNode(ISD::UMIN, DL, VT, Cond0, DAG.getNode(ISD::ADD, DL, VT, Cond0,
DAG.getConstant(-C, DL, VT)));
etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Nevermind, I guess if |
sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C))))) | ||
return DAG.getNode(ISD::UMIN, DL, VT, N1, N2); | ||
if (CC == ISD::SETUGT && Cond0 == N2 && | ||
sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Braces, and/or use some temporary values to reduce this ugly wrapping
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
…compile This fixes llvm#135194 incorrectly reusing the existing `add nuw/nsw` while the transformed code relies on an unsigned wrap.
@@ -12140,11 +12140,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { | ||
// (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) | ||
APInt C; | ||
if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Could add a small comment here telling why we need to recreate the ADD. Just to avoid that someone tries to simplify this in the future, or if someone wonder why we can do this without introducing FREEZE.
Maybe something like this:
// Need to drop any poison generating flags from the ADD, to ensure that the result isn't more poisonous.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
This fixes #135194 incorrectly reusing the existing
add nuw/nsw
while the transformed code relies on an unsigned wrap.