Commit b931731

[InstCombine] Narrow trunc(lshr) in more cases (#139645)
We can narrow `trunc(lshr(i32)) to i8` to `trunc(lshr(i16)) to i8` even when the bits being shifted in are not known to be zero, provided the MSBs of the shifted value do not matter because they end up truncated away. This kind of narrowing does not remove the trunc, but it can help the vectorizer generate better code in a smaller type. Motivation: libyuv, functions like ARGBToUV444Row_C(). Proof: https://alive2.llvm.org/ce/z/9Ao2aJ
1 parent: a1e1a84

2 files changed: +131 −6 lines
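
For a concrete before/after, the first new test below (@trunc_lshr_ext_halfWidth) exercises exactly this pattern; the range(i16 0, 8) attribute on the shift amount is what makes the narrowing provably safe:

  ; before: everything is computed in i32
  %add = add nuw nsw i32 %zext_a, %zext_b
  %shr = lshr i32 %add, %zext_shiftAmt   ; shift amount known to be < 8
  %trunc = trunc i32 %shr to i8

  ; after: instcombine evaluates the expression in i16 instead
  %add = add i16 %a, %b
  %shr = lshr i16 %add, %shiftAmt
  %trunc = trunc i16 %shr to i8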

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp (+16 −6)
@@ -51,6 +51,8 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
     Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
     Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
     Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+    if (Opc == Instruction::LShr || Opc == Instruction::AShr)
+      Res->setIsExact(I->isExact());
     break;
   }
   case Instruction::Trunc:
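
The hunk above also makes EvaluateInDifferentType carry the exact flag over when it rebuilds a right shift in the narrow type. A minimal sketch of the effect (%x and the shift amount are hypothetical, assuming the narrowing from i32 to i16 fires):

  ; wide form: exact promises no set bits are shifted out
  %shr = lshr exact i32 %x, 2
  %trunc = trunc i32 %shr to i8

  ; narrow form: the rebuilt shift keeps the flag instead of dropping it
  %x.n = trunc i32 %x to i16
  %shr.n = lshr exact i16 %x.n, 2
  %trunc.n = trunc i16 %shr.n to i8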
@@ -319,13 +321,21 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
     // zero - use AmtKnownBits.getMaxValue().
     uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
     uint32_t BitWidth = Ty->getScalarSizeInBits();
-    KnownBits AmtKnownBits =
-        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
+    KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), 0, CxtI);
+    APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
     APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
-    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
-        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
-      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
-             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+    if (MaxShiftAmt.ult(BitWidth)) {
+      // If the only user is a trunc then we can narrow the shift if any new
+      // MSBs are not going to be used.
+      if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
+        auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
+        if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
+          return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+                 canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+      }
+      if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI))
+        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
+               canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
     }
     break;
   }
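
The key new condition is (MaxShiftAmt + DemandedBits).ule(BitWidth). When the shift's only user is a trunc to DemandedBits bits, the result keeps only bits [ShiftAmt, ShiftAmt + DemandedBits) of the shifted value, and ShiftAmt + DemandedBits <= MaxShiftAmt + DemandedBits <= BitWidth means all of those bits live in the low BitWidth bits, so the high bits that the old MaskedValueIsZero check guarded against can never reach the truncated result. Working the numbers for the tests below (i32 narrowed to i16, trunc to i8): a shift amount in range(i16 0, 8) gives MaxShiftAmt = 7 and 7 + 8 = 15 <= 16, so the fold fires; range(i16 0, 10) gives 9 + 8 = 17 > 16, so it does not.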

llvm/test/Transforms/InstCombine/cast.ll (+115 −0)
@@ -5,6 +5,7 @@
 ; RUN: opt < %s -passes=instcombine -S -data-layout="E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=ALL,BE
 ; RUN: opt < %s -passes=instcombine -S -data-layout="e-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" -use-constant-fp-for-fixed-length-splat -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=ALL,LE
 
+declare void @use_i8(i8)
 declare void @use_i32(i32)
 declare void @use_v2i32(<2 x i32>)
 
@@ -2041,6 +2042,120 @@ define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) {
   ret <2 x i8> %D
 }
 
+define i8 @trunc_lshr_ext_halfWidth(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth(
+; ALL-NEXT:    [[ADD:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i16 [[ADD]], [[SHIFTAMT:%.*]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i16 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_rhsOutofRange_neg(i16 %a, i16 %b, i16 range(i16 0, 10) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_rhsOutofRange_neg(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_rhsNoRange_neg(i16 %a, i16 %b, i16 %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_rhsNoRange_neg(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_twouse_neg1(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_twouse_neg1(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    call void @use_i32(i32 [[ADD]])
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  call void @use_i32(i32 %add)
+  %shr = lshr i32 %add, %zext_shiftAmt
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+define i8 @trunc_lshr_ext_halfWidth_twouse_neg2(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_twouse_neg2(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
+; ALL-NEXT:    [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
+; ALL-NEXT:    call void @use_i32(i32 [[SHR]])
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; ALL-NEXT:    ret i8 [[TRUNC]]
+;
+  %zext_a = zext i16 %a to i32
+  %zext_b = zext i16 %b to i32
+  %zext_shiftAmt = zext i16 %shiftAmt to i32
+  %add = add nuw nsw i32 %zext_a, %zext_b
+  %shr = lshr i32 %add, %zext_shiftAmt
+  call void @use_i32(i32 %shr)
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+}
+
+; The narrowing transform only happens for integer types.
+define <2 x i8> @trunc_lshr_ext_halfWidth_vector_neg(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: @trunc_lshr_ext_halfWidth_vector_neg(
+; ALL-NEXT:    [[ZEXT_A:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
+; ALL-NEXT:    [[ZEXT_B:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32>
+; ALL-NEXT:    [[ADD:%.*]] = add nuw nsw <2 x i32> [[ZEXT_A]], [[ZEXT_B]]
+; ALL-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[ADD]], splat (i32 6)
+; ALL-NEXT:    [[TRUNC:%.*]] = trunc <2 x i32> [[SHR]] to <2 x i8>
+; ALL-NEXT:    ret <2 x i8> [[TRUNC]]
+;
+  %zext_a = zext <2 x i16> %a to <2 x i32>
+  %zext_b = zext <2 x i16> %b to <2 x i32>
+  %add = add nuw nsw <2 x i32> %zext_a, %zext_b
+  %shr = lshr <2 x i32> %add, <i32 6, i32 6>
+  %trunc = trunc <2 x i32> %shr to <2 x i8>
+  ret <2 x i8> %trunc
+}
+
 ; The following four tests sext + lshr + trunc patterns.
 ; PR33078
