Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a4760e9

Browse files
committed
[SelectionDAG] Convert to or mask if all insertions are -1
We already did this for insertions of 0 by using an AND mask; we can do the same for insertions of -1 by using an OR mask.
1 parent 4b1f1f7 commit a4760e9
Copy full SHA for a4760e9

File tree

Expand file treeCollapse file tree

4 files changed

+57
-163
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+57
-163
lines changed

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Copy file name to clipboardExpand all lines: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+13-1Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22974,7 +22974,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2297422974
}
2297522975

2297622976
// If all insertions are zero value, try to convert to AND mask.
22977-
// TODO: Do this for -1 with OR mask?
2297822977
if (!LegalOperations && llvm::isNullConstant(InVal) &&
2297922978
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
2298022979
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
@@ -22987,6 +22986,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
2298722986
DAG.getBuildVector(VT, DL, Mask));
2298822987
}
2298922988

22989+
// If all insertions are -1, try to convert to OR mask.
22990+
if (!LegalOperations && llvm::isAllOnesConstant(InVal) &&
22991+
all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22992+
count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22993+
SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22994+
SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22995+
SmallVector<SDValue, 8> Mask(NumElts);
22996+
for (unsigned I = 0; I != NumElts; ++I)
22997+
Mask[I] = Ops[I] ? AllOnes : Zero;
22998+
return DAG.getNode(ISD::OR, DL, VT, CurVec,
22999+
DAG.getBuildVector(VT, DL, Mask));
23000+
}
23001+
2299023002
// Failed to find a match in the chain - bail.
2299123003
break;
2299223004
}

‎llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+5-11Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,13 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
101101
define i8 @test_v9i8(<9 x i8> %a) nounwind {
102102
; CHECK-LABEL: test_v9i8:
103103
; CHECK: // %bb.0:
104-
; CHECK-NEXT: mov v1.16b, v0.16b
105-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
106-
; CHECK-NEXT: mov v1.b[9], w8
107-
; CHECK-NEXT: mov v1.b[10], w8
108-
; CHECK-NEXT: mov v1.b[11], w8
109-
; CHECK-NEXT: mov v1.b[12], w8
110-
; CHECK-NEXT: mov v1.b[13], w8
111-
; CHECK-NEXT: mov v1.b[14], w8
112-
; CHECK-NEXT: mov v1.b[15], w8
104+
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff00
105+
; CHECK-NEXT: fmov x8, d0
106+
; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b
113107
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
114108
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
115-
; CHECK-NEXT: fmov x8, d0
116-
; CHECK-NEXT: and x8, x8, x8, lsr #32
109+
; CHECK-NEXT: fmov x9, d0
110+
; CHECK-NEXT: and x8, x9, x8, lsr #32
117111
; CHECK-NEXT: and x8, x8, x8, lsr #16
118112
; CHECK-NEXT: lsr x9, x8, #8
119113
; CHECK-NEXT: and w0, w8, w9

‎llvm/test/CodeGen/X86/avx-cvt-3.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/X86/avx-cvt-3.ll
+2-6Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,13 @@ define <8 x float> @sitofp_shuffle_zero_v8i32(<8 x i32> %a0) {
4848
define <8 x float> @sitofp_insert_allbits_v8i32(<8 x i32> %a0) {
4949
; X86-LABEL: sitofp_insert_allbits_v8i32:
5050
; X86: # %bb.0:
51-
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
52-
; X86-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
53-
; X86-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
51+
; X86-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
5452
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
5553
; X86-NEXT: retl
5654
;
5755
; X64-LABEL: sitofp_insert_allbits_v8i32:
5856
; X64: # %bb.0:
59-
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
60-
; X64-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
61-
; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
57+
; X64-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
6258
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
6359
; X64-NEXT: retq
6460
%1 = insertelement <8 x i32> %a0, i32 -1, i32 0

‎llvm/test/CodeGen/X86/insertelement-ones.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/X86/insertelement-ones.ll
+37-145Lines changed: 37 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -150,59 +150,32 @@ define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
150150
define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
151151
; SSE2-LABEL: insert_v8i32_x12345x7:
152152
; SSE2: # %bb.0:
153-
; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
154-
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
155-
; SSE2-NEXT: movl $-1, %eax
156-
; SSE2-NEXT: movd %eax, %xmm2
157-
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
158-
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
153+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
154+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
159155
; SSE2-NEXT: retq
160156
;
161157
; SSE3-LABEL: insert_v8i32_x12345x7:
162158
; SSE3: # %bb.0:
163-
; SSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
164-
; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
165-
; SSE3-NEXT: movl $-1, %eax
166-
; SSE3-NEXT: movd %eax, %xmm2
167-
; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
168-
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
159+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
160+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
169161
; SSE3-NEXT: retq
170162
;
171163
; SSSE3-LABEL: insert_v8i32_x12345x7:
172164
; SSSE3: # %bb.0:
173-
; SSSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
174-
; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
175-
; SSSE3-NEXT: movl $-1, %eax
176-
; SSSE3-NEXT: movd %eax, %xmm2
177-
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
178-
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
165+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
166+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
179167
; SSSE3-NEXT: retq
180168
;
181169
; SSE41-LABEL: insert_v8i32_x12345x7:
182170
; SSE41: # %bb.0:
183-
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
184-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
185-
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
171+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
186173
; SSE41-NEXT: retq
187174
;
188-
; AVX1-LABEL: insert_v8i32_x12345x7:
189-
; AVX1: # %bb.0:
190-
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
191-
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
192-
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
193-
; AVX1-NEXT: retq
194-
;
195-
; AVX2-LABEL: insert_v8i32_x12345x7:
196-
; AVX2: # %bb.0:
197-
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
198-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
199-
; AVX2-NEXT: retq
200-
;
201-
; AVX512-LABEL: insert_v8i32_x12345x7:
202-
; AVX512: # %bb.0:
203-
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
204-
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
205-
; AVX512-NEXT: retq
175+
; AVX-LABEL: insert_v8i32_x12345x7:
176+
; AVX: # %bb.0:
177+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
178+
; AVX-NEXT: retq
206179
%1 = insertelement <8 x i32> %a, i32 -1, i32 0
207180
%2 = insertelement <8 x i32> %1, i32 -1, i32 6
208181
ret <8 x i32> %2
@@ -211,35 +184,27 @@ define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
211184
define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
212185
; SSE2-LABEL: insert_v8i16_x12345x7:
213186
; SSE2: # %bb.0:
214-
; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
215-
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
216-
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
187+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
217188
; SSE2-NEXT: retq
218189
;
219190
; SSE3-LABEL: insert_v8i16_x12345x7:
220191
; SSE3: # %bb.0:
221-
; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
222-
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
223-
; SSE3-NEXT: pinsrw $6, %eax, %xmm0
192+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224193
; SSE3-NEXT: retq
225194
;
226195
; SSSE3-LABEL: insert_v8i16_x12345x7:
227196
; SSSE3: # %bb.0:
228-
; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
229-
; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
230-
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
197+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
231198
; SSSE3-NEXT: retq
232199
;
233200
; SSE41-LABEL: insert_v8i16_x12345x7:
234201
; SSE41: # %bb.0:
235-
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
236-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
202+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
237203
; SSE41-NEXT: retq
238204
;
239205
; AVX-LABEL: insert_v8i16_x12345x7:
240206
; AVX: # %bb.0:
241-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
242-
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
207+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
243208
; AVX-NEXT: retq
244209
%1 = insertelement <8 x i16> %a, i16 -1, i32 0
245210
%2 = insertelement <8 x i16> %1, i16 -1, i32 6
@@ -249,60 +214,32 @@ define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
249214
define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
250215
; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
251216
; SSE2: # %bb.0:
252-
; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
253-
; SSE2-NEXT: pinsrw $0, %eax, %xmm0
254-
; SSE2-NEXT: pinsrw $6, %eax, %xmm0
255-
; SSE2-NEXT: pinsrw $7, %eax, %xmm1
217+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
218+
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
256219
; SSE2-NEXT: retq
257220
;
258221
; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
259222
; SSE3: # %bb.0:
260-
; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
261-
; SSE3-NEXT: pinsrw $0, %eax, %xmm0
262-
; SSE3-NEXT: pinsrw $6, %eax, %xmm0
263-
; SSE3-NEXT: pinsrw $7, %eax, %xmm1
223+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224+
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
264225
; SSE3-NEXT: retq
265226
;
266227
; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
267228
; SSSE3: # %bb.0:
268-
; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
269-
; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
270-
; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
271-
; SSSE3-NEXT: pinsrw $7, %eax, %xmm1
229+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
230+
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
272231
; SSSE3-NEXT: retq
273232
;
274233
; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
275234
; SSE41: # %bb.0:
276-
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
277-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
278-
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
235+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
236+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
279237
; SSE41-NEXT: retq
280238
;
281-
; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
282-
; AVX1: # %bb.0:
283-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [65535,0,0,0]
284-
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
285-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
286-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
287-
; AVX1-NEXT: retq
288-
;
289-
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
290-
; AVX2: # %bb.0:
291-
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
292-
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
293-
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
294-
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
295-
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
296-
; AVX2-NEXT: retq
297-
;
298-
; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
299-
; AVX512: # %bb.0:
300-
; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
301-
; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
302-
; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
303-
; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
304-
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
305-
; AVX512-NEXT: retq
239+
; AVX-LABEL: insert_v16i16_x12345x789ABCDEx:
240+
; AVX: # %bb.0:
241+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
242+
; AVX-NEXT: retq
306243
%1 = insertelement <16 x i16> %a, i16 -1, i32 0
307244
%2 = insertelement <16 x i16> %1, i16 -1, i32 6
308245
%3 = insertelement <16 x i16> %2, i16 -1, i32 15
@@ -313,33 +250,26 @@ define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
313250
; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
314251
; SSE2: # %bb.0:
315252
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
316-
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
317253
; SSE2-NEXT: retq
318254
;
319255
; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
320256
; SSE3: # %bb.0:
321257
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
322-
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
323258
; SSE3-NEXT: retq
324259
;
325260
; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
326261
; SSSE3: # %bb.0:
327262
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
328-
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
329263
; SSSE3-NEXT: retq
330264
;
331265
; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
332266
; SSE41: # %bb.0:
333-
; SSE41-NEXT: movl $255, %eax
334-
; SSE41-NEXT: pinsrb $0, %eax, %xmm0
335-
; SSE41-NEXT: pinsrb $15, %eax, %xmm0
267+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
336268
; SSE41-NEXT: retq
337269
;
338270
; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
339271
; AVX: # %bb.0:
340-
; AVX-NEXT: movl $255, %eax
341-
; AVX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
342-
; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
272+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
343273
; AVX-NEXT: retq
344274
%1 = insertelement <16 x i8> %a, i8 -1, i32 0
345275
%2 = insertelement <16 x i8> %1, i8 -1, i32 15
@@ -350,69 +280,31 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
350280
; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
351281
; SSE2: # %bb.0:
352282
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
353-
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
354-
; SSE2-NEXT: orps %xmm2, %xmm0
355283
; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
356-
; SSE2-NEXT: orps %xmm2, %xmm1
357284
; SSE2-NEXT: retq
358285
;
359286
; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
360287
; SSE3: # %bb.0:
361288
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
362-
; SSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
363-
; SSE3-NEXT: orps %xmm2, %xmm0
364289
; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
365-
; SSE3-NEXT: orps %xmm2, %xmm1
366290
; SSE3-NEXT: retq
367291
;
368292
; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
369293
; SSSE3: # %bb.0:
370294
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
371-
; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
372-
; SSSE3-NEXT: orps %xmm2, %xmm0
373295
; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
374-
; SSSE3-NEXT: orps %xmm2, %xmm1
375296
; SSSE3-NEXT: retq
376297
;
377298
; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
378299
; SSE41: # %bb.0:
379-
; SSE41-NEXT: movl $255, %eax
380-
; SSE41-NEXT: pinsrb $0, %eax, %xmm0
381-
; SSE41-NEXT: pinsrb $15, %eax, %xmm0
382-
; SSE41-NEXT: pinsrb $14, %eax, %xmm1
383-
; SSE41-NEXT: pinsrb $15, %eax, %xmm1
300+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
301+
; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
384302
; SSE41-NEXT: retq
385303
;
386-
; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
387-
; AVX1: # %bb.0:
388-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [255,0,0,0]
389-
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
390-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
391-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
392-
; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
393-
; AVX1-NEXT: retq
394-
;
395-
; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
396-
; AVX2: # %bb.0:
397-
; AVX2-NEXT: movl $255, %eax
398-
; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
399-
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
400-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
401-
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
402-
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
403-
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
404-
; AVX2-NEXT: retq
405-
;
406-
; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
407-
; AVX512: # %bb.0:
408-
; AVX512-NEXT: movl $255, %eax
409-
; AVX512-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
410-
; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
411-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
412-
; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
413-
; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
414-
; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
415-
; AVX512-NEXT: retq
304+
; AVX-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
305+
; AVX: # %bb.0:
306+
; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
307+
; AVX-NEXT: retq
416308
%1 = insertelement <32 x i8> %a, i8 -1, i32 0
417309
%2 = insertelement <32 x i8> %1, i8 -1, i32 15
418310
%3 = insertelement <32 x i8> %2, i8 -1, i32 30

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.