@@ -106,25 +106,7 @@ define <vscale x 4 x i32> @usdot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
106
106
;
107
107
; CHECK-NEWLOWERING-LABEL: usdot:
108
108
; CHECK-NEWLOWERING: // %bb.0: // %entry
109
- ; CHECK-NEWLOWERING-NEXT: uunpklo z3.h, z1.b
110
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
111
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.h, z1.b
112
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
113
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
114
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.s, z3.h
115
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
116
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
117
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
118
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z1.h
119
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z1.s, z1.h
120
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
121
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
122
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
123
- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
124
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
125
- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
126
- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
127
- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
109
+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z1.b, z2.b
128
110
; CHECK-NEWLOWERING-NEXT: ret
129
111
entry:
130
112
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -165,25 +147,7 @@ define <vscale x 4 x i32> @sudot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a,
165
147
;
166
148
; CHECK-NEWLOWERING-LABEL: sudot:
167
149
; CHECK-NEWLOWERING: // %bb.0: // %entry
168
- ; CHECK-NEWLOWERING-NEXT: sunpklo z3.h, z1.b
169
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
170
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.h, z1.b
171
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
172
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.s
173
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.s, z3.h
174
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
175
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
176
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
177
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z1.h
178
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z1.s, z1.h
179
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
180
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
181
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z5.s, z6.s
182
- ; CHECK-NEWLOWERING-NEXT: mul z3.s, z3.s, z4.s
183
- ; CHECK-NEWLOWERING-NEXT: mla z0.s, p0/m, z1.s, z2.s
184
- ; CHECK-NEWLOWERING-NEXT: movprfx z1, z3
185
- ; CHECK-NEWLOWERING-NEXT: mla z1.s, p0/m, z7.s, z24.s
186
- ; CHECK-NEWLOWERING-NEXT: add z0.s, z1.s, z0.s
150
+ ; CHECK-NEWLOWERING-NEXT: usdot z0.s, z2.b, z1.b
187
151
; CHECK-NEWLOWERING-NEXT: ret
188
152
entry:
189
153
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i32 >
@@ -415,59 +379,12 @@ define <vscale x 4 x i64> @usdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
415
379
;
416
380
; CHECK-NEWLOWERING-LABEL: usdot_8to64:
417
381
; CHECK-NEWLOWERING: // %bb.0: // %entry
418
- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
419
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
420
- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
421
- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
422
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
423
- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
424
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
425
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
426
- ; CHECK-NEWLOWERING-NEXT: uunpklo z4.h, z2.b
427
- ; CHECK-NEWLOWERING-NEXT: sunpklo z5.h, z3.b
428
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.h, z2.b
429
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.h, z3.b
430
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
431
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.s, z4.h
432
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.s, z4.h
433
- ; CHECK-NEWLOWERING-NEXT: sunpklo z7.s, z5.h
434
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.s, z5.h
435
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.s, z2.h
436
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z2.s, z2.h
437
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.s, z3.h
438
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.s, z3.h
439
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z26.d, z6.s
440
- ; CHECK-NEWLOWERING-NEXT: uunpklo z6.d, z6.s
441
- ; CHECK-NEWLOWERING-NEXT: uunpklo z27.d, z4.s
442
- ; CHECK-NEWLOWERING-NEXT: sunpklo z28.d, z7.s
443
- ; CHECK-NEWLOWERING-NEXT: sunpklo z29.d, z5.s
444
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z4.d, z4.s
445
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z7.d, z7.s
446
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z5.d, z5.s
447
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z30.d, z24.s
448
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z31.d, z2.s
449
- ; CHECK-NEWLOWERING-NEXT: uunpklo z24.d, z24.s
450
- ; CHECK-NEWLOWERING-NEXT: uunpklo z2.d, z2.s
451
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z8.d, z25.s
452
- ; CHECK-NEWLOWERING-NEXT: sunpklo z25.d, z25.s
453
- ; CHECK-NEWLOWERING-NEXT: sunpklo z9.d, z3.s
454
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
455
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
456
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z3.s
457
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
458
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
459
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
460
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
461
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
462
- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
463
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
464
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
465
- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
466
- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
467
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
468
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
469
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
470
- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
382
+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
383
+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z2.b, z3.b
384
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
385
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
386
+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
387
+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
471
388
; CHECK-NEWLOWERING-NEXT: ret
472
389
entry:
473
390
%a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
@@ -548,59 +465,12 @@ define <vscale x 4 x i64> @sudot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i
548
465
;
549
466
; CHECK-NEWLOWERING-LABEL: sudot_8to64:
550
467
; CHECK-NEWLOWERING: // %bb.0: // %entry
551
- ; CHECK-NEWLOWERING-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
552
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #-2
553
- ; CHECK-NEWLOWERING-NEXT: str z9, [sp] // 16-byte Folded Spill
554
- ; CHECK-NEWLOWERING-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
555
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
556
- ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
557
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
558
- ; CHECK-NEWLOWERING-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
559
- ; CHECK-NEWLOWERING-NEXT: sunpklo z4.h, z2.b
560
- ; CHECK-NEWLOWERING-NEXT: uunpklo z5.h, z3.b
561
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.h, z2.b
562
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.h, z3.b
563
- ; CHECK-NEWLOWERING-NEXT: ptrue p0.d
564
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.s, z4.h
565
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.s, z4.h
566
- ; CHECK-NEWLOWERING-NEXT: uunpklo z7.s, z5.h
567
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.s, z5.h
568
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.s, z2.h
569
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z2.s, z2.h
570
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.s, z3.h
571
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.s, z3.h
572
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z26.d, z6.s
573
- ; CHECK-NEWLOWERING-NEXT: sunpklo z6.d, z6.s
574
- ; CHECK-NEWLOWERING-NEXT: sunpklo z27.d, z4.s
575
- ; CHECK-NEWLOWERING-NEXT: uunpklo z28.d, z7.s
576
- ; CHECK-NEWLOWERING-NEXT: uunpklo z29.d, z5.s
577
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z4.d, z4.s
578
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z7.d, z7.s
579
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z5.d, z5.s
580
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z30.d, z24.s
581
- ; CHECK-NEWLOWERING-NEXT: sunpkhi z31.d, z2.s
582
- ; CHECK-NEWLOWERING-NEXT: sunpklo z24.d, z24.s
583
- ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z2.s
584
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z8.d, z25.s
585
- ; CHECK-NEWLOWERING-NEXT: uunpklo z25.d, z25.s
586
- ; CHECK-NEWLOWERING-NEXT: uunpklo z9.d, z3.s
587
- ; CHECK-NEWLOWERING-NEXT: mul z27.d, z27.d, z29.d
588
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z6.d, z28.d
589
- ; CHECK-NEWLOWERING-NEXT: uunpkhi z3.d, z3.s
590
- ; CHECK-NEWLOWERING-NEXT: mul z4.d, z4.d, z5.d
591
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z26.d, z7.d
592
- ; CHECK-NEWLOWERING-NEXT: mla z0.d, p0/m, z2.d, z9.d
593
- ; CHECK-NEWLOWERING-NEXT: movprfx z2, z27
594
- ; CHECK-NEWLOWERING-NEXT: mla z2.d, p0/m, z24.d, z25.d
595
- ; CHECK-NEWLOWERING-NEXT: ldr z9, [sp] // 16-byte Folded Reload
596
- ; CHECK-NEWLOWERING-NEXT: mla z1.d, p0/m, z31.d, z3.d
597
- ; CHECK-NEWLOWERING-NEXT: movprfx z3, z4
598
- ; CHECK-NEWLOWERING-NEXT: mla z3.d, p0/m, z30.d, z8.d
599
- ; CHECK-NEWLOWERING-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
600
- ; CHECK-NEWLOWERING-NEXT: add z0.d, z2.d, z0.d
601
- ; CHECK-NEWLOWERING-NEXT: add z1.d, z3.d, z1.d
602
- ; CHECK-NEWLOWERING-NEXT: addvl sp, sp, #2
603
- ; CHECK-NEWLOWERING-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
468
+ ; CHECK-NEWLOWERING-NEXT: mov z4.s, #0 // =0x0
469
+ ; CHECK-NEWLOWERING-NEXT: usdot z4.s, z3.b, z2.b
470
+ ; CHECK-NEWLOWERING-NEXT: sunpklo z2.d, z4.s
471
+ ; CHECK-NEWLOWERING-NEXT: sunpkhi z3.d, z4.s
472
+ ; CHECK-NEWLOWERING-NEXT: add z0.d, z0.d, z2.d
473
+ ; CHECK-NEWLOWERING-NEXT: add z1.d, z1.d, z3.d
604
474
; CHECK-NEWLOWERING-NEXT: ret
605
475
entry:
606
476
%a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
0 commit comments