@@ -46,7 +46,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_1_1__cbsz1__blgp1(<8 x
46
46
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
47
47
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
48
48
; GCN-NEXT: s_nop 1
49
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
49
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[0,0,0]
50
50
; GCN-NEXT: s_nop 7
51
51
; GCN-NEXT: s_nop 3
52
52
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -70,7 +70,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_2__cbsz1__blgp1(<8 x
70
70
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
71
71
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
72
72
; GCN-NEXT: s_nop 1
73
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
73
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[1,1,0]
74
74
; GCN-NEXT: s_nop 7
75
75
; GCN-NEXT: s_nop 3
76
76
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -94,7 +94,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_3__cbsz1__blgp1(<8 x
94
94
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
95
95
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
96
96
; GCN-NEXT: s_nop 1
97
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
97
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,1,0] op_sel_hi:[1,1,0]
98
98
; GCN-NEXT: s_nop 7
99
99
; GCN-NEXT: s_nop 3
100
100
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -118,7 +118,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_3__cbsz1__blgp1(<8 x
118
118
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
119
119
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
120
120
; GCN-NEXT: s_nop 1
121
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
121
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[0,1,0]
122
122
; GCN-NEXT: s_nop 7
123
123
; GCN-NEXT: s_nop 3
124
124
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -142,7 +142,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_0__cbsz1__blgp1(<8 x
142
142
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
143
143
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
144
144
; GCN-NEXT: s_nop 1
145
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
145
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,0,0]
146
146
; GCN-NEXT: s_nop 7
147
147
; GCN-NEXT: s_nop 3
148
148
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -166,7 +166,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_2_3__cbsz1__blgp1(<8 x
166
166
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
167
167
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
168
168
; GCN-NEXT: s_nop 1
169
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
169
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[0,1,0] op_sel_hi:[1,1,0]
170
170
; GCN-NEXT: s_nop 7
171
171
; GCN-NEXT: s_nop 3
172
172
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -190,7 +190,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_3_2__cbsz1__blgp1(<8 x
190
190
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
191
191
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
192
192
; GCN-NEXT: s_nop 1
193
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel_hi:[0,0,0]
193
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v20, v21 op_sel:[1,0,0] op_sel_hi:[1,1,0]
194
194
; GCN-NEXT: s_nop 7
195
195
; GCN-NEXT: s_nop 3
196
196
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1797,7 +1797,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_inlineimm__
1797
1797
; GCN-NEXT: v_accvgpr_write_b32 a2, v18
1798
1798
; GCN-NEXT: v_accvgpr_write_b32 a3, v19
1799
1799
; GCN-NEXT: s_nop 1
1800
- ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[0,0,0]
1800
+ ; GCN-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 33, -2 op_sel_hi:[1,1,0]
1801
1801
; GCN-NEXT: s_nop 7
1802
1802
; GCN-NEXT: s_nop 3
1803
1803
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1819,7 +1819,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1819
1819
; SDAG-NEXT: v_accvgpr_write_b32 a2, v18
1820
1820
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
1821
1821
; SDAG-NEXT: s_nop 1
1822
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[0,0,0]
1822
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, -2 op_sel_hi:[1,1,0]
1823
1823
; SDAG-NEXT: s_nop 7
1824
1824
; SDAG-NEXT: s_nop 3
1825
1825
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1837,7 +1837,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1837
1837
; GISEL-NEXT: v_accvgpr_write_b32 a3, v19
1838
1838
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
1839
1839
; GISEL-NEXT: s_nop 1
1840
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
1840
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[1,1,0]
1841
1841
; GISEL-NEXT: s_nop 7
1842
1842
; GISEL-NEXT: s_nop 3
1843
1843
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1860,7 +1860,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1860
1860
; SDAG-NEXT: v_accvgpr_write_b32 a3, v19
1861
1861
; SDAG-NEXT: v_mov_b32_e32 v16, 0x4d
1862
1862
; SDAG-NEXT: s_nop 1
1863
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[0,0,0]
1863
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s0, v16 op_sel_hi:[1,1,0]
1864
1864
; SDAG-NEXT: s_nop 7
1865
1865
; SDAG-NEXT: s_nop 3
1866
1866
; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1879,7 +1879,7 @@ define <4 x float> @test_mfma_scale_f32_16x16x128_f8f6f4_0_0__scaleA_kimm__scale
1879
1879
; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
1880
1880
; GISEL-NEXT: v_mov_b32_e32 v17, 0x4d
1881
1881
; GISEL-NEXT: s_nop 1
1882
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[0,0,0]
1882
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, v17 op_sel_hi:[1,1,0]
1883
1883
; GISEL-NEXT: s_nop 7
1884
1884
; GISEL-NEXT: s_nop 3
1885
1885
; GISEL-NEXT: v_accvgpr_read_b32 v0, a0
@@ -1921,7 +1921,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
1921
1921
; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
1922
1922
; SDAG-NEXT: v_mov_b32_e32 v17, s13
1923
1923
; SDAG-NEXT: s_nop 1
1924
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel_hi:[0,0,0] blgp:2
1924
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s12, v17 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
1925
1925
; SDAG-NEXT: s_nop 7
1926
1926
; SDAG-NEXT: s_nop 3
1927
1927
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[14:15]
@@ -1946,7 +1946,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
1946
1946
; GISEL-NEXT: v_accvgpr_write_b32 a3, s27
1947
1947
; GISEL-NEXT: v_mov_b32_e32 v16, s29
1948
1948
; GISEL-NEXT: s_nop 1
1949
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel_hi:[0,0,0] blgp:2
1949
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
1950
1950
; GISEL-NEXT: v_mov_b32_e32 v0, 0
1951
1951
; GISEL-NEXT: s_nop 7
1952
1952
; GISEL-NEXT: s_nop 2
@@ -1987,7 +1987,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
1987
1987
; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
1988
1988
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
1989
1989
; SDAG-NEXT: s_nop 1
1990
- ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel_hi:[0,0,0]
1990
+ ; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
1991
1991
; SDAG-NEXT: s_nop 7
1992
1992
; SDAG-NEXT: s_nop 3
1993
1993
; SDAG-NEXT: global_store_dwordx4 v16, a[0:3], s[4:5]
@@ -2013,7 +2013,7 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
2013
2013
; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
2014
2014
; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
2015
2015
; GISEL-NEXT: s_nop 1
2016
- ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel_hi:[0,0,0]
2016
+ ; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
2017
2017
; GISEL-NEXT: v_mov_b32_e32 v0, 0
2018
2018
; GISEL-NEXT: s_nop 7
2019
2019
; GISEL-NEXT: s_nop 2
0 commit comments