From 733135ea6efc0cb336a4f06bd8d07262a25590ef Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Thu, 27 Feb 2025 17:41:41 -0800 Subject: [PATCH 1/7] funnel shift combiner port from SelectionDAG ISel to GlobalISel --- .../include/llvm/Target/GlobalISel/Combine.td | 22 +++++++- llvm/test/CodeGen/AArch64/funnel-shift.ll | 12 ++--- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 50 +++++++++++++++++++ 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 5309d5952f087..9d0b1cab559be 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1033,6 +1033,24 @@ def funnel_shift_overshift: GICombineRule< (apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }]) >; +// Transform: fshl x, z, y | shl x, y -> fshl x, z, y +def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule< + (defs root:$root), + (match (G_FSHL $out1, $x, $z, $y), + (G_SHL $out2, $x, $y), + (G_OR $root, $out1, $out2)), + (apply (G_FSHL $root, $x, $z, $y)) +>; + +// Transform: fshr z, x, y | srl x, y -> fshr z, x, y +def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule< + (defs root:$root), + (match (G_FSHR $out1, $z, $x, $y), + (G_LSHR $out2, $x, $y), + (G_OR $root, $out1, $out2)), + (apply (G_FSHR $root, $z, $x, $y)) +>; + def rotate_out_of_range : GICombineRule< (defs root:$root), (match (wip_match_opcode G_ROTR, G_ROTL):$root, @@ -1105,7 +1123,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift, funnel_shift_to_rotate, funnel_shift_right_zero, funnel_shift_left_zero, - funnel_shift_overshift]>; + funnel_shift_overshift, + funnel_shift_or_shift_to_funnel_shift_left, + funnel_shift_or_shift_to_funnel_shift_right]>; def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll 
b/llvm/test/CodeGen/AArch64/funnel-shift.ll index e5aa360f804c1..254bbc934f10f 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-GI-LABEL: or_shl_fshl_simplify: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #31 // =0x1f -; CHECK-GI-NEXT: and w9, w2, #0x1f -; CHECK-GI-NEXT: lsr w10, w0, #1 -; CHECK-GI-NEXT: lsl w11, w1, w2 +; CHECK-GI-NEXT: lsr w9, w0, #1 +; CHECK-GI-NEXT: and w10, w2, #0x1f ; CHECK-GI-NEXT: bic w8, w8, w2 -; CHECK-GI-NEXT: lsl w9, w1, w9 -; CHECK-GI-NEXT: lsr w8, w10, w8 -; CHECK-GI-NEXT: orr w9, w9, w11 -; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: lsl w10, w1, w10 +; CHECK-GI-NEXT: lsr w8, w9, w8 +; CHECK-GI-NEXT: orr w0, w10, w8 ; CHECK-GI-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index 75e318a58fd45..f564abf3d3184 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -105,3 +105,53 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) { %trunc = trunc i48 %shl to i16 ret i16 %trunc } + +define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { +; RV32-LABEL: test_fshl_i32: +; RV32: # %bb.0: +; RV32-NEXT: not a3, a2 +; RV32-NEXT: sll a0, a0, a2 +; RV32-NEXT: srli a1, a1, 1 +; RV32-NEXT: srl a1, a1, a3 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: test_fshl_i32: +; RV64: # %bb.0: +; RV64-NEXT: not a3, a2 +; RV64-NEXT: sllw a0, a0, a2 +; RV64-NEXT: srliw a1, a1, 1 +; RV64-NEXT: srlw a1, a1, a3 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y) + %shl = shl i32 %x, %y + %or = or i32 %fshl, %shl + %trunc = trunc i32 %or to i16 + ret i16 %trunc +} + +define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) { +; RV32-LABEL: test_fshr_i32: +; RV32: # %bb.0: 
+; RV32-NEXT: not a3, a2 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: sll a0, a0, a3 +; RV32-NEXT: srl a1, a1, a2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: test_fshr_i32: +; RV64: # %bb.0: +; RV64-NEXT: not a3, a2 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: sllw a0, a0, a3 +; RV64-NEXT: srlw a1, a1, a2 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret + %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y) + %lshr = lshr i32 %x, %y + %or = or i32 %fshr, %lshr + %trunc = trunc i32 %or to i16 + ret i16 %trunc +} From 2fc366c432a669d00017b591deff3be3cdd83f07 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Sat, 3 May 2025 05:30:12 -0700 Subject: [PATCH 2/7] pre-commit test --- .../include/llvm/Target/GlobalISel/Combine.td | 16 +-- llvm/test/CodeGen/AArch64/funnel-shift.ll | 12 +- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 115 ++++++++++++------ 3 files changed, 94 insertions(+), 49 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 9d0b1cab559be..2cbf0c794628a 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1038,8 +1038,9 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule< (defs root:$root), (match (G_FSHL $out1, $x, $z, $y), (G_SHL $out2, $x, $y), - (G_OR $root, $out1, $out2)), - (apply (G_FSHL $root, $x, $z, $y)) + (G_OR $root, $out1, $out2), + [{ return MRI.hasOneUse(${out2}.getReg()); }]), + (apply (GIReplaceReg $root, $out1)) >; // Transform: fshr z, x, y | srl x, y -> fshr z, x, y @@ -1047,8 +1048,9 @@ def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule< (defs root:$root), (match (G_FSHR $out1, $z, $x, $y), (G_LSHR $out2, $x, $y), - (G_OR $root, $out1, $out2)), - (apply (G_FSHR $root, $z, $x, $y)) + (G_OR $root, $out1, $out2), + [{ return MRI.hasOneUse(${out2}.getReg()); }]), + (apply (GIReplaceReg $root, $out1)) >; def rotate_out_of_range : GICombineRule< @@ -1123,9 
+1125,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift, funnel_shift_to_rotate, funnel_shift_right_zero, funnel_shift_left_zero, - funnel_shift_overshift, - funnel_shift_or_shift_to_funnel_shift_left, - funnel_shift_or_shift_to_funnel_shift_right]>; + funnel_shift_overshift]>; + //funnel_shift_or_shift_to_funnel_shift_left, + //funnel_shift_or_shift_to_funnel_shift_right]>; def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index 254bbc934f10f..e5aa360f804c1 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -674,12 +674,14 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-GI-LABEL: or_shl_fshl_simplify: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #31 // =0x1f -; CHECK-GI-NEXT: lsr w9, w0, #1 -; CHECK-GI-NEXT: and w10, w2, #0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: lsr w10, w0, #1 +; CHECK-GI-NEXT: lsl w11, w1, w2 ; CHECK-GI-NEXT: bic w8, w8, w2 -; CHECK-GI-NEXT: lsl w10, w1, w10 -; CHECK-GI-NEXT: lsr w8, w9, w8 -; CHECK-GI-NEXT: orr w0, w10, w8 +; CHECK-GI-NEXT: lsl w9, w1, w9 +; CHECK-GI-NEXT: lsr w8, w10, w8 +; CHECK-GI-NEXT: orr w9, w9, w11 +; CHECK-GI-NEXT: orr w0, w9, w8 ; CHECK-GI-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index f564abf3d3184..36000e3496276 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -2,60 +2,91 @@ ; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV32 ; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV64 -define i16 
@test_lshr_i48(i48 %x) { -; RV32-LABEL: test_lshr_i48: +define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) { +; RV32-LABEL: test_lshr_i32: ; RV32: # %bb.0: -; RV32-NEXT: srli a0, a0, 16 +; RV32-NEXT: srl a1, a1, a2 +; RV32-NEXT: not a2, a2 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: sll a0, a0, a2 +; RV32-NEXT: or a2, a1, a1 +; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; -; RV64-LABEL: test_lshr_i48: +; RV64-LABEL: test_lshr_i32: ; RV64: # %bb.0: -; RV64-NEXT: srliw a0, a0, 16 +; RV64-NEXT: srlw a1, a1, a2 +; RV64-NEXT: not a2, a2 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: sllw a0, a0, a2 +; RV64-NEXT: or a2, a1, a1 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret - %lshr = lshr i48 %x, 16 - %trunc = trunc i48 %lshr to i16 + %lshr = lshr i32 %x, %y + %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y) + %or = or i32 %fshr, %lshr + %and = and i32 %or, %lshr + %trunc = trunc i32 %and to i16 ret i16 %trunc } -define i16 @test_ashr_i48(i48 %x) { -; RV32-LABEL: test_ashr_i48: +define i16 @test_ashr_i32(i32 %x) { +; RV32-LABEL: test_ashr_i32: ; RV32: # %bb.0: ; RV32-NEXT: srai a0, a0, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: test_ashr_i48: +; RV64-LABEL: test_ashr_i32: ; RV64: # %bb.0: ; RV64-NEXT: sraiw a0, a0, 16 ; RV64-NEXT: ret - %ashr = ashr i48 %x, 16 - %trunc = trunc i48 %ashr to i16 + %ashr = ashr i32 %x, 16 + %trunc = trunc i32 %ashr to i16 ret i16 %trunc } -define i16 @test_shl_i48(i48 %x) { -; RV32-LABEL: test_shl_i48: +define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) { +; RV32-LABEL: test_shl_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: not a3, a2 +; RV32-NEXT: sll a1, a1, a2 +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: srl a0, a0, a3 +; RV32-NEXT: or a2, a1, a1 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; -; RV64-LABEL: test_shl_i48: +; RV64-LABEL: test_shl_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 8 +; RV64-NEXT: not a3, a2 +; RV64-NEXT: sllw a1, a1, 
a2 +; RV64-NEXT: srliw a0, a0, 1 +; RV64-NEXT: srlw a0, a0, a3 +; RV64-NEXT: or a2, a1, a1 +; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret - %shl = shl i48 %x, 8 - %trunc = trunc i48 %shl to i16 + %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y) + %shl = shl i32 %x, %y + %or = or i32 %fshl, %shl + %and = and i32 %or, %shl + %trunc = trunc i32 %and to i16 ret i16 %trunc + } ; FIXME: Could use srlw to remove slli+srli. -define i16 @test_lshr_i48_2(i48 %x, i48 %y) { -; RV32-LABEL: test_lshr_i48_2: +define i16 @test_lshr_i48(i48 %x, i48 %y) { +; RV32-LABEL: test_lshr_i48: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 ; RV32-NEXT: srl a0, a0, a2 ; RV32-NEXT: ret ; -; RV64-LABEL: test_lshr_i48_2: +; RV64-LABEL: test_lshr_i48: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: slli a0, a0, 32 @@ -69,14 +100,14 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) { } ; FIXME: Could use sraw to remove the sext.w. -define i16 @test_ashr_i48_2(i48 %x, i48 %y) { -; RV32-LABEL: test_ashr_i48_2: +define i16 @test_ashr_i48(i48 %x, i48 %y) { +; RV32-LABEL: test_ashr_i48: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 ; RV32-NEXT: sra a0, a0, a2 ; RV32-NEXT: ret ; -; RV64-LABEL: test_ashr_i48_2: +; RV64-LABEL: test_ashr_i48: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: sext.w a0, a0 @@ -88,21 +119,21 @@ define i16 @test_ashr_i48_2(i48 %x, i48 %y) { ret i16 %trunc } -define i16 @test_shl_i48_2(i48 %x, i48 %y) { -; RV32-LABEL: test_shl_i48_2: +define i16 @test_shl_i32_2(i32 %x, i32 %y) { +; RV32-LABEL: test_shl_i32_2: ; RV32: # %bb.0: -; RV32-NEXT: andi a2, a2, 15 -; RV32-NEXT: sll a0, a0, a2 +; RV32-NEXT: andi a1, a1, 15 +; RV32-NEXT: sll a0, a0, a1 ; RV32-NEXT: ret ; -; RV64-LABEL: test_shl_i48_2: +; RV64-LABEL: test_shl_i32_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: sll a0, a0, a1 ; RV64-NEXT: ret - %and = and i48 %y, 15 - %shl = shl i48 %x, %and - %trunc = trunc i48 %shl to i16 + %and = and i32 %y, 15 + %shl = shl 
i32 %x, %and + %trunc = trunc i32 %shl to i16 ret i16 %trunc } @@ -113,7 +144,9 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { ; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: srli a1, a1, 1 ; RV32-NEXT: srl a1, a1, a3 -; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: test_fshl_i32: @@ -122,12 +155,15 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { ; RV64-NEXT: sllw a0, a0, a2 ; RV64-NEXT: srliw a1, a1, 1 ; RV64-NEXT: srlw a1, a1, a3 -; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: or a1, a0, a1 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y) %shl = shl i32 %x, %y %or = or i32 %fshl, %shl - %trunc = trunc i32 %or to i16 + %and = and i32 %or, %fshl + %trunc = trunc i32 %and to i16 ret i16 %trunc } @@ -139,6 +175,8 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) { ; RV32-NEXT: sll a0, a0, a3 ; RV32-NEXT: srl a1, a1, a2 ; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: or a1, a0, a1 +; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_fshr_i32: @@ -148,10 +186,13 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) { ; RV64-NEXT: sllw a0, a0, a3 ; RV64-NEXT: srlw a1, a1, a2 ; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: or a1, a0, a1 +; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y) %lshr = lshr i32 %x, %y %or = or i32 %fshr, %lshr - %trunc = trunc i32 %or to i16 + %and = and i32 %or, %fshr + %trunc = trunc i32 %and to i16 ret i16 %trunc } From ddf3bd130253dc5a3a4de50ef4b895bd030a3dc5 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Sat, 3 May 2025 05:32:51 -0700 Subject: [PATCH 3/7] With combiner on (optimized code) --- llvm/include/llvm/Target/GlobalISel/Combine.td | 6 +++--- llvm/test/CodeGen/AArch64/funnel-shift.ll | 12 +++++------- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 12 ++---------- 3 files changed, 10 insertions(+), 20 
deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 2cbf0c794628a..7293a92889086 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1125,9 +1125,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift, funnel_shift_to_rotate, funnel_shift_right_zero, funnel_shift_left_zero, - funnel_shift_overshift]>; - //funnel_shift_or_shift_to_funnel_shift_left, - //funnel_shift_or_shift_to_funnel_shift_right]>; + funnel_shift_overshift, + funnel_shift_or_shift_to_funnel_shift_left, + funnel_shift_or_shift_to_funnel_shift_right]>; def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index e5aa360f804c1..254bbc934f10f 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-GI-LABEL: or_shl_fshl_simplify: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #31 // =0x1f -; CHECK-GI-NEXT: and w9, w2, #0x1f -; CHECK-GI-NEXT: lsr w10, w0, #1 -; CHECK-GI-NEXT: lsl w11, w1, w2 +; CHECK-GI-NEXT: lsr w9, w0, #1 +; CHECK-GI-NEXT: and w10, w2, #0x1f ; CHECK-GI-NEXT: bic w8, w8, w2 -; CHECK-GI-NEXT: lsl w9, w1, w9 -; CHECK-GI-NEXT: lsr w8, w10, w8 -; CHECK-GI-NEXT: orr w9, w9, w11 -; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: lsl w10, w1, w10 +; CHECK-GI-NEXT: lsr w8, w9, w8 +; CHECK-GI-NEXT: orr w0, w10, w8 ; CHECK-GI-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index 36000e3496276..d617559bc90ab 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -144,9 
+144,7 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { ; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: srli a1, a1, 1 ; RV32-NEXT: srl a1, a1, a3 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: test_fshl_i32: @@ -155,9 +153,7 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) { ; RV64-NEXT: sllw a0, a0, a2 ; RV64-NEXT: srliw a1, a1, 1 ; RV64-NEXT: srlw a1, a1, a3 -; RV64-NEXT: or a1, a0, a1 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: ret %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y) %shl = shl i32 %x, %y @@ -175,8 +171,6 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) { ; RV32-NEXT: sll a0, a0, a3 ; RV32-NEXT: srl a1, a1, a2 ; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: and a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_fshr_i32: @@ -186,8 +180,6 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) { ; RV64-NEXT: sllw a0, a0, a3 ; RV64-NEXT: srlw a1, a1, a2 ; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: or a1, a0, a1 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: ret %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y) %lshr = lshr i32 %x, %y From 59ab79476019f04cd9c3181f7aee51e560537d4b Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Fri, 9 May 2025 23:06:07 -0700 Subject: [PATCH 4/7] Removed hasOneUse checks (note that the generated test code now lacks the redundant OR instruction!) 
--- llvm/include/llvm/Target/GlobalISel/Combine.td | 6 ++---- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 7293a92889086..ab5c5b6e8f903 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1038,8 +1038,7 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule< (defs root:$root), (match (G_FSHL $out1, $x, $z, $y), (G_SHL $out2, $x, $y), - (G_OR $root, $out1, $out2), - [{ return MRI.hasOneUse(${out2}.getReg()); }]), + (G_OR $root, $out1, $out2)), (apply (GIReplaceReg $root, $out1)) >; @@ -1048,8 +1047,7 @@ def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule< (defs root:$root), (match (G_FSHR $out1, $z, $x, $y), (G_LSHR $out2, $x, $y), - (G_OR $root, $out1, $out2), - [{ return MRI.hasOneUse(${out2}.getReg()); }]), + (G_OR $root, $out1, $out2)), (apply (GIReplaceReg $root, $out1)) >; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index d617559bc90ab..49f57c4942312 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -9,8 +9,7 @@ define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) { ; RV32-NEXT: not a2, a2 ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sll a0, a0, a2 -; RV32-NEXT: or a2, a1, a1 -; RV32-NEXT: or a0, a0, a2 +; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; @@ -20,8 +19,7 @@ define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) { ; RV64-NEXT: not a2, a2 ; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: sllw a0, a0, a2 -; RV64-NEXT: or a2, a1, a1 -; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %lshr = lshr i32 %x, %y @@ -54,8 +52,7 @@ define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) { ; RV32-NEXT: sll a1, a1, a2 ; RV32-NEXT: srli 
a0, a0, 1 ; RV32-NEXT: srl a0, a0, a3 -; RV32-NEXT: or a2, a1, a1 -; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; @@ -65,8 +62,7 @@ define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) { ; RV64-NEXT: sllw a1, a1, a2 ; RV64-NEXT: srliw a0, a0, 1 ; RV64-NEXT: srlw a0, a0, a3 -; RV64-NEXT: or a2, a1, a1 -; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y) From 3e6e0ac72792047acd39b8e8305457cdc30337a7 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Sat, 10 May 2025 04:31:03 -0700 Subject: [PATCH 5/7] Ensured G_OR is commutative in combines (now triggers no matter the order of operands) --- llvm/include/llvm/Target/GlobalISel/Combine.td | 18 ++++++++++++------ llvm/test/CodeGen/AArch64/funnel-shift.ll | 12 +++++------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index ab5c5b6e8f903..9f46a22dca189 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1034,20 +1034,26 @@ def funnel_shift_overshift: GICombineRule< >; // Transform: fshl x, z, y | shl x, y -> fshl x, z, y +// Transform: shl x, y | fshl x, z, y -> fshl x, z, y +def funnel_shift_or_shift_to_funnel_shift_left_frags : GICombinePatFrag< + (outs root: $dst, $out1, $out2), (ins), + !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], + (pattern (G_FSHL $out1, $x, $z, $y), (G_SHL $out2, $x, $y), inst))>; def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule< (defs root:$root), - (match (G_FSHL $out1, $x, $z, $y), - (G_SHL $out2, $x, $y), - (G_OR $root, $out1, $out2)), + (match (funnel_shift_or_shift_to_funnel_shift_left_frags $root, $out1, $out2)), (apply (GIReplaceReg $root, $out1)) >; // Transform: fshr z, x, y | srl x, y -> fshr z, x, y +// Transform: srl 
x, y | fshr z, x, y -> fshr z, x, y +def funnel_shift_or_shift_to_funnel_shift_right_frags : GICombinePatFrag< + (outs root: $dst, $out1, $out2), (ins), + !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], + (pattern (G_FSHR $out1, $z, $x, $y), (G_LSHR $out2, $x, $y), inst))>; def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule< (defs root:$root), - (match (G_FSHR $out1, $z, $x, $y), - (G_LSHR $out2, $x, $y), - (G_OR $root, $out1, $out2)), + (match (funnel_shift_or_shift_to_funnel_shift_right_frags $root, $out1, $out2)), (apply (GIReplaceReg $root, $out1)) >; diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index 254bbc934f10f..f9fd2ad1b5b6c 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -700,14 +700,12 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-GI-LABEL: or_lshr_fshr_simplify: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #31 // =0x1f -; CHECK-GI-NEXT: and w9, w2, #0x1f -; CHECK-GI-NEXT: lsl w10, w0, #1 -; CHECK-GI-NEXT: lsr w11, w1, w2 +; CHECK-GI-NEXT: lsl w9, w0, #1 +; CHECK-GI-NEXT: and w10, w2, #0x1f ; CHECK-GI-NEXT: bic w8, w8, w2 -; CHECK-GI-NEXT: lsr w9, w1, w9 -; CHECK-GI-NEXT: lsl w8, w10, w8 -; CHECK-GI-NEXT: orr w9, w11, w9 -; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: lsl w8, w9, w8 +; CHECK-GI-NEXT: lsr w9, w1, w10 +; CHECK-GI-NEXT: orr w0, w8, w9 ; CHECK-GI-NEXT: ret %shy = lshr i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s) From 96b04fe067f53bdcb36cf83bbafff7da6bfb6649 Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Tue, 20 May 2025 01:19:39 -0700 Subject: [PATCH 6/7] Added FIXME comments for TableGen's lack of G_OR commutativity handling --- llvm/include/llvm/Target/GlobalISel/Combine.td | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 
9f46a22dca189..c5ca474f8bcd6 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1035,6 +1035,8 @@ def funnel_shift_overshift: GICombineRule< // Transform: fshl x, z, y | shl x, y -> fshl x, z, y // Transform: shl x, y | fshl x, z, y -> fshl x, z, y +// FIXME: TableGen doesn't handle G_OR commutativity on its own, +// necessitating the use of !foreach to handle it manually. def funnel_shift_or_shift_to_funnel_shift_left_frags : GICombinePatFrag< (outs root: $dst, $out1, $out2), (ins), !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], @@ -1047,6 +1049,8 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule< // Transform: fshr z, x, y | srl x, y -> fshr z, x, y // Transform: srl x, y | fshr z, x, y -> fshr z, x, y +// FIXME: TableGen doesn't handle G_OR commutativity on its own, +// necessitating the use of !foreach to handle it manually. def funnel_shift_or_shift_to_funnel_shift_right_frags : GICombinePatFrag< (outs root: $dst, $out1, $out2), (ins), !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], From c929993e4a7430a062247c6097757b61b05d45be Mon Sep 17 00:00:00 2001 From: Axel Sorenson Date: Tue, 20 May 2025 01:54:33 -0700 Subject: [PATCH 7/7] Reversed changes to original shift.ll tests --- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 93 ++++++++------------- 1 file changed, 33 insertions(+), 60 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index 49f57c4942312..2180f4ab39a63 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -2,87 +2,60 @@ ; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV32 ; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV64 -define i16
@test_lshr_i32(i32 %_, i32 %x, i32 %y) { -; RV32-LABEL: test_lshr_i32: +define i16 @test_lshr_i48(i48 %x) { +; RV32-LABEL: test_lshr_i48: ; RV32: # %bb.0: -; RV32-NEXT: srl a1, a1, a2 -; RV32-NEXT: not a2, a2 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: sll a0, a0, a2 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: test_lshr_i32: +; RV64-LABEL: test_lshr_i48: ; RV64: # %bb.0: -; RV64-NEXT: srlw a1, a1, a2 -; RV64-NEXT: not a2, a2 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: sllw a0, a0, a2 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: srliw a0, a0, 16 ; RV64-NEXT: ret - %lshr = lshr i32 %x, %y - %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y) - %or = or i32 %fshr, %lshr - %and = and i32 %or, %lshr - %trunc = trunc i32 %and to i16 + %lshr = lshr i48 %x, 16 + %trunc = trunc i48 %lshr to i16 ret i16 %trunc } -define i16 @test_ashr_i32(i32 %x) { -; RV32-LABEL: test_ashr_i32: +define i16 @test_ashr_i48(i48 %x) { +; RV32-LABEL: test_ashr_i48: ; RV32: # %bb.0: ; RV32-NEXT: srai a0, a0, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: test_ashr_i32: +; RV64-LABEL: test_ashr_i48: ; RV64: # %bb.0: ; RV64-NEXT: sraiw a0, a0, 16 ; RV64-NEXT: ret - %ashr = ashr i32 %x, 16 - %trunc = trunc i32 %ashr to i16 + %ashr = ashr i48 %x, 16 + %trunc = trunc i48 %ashr to i16 ret i16 %trunc } -define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) { -; RV32-LABEL: test_shl_i32: +define i16 @test_shl_i48(i48 %x) { +; RV32-LABEL: test_shl_i48: ; RV32: # %bb.0: -; RV32-NEXT: not a3, a2 -; RV32-NEXT: sll a1, a1, a2 -; RV32-NEXT: srli a0, a0, 1 -; RV32-NEXT: srl a0, a0, a3 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: slli a0, a0, 8 ; RV32-NEXT: ret ; -; RV64-LABEL: test_shl_i32: +; RV64-LABEL: test_shl_i48: ; RV64: # %bb.0: -; RV64-NEXT: not a3, a2 -; RV64-NEXT: sllw a1, a1, a2 -; RV64-NEXT: srliw a0, a0, 1 -; RV64-NEXT: srlw a0, a0, a3 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: and a0, 
a0, a1 +; RV64-NEXT: slli a0, a0, 8 ; RV64-NEXT: ret - %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y) - %shl = shl i32 %x, %y - %or = or i32 %fshl, %shl - %and = and i32 %or, %shl - %trunc = trunc i32 %and to i16 + %shl = shl i48 %x, 8 + %trunc = trunc i48 %shl to i16 ret i16 %trunc - } ; FIXME: Could use srlw to remove slli+srli. -define i16 @test_lshr_i48(i48 %x, i48 %y) { -; RV32-LABEL: test_lshr_i48: +define i16 @test_lshr_i48_2(i48 %x, i48 %y) { +; RV32-LABEL: test_lshr_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 ; RV32-NEXT: srl a0, a0, a2 ; RV32-NEXT: ret ; -; RV64-LABEL: test_lshr_i48: +; RV64-LABEL: test_lshr_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: slli a0, a0, 32 @@ -96,14 +69,14 @@ define i16 @test_lshr_i48(i48 %x, i48 %y) { } ; FIXME: Could use sraw to remove the sext.w. -define i16 @test_ashr_i48(i48 %x, i48 %y) { -; RV32-LABEL: test_ashr_i48: +define i16 @test_ashr_i48_2(i48 %x, i48 %y) { +; RV32-LABEL: test_ashr_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 ; RV32-NEXT: sra a0, a0, a2 ; RV32-NEXT: ret ; -; RV64-LABEL: test_ashr_i48: +; RV64-LABEL: test_ashr_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: sext.w a0, a0 @@ -115,21 +88,21 @@ define i16 @test_ashr_i48(i48 %x, i48 %y) { ret i16 %trunc } -define i16 @test_shl_i32_2(i32 %x, i32 %y) { -; RV32-LABEL: test_shl_i32_2: +define i16 @test_shl_i48_2(i48 %x, i48 %y) { +; RV32-LABEL: test_shl_i48_2: ; RV32: # %bb.0: -; RV32-NEXT: andi a1, a1, 15 -; RV32-NEXT: sll a0, a0, a1 +; RV32-NEXT: andi a2, a2, 15 +; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: ret ; -; RV64-LABEL: test_shl_i32_2: +; RV64-LABEL: test_shl_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 ; RV64-NEXT: sll a0, a0, a1 ; RV64-NEXT: ret - %and = and i32 %y, 15 - %shl = shl i32 %x, %and - %trunc = trunc i32 %shl to i16 + %and = and i48 %y, 15 + %shl = shl i48 %x, %and + %trunc = trunc i48 %shl to i16 ret i16 %trunc }