From 733135ea6efc0cb336a4f06bd8d07262a25590ef Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Thu, 27 Feb 2025 17:41:41 -0800
Subject: [PATCH 1/7] funnel shift combiner port from SelectionDAG ISel to
 GlobalISel

---
 .../include/llvm/Target/GlobalISel/Combine.td | 22 +++++++-
 llvm/test/CodeGen/AArch64/funnel-shift.ll     | 12 ++---
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll   | 50 +++++++++++++++++++
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 5309d5952f087..9d0b1cab559be 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1033,6 +1033,24 @@ def funnel_shift_overshift: GICombineRule<
   (apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }])
 >;
 
+// Transform: fshl x, z, y | shl x, y -> fshl x, z, y
+def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
+  (defs root:$root), 
+  (match (G_FSHL $out1, $x, $z, $y),
+         (G_SHL $out2, $x, $y),
+         (G_OR $root, $out1, $out2)),
+  (apply (G_FSHL $root, $x, $z, $y))
+>;
+
+// Transform: fshr z, x, y | srl x, y -> fshr z, x, y
+def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
+  (defs root:$root), 
+  (match (G_FSHR $out1, $z, $x, $y),
+         (G_LSHR $out2, $x, $y),
+         (G_OR $root, $out1, $out2)),
+  (apply (G_FSHR $root, $z, $x, $y))
+>;
+
 def rotate_out_of_range : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -1105,7 +1123,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
                                             funnel_shift_to_rotate,
                                             funnel_shift_right_zero,
                                             funnel_shift_left_zero,
-                                            funnel_shift_overshift]>;
+                                            funnel_shift_overshift,
+                                            funnel_shift_or_shift_to_funnel_shift_left,
+                                            funnel_shift_or_shift_to_funnel_shift_right]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e5aa360f804c1..254bbc934f10f 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    and w9, w2, #0x1f
-; CHECK-GI-NEXT:    lsr w10, w0, #1
-; CHECK-GI-NEXT:    lsl w11, w1, w2
+; CHECK-GI-NEXT:    lsr w9, w0, #1
+; CHECK-GI-NEXT:    and w10, w2, #0x1f
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsl w9, w1, w9
-; CHECK-GI-NEXT:    lsr w8, w10, w8
-; CHECK-GI-NEXT:    orr w9, w9, w11
-; CHECK-GI-NEXT:    orr w0, w9, w8
+; CHECK-GI-NEXT:    lsl w10, w1, w10
+; CHECK-GI-NEXT:    lsr w8, w9, w8
+; CHECK-GI-NEXT:    orr w0, w10, w8
 ; CHECK-GI-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index 75e318a58fd45..f564abf3d3184 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -105,3 +105,53 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) {
   %trunc = trunc i48 %shl to i16
   ret i16 %trunc
 }
+
+define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
+; RV32-LABEL: test_fshl_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    srl a1, a1, a3
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_fshl_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    sllw a0, a0, a2
+; RV64-NEXT:    srliw a1, a1, 1
+; RV64-NEXT:    srlw a1, a1, a3
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+  %shl = shl i32 %x, %y
+  %or = or i32 %fshl, %shl
+  %trunc = trunc i32 %or to i16
+  ret i16 %trunc
+}
+
+define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_fshr_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sll a0, a0, a3
+; RV32-NEXT:    srl a1, a1, a2
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test_fshr_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    sllw a0, a0, a3
+; RV64-NEXT:    srlw a1, a1, a2
+; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    ret
+  %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+  %lshr = lshr i32 %x, %y
+  %or = or i32 %fshr, %lshr
+  %trunc = trunc i32 %or to i16
+  ret i16 %trunc
+}

From 2fc366c432a669d00017b591deff3be3cdd83f07 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Sat, 3 May 2025 05:30:12 -0700
Subject: [PATCH 2/7] Pre-commit tests (new funnel shift combines disabled)

---
 .../include/llvm/Target/GlobalISel/Combine.td |  16 +--
 llvm/test/CodeGen/AArch64/funnel-shift.ll     |  12 +-
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll   | 115 ++++++++++++------
 3 files changed, 94 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9d0b1cab559be..2cbf0c794628a 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1038,8 +1038,9 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
   (defs root:$root), 
   (match (G_FSHL $out1, $x, $z, $y),
          (G_SHL $out2, $x, $y),
-         (G_OR $root, $out1, $out2)),
-  (apply (G_FSHL $root, $x, $z, $y))
+         (G_OR $root, $out1, $out2),
+         [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+  (apply (GIReplaceReg $root, $out1))
 >;
 
 // Transform: fshr z, x, y | srl x, y -> fshr z, x, y
@@ -1047,8 +1048,9 @@ def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
   (defs root:$root), 
   (match (G_FSHR $out1, $z, $x, $y),
          (G_LSHR $out2, $x, $y),
-         (G_OR $root, $out1, $out2)),
-  (apply (G_FSHR $root, $z, $x, $y))
+         (G_OR $root, $out1, $out2),
+         [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+  (apply (GIReplaceReg $root, $out1))
 >;
 
 def rotate_out_of_range : GICombineRule<
@@ -1123,9 +1125,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
                                             funnel_shift_to_rotate,
                                             funnel_shift_right_zero,
                                             funnel_shift_left_zero,
-                                            funnel_shift_overshift,
-                                            funnel_shift_or_shift_to_funnel_shift_left,
-                                            funnel_shift_or_shift_to_funnel_shift_right]>;
+                                            funnel_shift_overshift]>;
+                                            //funnel_shift_or_shift_to_funnel_shift_left,
+                                            //funnel_shift_or_shift_to_funnel_shift_right]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 254bbc934f10f..e5aa360f804c1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,12 +674,14 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    lsr w9, w0, #1
-; CHECK-GI-NEXT:    and w10, w2, #0x1f
+; CHECK-GI-NEXT:    and w9, w2, #0x1f
+; CHECK-GI-NEXT:    lsr w10, w0, #1
+; CHECK-GI-NEXT:    lsl w11, w1, w2
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsl w10, w1, w10
-; CHECK-GI-NEXT:    lsr w8, w9, w8
-; CHECK-GI-NEXT:    orr w0, w10, w8
+; CHECK-GI-NEXT:    lsl w9, w1, w9
+; CHECK-GI-NEXT:    lsr w8, w10, w8
+; CHECK-GI-NEXT:    orr w9, w9, w11
+; CHECK-GI-NEXT:    orr w0, w9, w8
 ; CHECK-GI-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index f564abf3d3184..36000e3496276 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -2,60 +2,91 @@
 ; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV32
 ; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV64
 
-define i16 @test_lshr_i48(i48 %x) {
-; RV32-LABEL: test_lshr_i48:
+define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_lshr_i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    srli a0, a0, 16
+; RV32-NEXT:    srl a1, a1, a2
+; RV32-NEXT:    not a2, a2
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    or a2, a1, a1
+; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_lshr_i48:
+; RV64-LABEL: test_lshr_i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    srliw a0, a0, 16
+; RV64-NEXT:    srlw a1, a1, a2
+; RV64-NEXT:    not a2, a2
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    sllw a0, a0, a2
+; RV64-NEXT:    or a2, a1, a1
+; RV64-NEXT:    or a0, a0, a2
+; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
-  %lshr = lshr i48 %x, 16
-  %trunc = trunc i48 %lshr to i16
+  %lshr = lshr i32 %x, %y
+  %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+  %or = or i32 %fshr, %lshr
+  %and = and i32 %or, %lshr
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
 }
 
-define i16 @test_ashr_i48(i48 %x) {
-; RV32-LABEL: test_ashr_i48:
+define i16 @test_ashr_i32(i32 %x) {
+; RV32-LABEL: test_ashr_i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    srai a0, a0, 16
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_ashr_i48:
+; RV64-LABEL: test_ashr_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sraiw a0, a0, 16
 ; RV64-NEXT:    ret
-  %ashr = ashr i48 %x, 16
-  %trunc = trunc i48 %ashr to i16
+  %ashr = ashr i32 %x, 16
+  %trunc = trunc i32 %ashr to i16
   ret i16 %trunc
 }
 
-define i16 @test_shl_i48(i48 %x) {
-; RV32-LABEL: test_shl_i48:
+define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_shl_i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a0, a0, 8
+; RV32-NEXT:    not a3, a2
+; RV32-NEXT:    sll a1, a1, a2
+; RV32-NEXT:    srli a0, a0, 1
+; RV32-NEXT:    srl a0, a0, a3
+; RV32-NEXT:    or a2, a1, a1
+; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_shl_i48:
+; RV64-LABEL: test_shl_i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a0, a0, 8
+; RV64-NEXT:    not a3, a2
+; RV64-NEXT:    sllw a1, a1, a2
+; RV64-NEXT:    srliw a0, a0, 1
+; RV64-NEXT:    srlw a0, a0, a3
+; RV64-NEXT:    or a2, a1, a1
+; RV64-NEXT:    or a0, a2, a0
+; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
-  %shl = shl i48 %x, 8
-  %trunc = trunc i48 %shl to i16
+  %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+  %shl = shl i32 %x, %y
+  %or = or i32 %fshl, %shl
+  %and = and i32 %or, %shl
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
+
 }
 
 ; FIXME: Could use srlw to remove slli+srli.
-define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_lshr_i48_2:
+define i16 @test_lshr_i48(i48 %x, i48 %y) {
+; RV32-LABEL: test_lshr_i48:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    andi a2, a2, 15
 ; RV32-NEXT:    srl a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_lshr_i48_2:
+; RV64-LABEL: test_lshr_i48:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    slli a0, a0, 32
@@ -69,14 +100,14 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
 }
 
 ; FIXME: Could use sraw to remove the sext.w.
-define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_ashr_i48_2:
+define i16 @test_ashr_i48(i48 %x, i48 %y) {
+; RV32-LABEL: test_ashr_i48:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    andi a2, a2, 15
 ; RV32-NEXT:    sra a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_ashr_i48_2:
+; RV64-LABEL: test_ashr_i48:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    sext.w a0, a0
@@ -88,21 +119,21 @@ define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
   ret i16 %trunc
 }
 
-define i16 @test_shl_i48_2(i48 %x, i48 %y) {
-; RV32-LABEL: test_shl_i48_2:
+define i16 @test_shl_i32_2(i32 %x, i32 %y) {
+; RV32-LABEL: test_shl_i32_2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    andi a2, a2, 15
-; RV32-NEXT:    sll a0, a0, a2
+; RV32-NEXT:    andi a1, a1, 15
+; RV32-NEXT:    sll a0, a0, a1
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_shl_i48_2:
+; RV64-LABEL: test_shl_i32_2:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    sll a0, a0, a1
 ; RV64-NEXT:    ret
-  %and = and i48 %y, 15
-  %shl = shl i48 %x, %and
-  %trunc = trunc i48 %shl to i16
+  %and = and i32 %y, 15
+  %shl = shl i32 %x, %and
+  %trunc = trunc i32 %shl to i16
   ret i16 %trunc
 }
 
@@ -113,7 +144,9 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
 ; RV32-NEXT:    sll a0, a0, a2
 ; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    srl a1, a1, a3
-; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    or a1, a0, a1
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_fshl_i32:
@@ -122,12 +155,15 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
 ; RV64-NEXT:    sllw a0, a0, a2
 ; RV64-NEXT:    srliw a1, a1, 1
 ; RV64-NEXT:    srlw a1, a1, a3
-; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    or a1, a0, a1
+; RV64-NEXT:    or a0, a1, a0
+; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
   %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
   %shl = shl i32 %x, %y
   %or = or i32 %fshl, %shl
-  %trunc = trunc i32 %or to i16
+  %and = and i32 %or, %fshl
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
 }
 
@@ -139,6 +175,8 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV32-NEXT:    sll a0, a0, a3
 ; RV32-NEXT:    srl a1, a1, a2
 ; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    or a1, a0, a1
+; RV32-NEXT:    and a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_fshr_i32:
@@ -148,10 +186,13 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV64-NEXT:    sllw a0, a0, a3
 ; RV64-NEXT:    srlw a1, a1, a2
 ; RV64-NEXT:    or a0, a0, a1
+; RV64-NEXT:    or a1, a0, a1
+; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    ret
   %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
   %lshr = lshr i32 %x, %y
   %or = or i32 %fshr, %lshr
-  %trunc = trunc i32 %or to i16
+  %and = and i32 %or, %fshr
+  %trunc = trunc i32 %and to i16
   ret i16 %trunc
 }

From ddf3bd130253dc5a3a4de50ef4b895bd030a3dc5 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Sat, 3 May 2025 05:32:51 -0700
Subject: [PATCH 3/7] Enable the new funnel shift combines and update test checks

---
 llvm/include/llvm/Target/GlobalISel/Combine.td |  6 +++---
 llvm/test/CodeGen/AArch64/funnel-shift.ll      | 12 +++++-------
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll    | 12 ++----------
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2cbf0c794628a..7293a92889086 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1125,9 +1125,9 @@ def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
                                             funnel_shift_to_rotate,
                                             funnel_shift_right_zero,
                                             funnel_shift_left_zero,
-                                            funnel_shift_overshift]>;
-                                            //funnel_shift_or_shift_to_funnel_shift_left,
-                                            //funnel_shift_or_shift_to_funnel_shift_right]>;
+                                            funnel_shift_overshift,
+                                            funnel_shift_or_shift_to_funnel_shift_left,
+                                            funnel_shift_or_shift_to_funnel_shift_right]>;
 
 def bitfield_extract_from_sext_inreg : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e5aa360f804c1..254bbc934f10f 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -674,14 +674,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_shl_fshl_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    and w9, w2, #0x1f
-; CHECK-GI-NEXT:    lsr w10, w0, #1
-; CHECK-GI-NEXT:    lsl w11, w1, w2
+; CHECK-GI-NEXT:    lsr w9, w0, #1
+; CHECK-GI-NEXT:    and w10, w2, #0x1f
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsl w9, w1, w9
-; CHECK-GI-NEXT:    lsr w8, w10, w8
-; CHECK-GI-NEXT:    orr w9, w9, w11
-; CHECK-GI-NEXT:    orr w0, w9, w8
+; CHECK-GI-NEXT:    lsl w10, w1, w10
+; CHECK-GI-NEXT:    lsr w8, w9, w8
+; CHECK-GI-NEXT:    orr w0, w10, w8
 ; CHECK-GI-NEXT:    ret
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index 36000e3496276..d617559bc90ab 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -144,9 +144,7 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
 ; RV32-NEXT:    sll a0, a0, a2
 ; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    srl a1, a1, a3
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_fshl_i32:
@@ -155,9 +153,7 @@ define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
 ; RV64-NEXT:    sllw a0, a0, a2
 ; RV64-NEXT:    srliw a1, a1, 1
 ; RV64-NEXT:    srlw a1, a1, a3
-; RV64-NEXT:    or a1, a0, a1
-; RV64-NEXT:    or a0, a1, a0
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    or a0, a0, a1
 ; RV64-NEXT:    ret
   %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
   %shl = shl i32 %x, %y
@@ -175,8 +171,6 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV32-NEXT:    sll a0, a0, a3
 ; RV32-NEXT:    srl a1, a1, a2
 ; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    or a1, a0, a1
-; RV32-NEXT:    and a0, a1, a0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: test_fshr_i32:
@@ -186,8 +180,6 @@ define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV64-NEXT:    sllw a0, a0, a3
 ; RV64-NEXT:    srlw a1, a1, a2
 ; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    or a1, a0, a1
-; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    ret
   %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
   %lshr = lshr i32 %x, %y

From 59ab79476019f04cd9c3181f7aee51e560537d4b Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Fri, 9 May 2025 23:06:07 -0700
Subject: [PATCH 4/7] Removed hasOneUse checks (note that the generated test
 code now lacks the redundant OR instruction!)

---
 llvm/include/llvm/Target/GlobalISel/Combine.td |  6 ++----
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll    | 12 ++++--------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7293a92889086..ab5c5b6e8f903 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1038,8 +1038,7 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
   (defs root:$root), 
   (match (G_FSHL $out1, $x, $z, $y),
          (G_SHL $out2, $x, $y),
-         (G_OR $root, $out1, $out2),
-         [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+         (G_OR $root, $out1, $out2)),
   (apply (GIReplaceReg $root, $out1))
 >;
 
@@ -1048,8 +1047,7 @@ def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
   (defs root:$root), 
   (match (G_FSHR $out1, $z, $x, $y),
          (G_LSHR $out2, $x, $y),
-         (G_OR $root, $out1, $out2),
-         [{ return MRI.hasOneUse(${out2}.getReg()); }]),
+         (G_OR $root, $out1, $out2)),
   (apply (GIReplaceReg $root, $out1))
 >;
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index d617559bc90ab..49f57c4942312 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -9,8 +9,7 @@ define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV32-NEXT:    not a2, a2
 ; RV32-NEXT:    slli a0, a0, 1
 ; RV32-NEXT:    sll a0, a0, a2
-; RV32-NEXT:    or a2, a1, a1
-; RV32-NEXT:    or a0, a0, a2
+; RV32-NEXT:    or a0, a0, a1
 ; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -20,8 +19,7 @@ define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) {
 ; RV64-NEXT:    not a2, a2
 ; RV64-NEXT:    slli a0, a0, 1
 ; RV64-NEXT:    sllw a0, a0, a2
-; RV64-NEXT:    or a2, a1, a1
-; RV64-NEXT:    or a0, a0, a2
+; RV64-NEXT:    or a0, a0, a1
 ; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
   %lshr = lshr i32 %x, %y
@@ -54,8 +52,7 @@ define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) {
 ; RV32-NEXT:    sll a1, a1, a2
 ; RV32-NEXT:    srli a0, a0, 1
 ; RV32-NEXT:    srl a0, a0, a3
-; RV32-NEXT:    or a2, a1, a1
-; RV32-NEXT:    or a0, a2, a0
+; RV32-NEXT:    or a0, a1, a0
 ; RV32-NEXT:    and a0, a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -65,8 +62,7 @@ define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) {
 ; RV64-NEXT:    sllw a1, a1, a2
 ; RV64-NEXT:    srliw a0, a0, 1
 ; RV64-NEXT:    srlw a0, a0, a3
-; RV64-NEXT:    or a2, a1, a1
-; RV64-NEXT:    or a0, a2, a0
+; RV64-NEXT:    or a0, a1, a0
 ; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    ret
   %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)

From 3e6e0ac72792047acd39b8e8305457cdc30337a7 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Sat, 10 May 2025 04:31:03 -0700
Subject: [PATCH 5/7] Ensured G_OR is commutative in combines (now triggers no
 matter the order of operands)

---
 llvm/include/llvm/Target/GlobalISel/Combine.td | 18 ++++++++++++------
 llvm/test/CodeGen/AArch64/funnel-shift.ll      | 12 +++++-------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ab5c5b6e8f903..9f46a22dca189 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1034,20 +1034,26 @@ def funnel_shift_overshift: GICombineRule<
 >;
 
 // Transform: fshl x, z, y | shl x, y -> fshl x, z, y
+// Transform: shl x, y | fshl x, z, y  -> fshl x, z, y
+def funnel_shift_or_shift_to_funnel_shift_left_frags : GICombinePatFrag<
+  (outs root: $dst, $out1, $out2), (ins),
+  !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], 
+           (pattern (G_FSHL $out1, $x, $z, $y), (G_SHL $out2, $x, $y), inst))>;
 def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
   (defs root:$root), 
-  (match (G_FSHL $out1, $x, $z, $y),
-         (G_SHL $out2, $x, $y),
-         (G_OR $root, $out1, $out2)),
+  (match (funnel_shift_or_shift_to_funnel_shift_left_frags $root, $out1, $out2)),
   (apply (GIReplaceReg $root, $out1))
 >;
 
 // Transform: fshr z, x, y | srl x, y -> fshr z, x, y
+// Transform: srl x, y | fshr z, x, y -> fshr z, x, y
+def funnel_shift_or_shift_to_funnel_shift_right_frags : GICombinePatFrag<
+  (outs root: $dst, $out1, $out2), (ins),
+  !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], 
+           (pattern (G_FSHR $out1, $z, $x, $y), (G_LSHR $out2, $x, $y), inst))>;
 def funnel_shift_or_shift_to_funnel_shift_right: GICombineRule<
   (defs root:$root), 
-  (match (G_FSHR $out1, $z, $x, $y),
-         (G_LSHR $out2, $x, $y),
-         (G_OR $root, $out1, $out2)),
+  (match (funnel_shift_or_shift_to_funnel_shift_right_frags $root, $out1, $out2)), 
   (apply (GIReplaceReg $root, $out1))
 >;
 
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 254bbc934f10f..f9fd2ad1b5b6c 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -700,14 +700,12 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-GI-LABEL: or_lshr_fshr_simplify:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
-; CHECK-GI-NEXT:    and w9, w2, #0x1f
-; CHECK-GI-NEXT:    lsl w10, w0, #1
-; CHECK-GI-NEXT:    lsr w11, w1, w2
+; CHECK-GI-NEXT:    lsl w9, w0, #1
+; CHECK-GI-NEXT:    and w10, w2, #0x1f
 ; CHECK-GI-NEXT:    bic w8, w8, w2
-; CHECK-GI-NEXT:    lsr w9, w1, w9
-; CHECK-GI-NEXT:    lsl w8, w10, w8
-; CHECK-GI-NEXT:    orr w9, w11, w9
-; CHECK-GI-NEXT:    orr w0, w9, w8
+; CHECK-GI-NEXT:    lsl w8, w9, w8
+; CHECK-GI-NEXT:    lsr w9, w1, w10
+; CHECK-GI-NEXT:    orr w0, w8, w9
 ; CHECK-GI-NEXT:    ret
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)

From 96b04fe067f53bdcb36cf83bbafff7da6bfb6649 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Tue, 20 May 2025 01:19:39 -0700
Subject: [PATCH 6/7] Added FIXME comments for TableGen's lack of G_OR
 commutativity handling

---
 llvm/include/llvm/Target/GlobalISel/Combine.td | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9f46a22dca189..c5ca474f8bcd6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1035,6 +1035,8 @@ def funnel_shift_overshift: GICombineRule<
 
 // Transform: fshl x, z, y | shl x, y -> fshl x, z, y
 // Transform: shl x, y | fshl x, z, y  -> fshl x, z, y
+// FIXME: TableGen does not handle G_OR commutativity on its own, so both
+//        operand orders are matched manually via !foreach.
 def funnel_shift_or_shift_to_funnel_shift_left_frags : GICombinePatFrag<
   (outs root: $dst, $out1, $out2), (ins),
   !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], 
@@ -1047,6 +1049,8 @@ def funnel_shift_or_shift_to_funnel_shift_left: GICombineRule<
 
 // Transform: fshr z, x, y | srl x, y -> fshr z, x, y
 // Transform: srl x, y | fshr z, x, y -> fshr z, x, y
+// FIXME: TableGen does not handle G_OR commutativity on its own, so both
+//        operand orders are matched manually via !foreach.
 def funnel_shift_or_shift_to_funnel_shift_right_frags : GICombinePatFrag<
   (outs root: $dst, $out1, $out2), (ins),
   !foreach(inst, [(G_OR $dst, $out1, $out2), (G_OR $dst, $out2, $out1)], 

From c929993e4a7430a062247c6097757b61b05d45be Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson@gmail.com>
Date: Tue, 20 May 2025 01:54:33 -0700
Subject: [PATCH 7/7] Reversed changes to original shift.ll tests

---
 llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 93 ++++++++-------------
 1 file changed, 33 insertions(+), 60 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
index 49f57c4942312..2180f4ab39a63 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll
@@ -2,87 +2,60 @@
 ; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV32
 ; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=RV64
 
-define i16 @test_lshr_i32(i32 %_, i32 %x, i32 %y) {
-; RV32-LABEL: test_lshr_i32:
+define i16 @test_lshr_i48(i48 %x) {
+; RV32-LABEL: test_lshr_i48:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    srl a1, a1, a2
-; RV32-NEXT:    not a2, a2
-; RV32-NEXT:    slli a0, a0, 1
-; RV32-NEXT:    sll a0, a0, a2
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    srli a0, a0, 16
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_lshr_i32:
+; RV64-LABEL: test_lshr_i48:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    srlw a1, a1, a2
-; RV64-NEXT:    not a2, a2
-; RV64-NEXT:    slli a0, a0, 1
-; RV64-NEXT:    sllw a0, a0, a2
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    srliw a0, a0, 16
 ; RV64-NEXT:    ret
-  %lshr = lshr i32 %x, %y
-  %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
-  %or = or i32 %fshr, %lshr
-  %and = and i32 %or, %lshr
-  %trunc = trunc i32 %and to i16
+  %lshr = lshr i48 %x, 16
+  %trunc = trunc i48 %lshr to i16
   ret i16 %trunc
 }
 
-define i16 @test_ashr_i32(i32 %x) {
-; RV32-LABEL: test_ashr_i32:
+define i16 @test_ashr_i48(i48 %x) {
+; RV32-LABEL: test_ashr_i48:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    srai a0, a0, 16
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_ashr_i32:
+; RV64-LABEL: test_ashr_i48:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sraiw a0, a0, 16
 ; RV64-NEXT:    ret
-  %ashr = ashr i32 %x, 16
-  %trunc = trunc i32 %ashr to i16
+  %ashr = ashr i48 %x, 16
+  %trunc = trunc i48 %ashr to i16
   ret i16 %trunc
 }
 
-define i16 @test_shl_i32(i32 %_, i32 %x, i32 %y) {
-; RV32-LABEL: test_shl_i32:
+define i16 @test_shl_i48(i48 %x) {
+; RV32-LABEL: test_shl_i48:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    not a3, a2
-; RV32-NEXT:    sll a1, a1, a2
-; RV32-NEXT:    srli a0, a0, 1
-; RV32-NEXT:    srl a0, a0, a3
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 8
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_shl_i32:
+; RV64-LABEL: test_shl_i48:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    not a3, a2
-; RV64-NEXT:    sllw a1, a1, a2
-; RV64-NEXT:    srliw a0, a0, 1
-; RV64-NEXT:    srlw a0, a0, a3
-; RV64-NEXT:    or a0, a1, a0
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    slli a0, a0, 8
 ; RV64-NEXT:    ret
-  %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
-  %shl = shl i32 %x, %y
-  %or = or i32 %fshl, %shl
-  %and = and i32 %or, %shl
-  %trunc = trunc i32 %and to i16
+  %shl = shl i48 %x, 8
+  %trunc = trunc i48 %shl to i16
   ret i16 %trunc
-
 }
 
 ; FIXME: Could use srlw to remove slli+srli.
-define i16 @test_lshr_i48(i48 %x, i48 %y) {
-; RV32-LABEL: test_lshr_i48:
+define i16 @test_lshr_i48_2(i48 %x, i48 %y) {
+; RV32-LABEL: test_lshr_i48_2:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    andi a2, a2, 15
 ; RV32-NEXT:    srl a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_lshr_i48:
+; RV64-LABEL: test_lshr_i48_2:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    slli a0, a0, 32
@@ -96,14 +69,14 @@ define i16 @test_lshr_i48(i48 %x, i48 %y) {
 }
 
 ; FIXME: Could use sraw to remove the sext.w.
-define i16 @test_ashr_i48(i48 %x, i48 %y) {
-; RV32-LABEL: test_ashr_i48:
+define i16 @test_ashr_i48_2(i48 %x, i48 %y) {
+; RV32-LABEL: test_ashr_i48_2:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    andi a2, a2, 15
 ; RV32-NEXT:    sra a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_ashr_i48:
+; RV64-LABEL: test_ashr_i48_2:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    sext.w a0, a0
@@ -115,21 +88,21 @@ define i16 @test_ashr_i48(i48 %x, i48 %y) {
   ret i16 %trunc
 }
 
-define i16 @test_shl_i32_2(i32 %x, i32 %y) {
-; RV32-LABEL: test_shl_i32_2:
+define i16 @test_shl_i48_2(i48 %x, i48 %y) {
+; RV32-LABEL: test_shl_i48_2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    andi a1, a1, 15
-; RV32-NEXT:    sll a0, a0, a1
+; RV32-NEXT:    andi a2, a2, 15
+; RV32-NEXT:    sll a0, a0, a2
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: test_shl_i32_2:
+; RV64-LABEL: test_shl_i48_2:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    andi a1, a1, 15
 ; RV64-NEXT:    sll a0, a0, a1
 ; RV64-NEXT:    ret
-  %and = and i32 %y, 15
-  %shl = shl i32 %x, %and
-  %trunc = trunc i32 %shl to i16
+  %and = and i48 %y, 15
+  %shl = shl i48 %x, %and
+  %trunc = trunc i48 %shl to i16
   ret i16 %trunc
 }