[llvm-branch-commits] [llvm] [AArch64][GlobalISel] Added support for neon right shifts (PR #170832)

2025-12-05 Thread Joshua Rodriguez via llvm-branch-commits

https://github.com/JoshdRod updated 
https://github.com/llvm/llvm-project/pull/170832

>From 7e897eac1eee87148b1f3529a42e4b927b556d44 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez 
Date: Thu, 27 Nov 2025 15:34:40 +0000
Subject: [PATCH 1/9] [AArch64][GlobalISel] Removed fallback for sqshlu
 intrinsic

Added G_SQSHLU node, which lowers the llvm ir intrinsic aarch64_neon_sqshlu to 
the machine intrinsic sqshlu. Generated code is slightly less efficient compared 
to SDAG.
---
 llvm/lib/Target/AArch64/AArch64InstrGISel.td  |  8 +++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp| 12 +
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |  9 
 llvm/test/CodeGen/AArch64/arm64-vshift.ll | 49 ++-
 4 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td 
b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..7469a081d9787 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -252,6 +252,12 @@ def G_USDOT : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_SQSHLU : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+}
+
 // Generic instruction for the BSP pseudo. It is expanded into BSP, which
 // expands into BSL/BIT/BIF after register allocation.
 def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,8 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 
+def : GINodeEquiv;
+
 def : GINodeEquiv;
 
 def : GINodeEquiv;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..0010834e01894 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1857,6 +1857,18 @@ bool 
AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
   case Intrinsic::aarch64_neon_srhadd:
 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
+  case Intrinsic::aarch64_neon_sqshlu: {
+// Check if last operand is constant vector dup
+auto shiftAmount = 
isConstantOrConstantSplatVector(*MRI.getVRegDef(MI.getOperand(3).getReg()), 
MRI);
+if (shiftAmount) {
+   // If so, create a new intrinsic with the correct shift amount
+   MIB.buildInstr(AArch64::G_SQSHLU, {MI.getOperand(0)}, 
{MI.getOperand(2)}).addImm(shiftAmount->getSExtValue());
+   MI.eraseFromParent();
+   return true;
+} else {
+   return false;
+}
+  }
   case Intrinsic::aarch64_neon_abs: {
 // Lower the intrinsic to G_ABS.
 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, 
{MI.getOperand(2)});
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 652a31f4e65f2..aa1517533b753 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -1072,6 +1072,15 @@ AArch64RegisterBankInfo::getInstrMapping(const 
MachineInstr &MI) const {
 // Index needs to be a GPR.
 OpRegBankIdx[2] = PMI_FirstGPR;
 break;
+  case AArch64::G_SQSHLU:
+// Destination and source need to be FPRs.
+OpRegBankIdx[0] = PMI_FirstFPR;
+OpRegBankIdx[1] = PMI_FirstFPR;
+
+// Shift Index needs to be a GPR.
+OpRegBankIdx[2] = PMI_FirstGPR;
+break;
+
   case TargetOpcode::G_INSERT_VECTOR_ELT:
 OpRegBankIdx[0] = PMI_FirstFPR;
 OpRegBankIdx[1] = PMI_FirstFPR;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll 
b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 34843835d284a..961788f311041 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2,17 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s 
--check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | 
FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI:warning: Instruction selection used fallback path for sqshlu8b
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu4h
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu2s
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu16b
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu8h
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu4s
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu2d
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu1d_constant
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu_i64_constant
-; CHECK-GI NEXT:warning: Instruction selection used fallback 

[llvm-branch-commits] [llvm] [AArch64][GlobalISel] Added support for neon right shifts (PR #170832)

2025-12-05 Thread Joshua Rodriguez via llvm-branch-commits

https://github.com/JoshdRod created 
https://github.com/llvm/llvm-project/pull/170832

Many neon right shift intrinsics were not supported by GlobalISel, mainly due 
to a lack of legalisation logic. This logic has now been implemented.

>From 7e897eac1eee87148b1f3529a42e4b927b556d44 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez 
Date: Thu, 27 Nov 2025 15:34:40 +0000
Subject: [PATCH 1/8] [AArch64][GlobalISel] Removed fallback for sqshlu
 intrinsic

Added G_SQSHLU node, which lowers the llvm ir intrinsic aarch64_neon_sqshlu to 
the machine intrinsic sqshlu. Generated code is slightly less efficient compared 
to SDAG.
---
 llvm/lib/Target/AArch64/AArch64InstrGISel.td  |  8 +++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp| 12 +
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |  9 
 llvm/test/CodeGen/AArch64/arm64-vshift.ll | 49 ++-
 4 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td 
b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..7469a081d9787 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -252,6 +252,12 @@ def G_USDOT : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_SQSHLU : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type0:$src2);
+  let hasSideEffects = 0;
+}
+
 // Generic instruction for the BSP pseudo. It is expanded into BSP, which
 // expands into BSL/BIT/BIF after register allocation.
 def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,8 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 
+def : GINodeEquiv;
+
 def : GINodeEquiv;
 
 def : GINodeEquiv;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..0010834e01894 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1857,6 +1857,18 @@ bool 
AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
   case Intrinsic::aarch64_neon_srhadd:
 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
+  case Intrinsic::aarch64_neon_sqshlu: {
+// Check if last operand is constant vector dup
+auto shiftAmount = 
isConstantOrConstantSplatVector(*MRI.getVRegDef(MI.getOperand(3).getReg()), 
MRI);
+if (shiftAmount) {
+   // If so, create a new intrinsic with the correct shift amount
+   MIB.buildInstr(AArch64::G_SQSHLU, {MI.getOperand(0)}, 
{MI.getOperand(2)}).addImm(shiftAmount->getSExtValue());
+   MI.eraseFromParent();
+   return true;
+} else {
+   return false;
+}
+  }
   case Intrinsic::aarch64_neon_abs: {
 // Lower the intrinsic to G_ABS.
 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, 
{MI.getOperand(2)});
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 652a31f4e65f2..aa1517533b753 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -1072,6 +1072,15 @@ AArch64RegisterBankInfo::getInstrMapping(const 
MachineInstr &MI) const {
 // Index needs to be a GPR.
 OpRegBankIdx[2] = PMI_FirstGPR;
 break;
+  case AArch64::G_SQSHLU:
+// Destination and source need to be FPRs.
+OpRegBankIdx[0] = PMI_FirstFPR;
+OpRegBankIdx[1] = PMI_FirstFPR;
+
+// Shift Index needs to be a GPR.
+OpRegBankIdx[2] = PMI_FirstGPR;
+break;
+
   case TargetOpcode::G_INSERT_VECTOR_ELT:
 OpRegBankIdx[0] = PMI_FirstFPR;
 OpRegBankIdx[1] = PMI_FirstFPR;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll 
b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 34843835d284a..961788f311041 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2,17 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s 
--check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | 
FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI:warning: Instruction selection used fallback path for sqshlu8b
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu4h
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu2s
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu16b
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu8h
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu4s
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu2d
-; CHECK-GI NEXT:warning: Instruction selection used fallback path for 
sqshlu1d_constant
-; CHECK-GI