[llvm-branch-commits] [llvm] [AArch64][GlobalISel] Added support for neon right shifts (PR #170832)
https://github.com/JoshdRod updated
https://github.com/llvm/llvm-project/pull/170832
>From 7e897eac1eee87148b1f3529a42e4b927b556d44 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez
Date: Thu, 27 Nov 2025 15:34:40 +0000
Subject: [PATCH 1/9] [AArch64][GlobalISel] Removed fallback for sqshlu
intrinsic
Added G_SQSHLU node, which lowers the LLVM IR intrinsic aarch64_neon_sqshlu to
the machine intrinsic sqshlu. Generated code is slightly less efficient compared
to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 8 +++
.../AArch64/GISel/AArch64LegalizerInfo.cpp| 12 +
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 9
llvm/test/CodeGen/AArch64/arm64-vshift.ll | 49 ++-
4 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..7469a081d9787 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -252,6 +252,12 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SQSHLU : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,8 @@ def : GINodeEquiv;
def : GINodeEquiv;
def : GINodeEquiv;
+def : GINodeEquiv;
+
def : GINodeEquiv;
def : GINodeEquiv;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..0010834e01894 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1857,6 +1857,18 @@ bool
AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
case Intrinsic::aarch64_neon_srhadd:
return LowerBinOp(TargetOpcode::G_SAVGCEIL);
+ case Intrinsic::aarch64_neon_sqshlu: {
+// Check if last operand is constant vector dup
+auto shiftAmount =
isConstantOrConstantSplatVector(*MRI.getVRegDef(MI.getOperand(3).getReg()),
MRI);
+if (shiftAmount) {
+ // If so, create a new intrinsic with the correct shift amount
+ MIB.buildInstr(AArch64::G_SQSHLU, {MI.getOperand(0)},
{MI.getOperand(2)}).addImm(shiftAmount->getSExtValue());
+ MI.eraseFromParent();
+ return true;
+} else {
+ return false;
+}
+ }
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)},
{MI.getOperand(2)});
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 652a31f4e65f2..aa1517533b753 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -1072,6 +1072,15 @@ AArch64RegisterBankInfo::getInstrMapping(const
MachineInstr &MI) const {
// Index needs to be a GPR.
OpRegBankIdx[2] = PMI_FirstGPR;
break;
+ case AArch64::G_SQSHLU:
+// Destination and source need to be FPRs.
+OpRegBankIdx[0] = PMI_FirstFPR;
+OpRegBankIdx[1] = PMI_FirstFPR;
+
+// Shift Index needs to be a GPR.
+OpRegBankIdx[2] = PMI_FirstGPR;
+break;
+
case TargetOpcode::G_INSERT_VECTOR_ELT:
OpRegBankIdx[0] = PMI_FirstFPR;
OpRegBankIdx[1] = PMI_FirstFPR;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 34843835d284a..961788f311041 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2,17 +2,7 @@
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s
--check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 |
FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI:       warning: Instruction selection used fallback path for sqshlu8b
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu4h
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu2s
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu16b
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu8h
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu4s
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu2d
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu1d_constant
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu_i64_constant
-; CHECK-GI NEXT:warning: Instruction selection used fallback
[llvm-branch-commits] [llvm] [AArch64][GlobalISel] Added support for neon right shifts (PR #170832)
https://github.com/JoshdRod created
https://github.com/llvm/llvm-project/pull/170832
Many neon right shift intrinsics were not supported by GlobalISel, mainly due
to a lack of legalisation logic. This logic has now been implemented.
>From 7e897eac1eee87148b1f3529a42e4b927b556d44 Mon Sep 17 00:00:00 2001
From: Josh Rodriguez
Date: Thu, 27 Nov 2025 15:34:40 +0000
Subject: [PATCH 1/8] [AArch64][GlobalISel] Removed fallback for sqshlu
intrinsic
Added G_SQSHLU node, which lowers the LLVM IR intrinsic aarch64_neon_sqshlu to
the machine intrinsic sqshlu. Generated code is slightly less efficient compared
to SDAG.
---
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 8 +++
.../AArch64/GISel/AArch64LegalizerInfo.cpp| 12 +
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 9
llvm/test/CodeGen/AArch64/arm64-vshift.ll | 49 ++-
4 files changed, 56 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..7469a081d9787 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -252,6 +252,12 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_SQSHLU : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -300,6 +306,8 @@ def : GINodeEquiv;
def : GINodeEquiv;
def : GINodeEquiv;
+def : GINodeEquiv;
+
def : GINodeEquiv;
def : GINodeEquiv;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..0010834e01894 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1857,6 +1857,18 @@ bool
AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
case Intrinsic::aarch64_neon_srhadd:
return LowerBinOp(TargetOpcode::G_SAVGCEIL);
+ case Intrinsic::aarch64_neon_sqshlu: {
+// Check if last operand is constant vector dup
+auto shiftAmount =
isConstantOrConstantSplatVector(*MRI.getVRegDef(MI.getOperand(3).getReg()),
MRI);
+if (shiftAmount) {
+ // If so, create a new intrinsic with the correct shift amount
+ MIB.buildInstr(AArch64::G_SQSHLU, {MI.getOperand(0)},
{MI.getOperand(2)}).addImm(shiftAmount->getSExtValue());
+ MI.eraseFromParent();
+ return true;
+} else {
+ return false;
+}
+ }
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)},
{MI.getOperand(2)});
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 652a31f4e65f2..aa1517533b753 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -1072,6 +1072,15 @@ AArch64RegisterBankInfo::getInstrMapping(const
MachineInstr &MI) const {
// Index needs to be a GPR.
OpRegBankIdx[2] = PMI_FirstGPR;
break;
+ case AArch64::G_SQSHLU:
+// Destination and source need to be FPRs.
+OpRegBankIdx[0] = PMI_FirstFPR;
+OpRegBankIdx[1] = PMI_FirstFPR;
+
+// Shift Index needs to be a GPR.
+OpRegBankIdx[2] = PMI_FirstGPR;
+break;
+
case TargetOpcode::G_INSERT_VECTOR_ELT:
OpRegBankIdx[0] = PMI_FirstFPR;
OpRegBankIdx[1] = PMI_FirstFPR;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 34843835d284a..961788f311041 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -2,17 +2,7 @@
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s
--check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 |
FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI:       warning: Instruction selection used fallback path for sqshlu8b
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu4h
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu2s
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu16b
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu8h
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu4s
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu2d
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu1d_constant
-; CHECK-GI
