https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/160429

From 47274d059b280f7721486e4ea6e656adbfe4357f Mon Sep 17 00:00:00 2001
From: Qi Zhao <[email protected]>
Date: Wed, 24 Sep 2025 09:09:25 +0800
Subject: [PATCH] [LoongArch] Custom legalize vector_shuffle to xvpermi.d when
 possible

---
 .../LoongArch/LoongArchISelLowering.cpp       | 23 +++++++++
 .../lasx/ir-instruction/fix-xvshuf.ll         |  9 ++--
 .../lasx/shuffle-as-permute-and-shuffle.ll    | 48 ++++---------------
 .../LoongArch/lasx/vec-shuffle-byte-rotate.ll |  4 +-
 4 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9662fec5ef4b9..7e313f0b90d34 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2088,6 +2088,26 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
 }
 
+/// Lower VECTOR_SHUFFLE into XVPERMI (if possible); V2 is not used here.
+static SDValue
+lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                            SDValue V1, SDValue V2, SelectionDAG &DAG,
+                            const LoongArchSubtarget &Subtarget) {
+  // Only consider XVPERMI_D: bail out unless VT is v4i64/v4f64 with 4 lanes.
+  if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
+    return SDValue();
+
+  unsigned MaskImm = 0; // Mask[i] is OR'ed in at bit offset 2*i.
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (Mask[i] == -1) // Skipped lane (presumably undef): field stays 0.
+      continue;
+    MaskImm |= Mask[i] << (i * 2); // NOTE(review): assumes Mask[i] in [0,3].
+  }
+
+  return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
+                     DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
@@ -2534,6 +2554,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, 
Subtarget)))
+      return Result;
     if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
       return Result;
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 30539427a1a0a..0b8015ddbdd4a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,13 +7,12 @@
 define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 238
-; CHECK-NEXT:    xvrepl128vei.d $xr3, $xr3, 1
-; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 3
+; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT:    vextrins.d $vr2, $vr3, 16
 ; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 2
 ; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2
 ; CHECK-NEXT:    ret
 entry:
   %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 
6, i32 3, i32 7>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 5f76d9951df9c..ee1e9f4ce4e5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -5,11 +5,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: shuffle_v32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.h $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -33,11 +30,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
 ; CHECK-LABEL: shuffle_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.w $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -71,10 +65,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
 define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 ; CHECK-LABEL: shuffle_v8i32_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, 
i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %shuffle
@@ -83,14 +74,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, 
i32 1, i32 2, i32 0>
   ret <4 x i64> %shuffle
@@ -99,10 +83,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, 
i32 0, i32 2, i32 3>
   ret <4 x i64> %shuffle
@@ -135,14 +116,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
 define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> 
<i32 3, i32 1, i32 2, i32 0>
   ret <4 x double> %shuffle
@@ -151,11 +125,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 75
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> 
<i32 3, i32 2, i32 0, i32 1>
   ret <4 x double> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index b697a2fd07435..eaf33d46a8803 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -126,9 +126,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: byte_rotate_v4i64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvbsrl.v $xr1, $xr0, 8
-; CHECK-NEXT:    xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 177
 ; CHECK-NEXT:    ret
     %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, 
i32 0, i32 3, i32 2>
     ret <4 x i64> %shuffle

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to