[clang] [llvm] [AArch64][clang][llvm] Add support for Armv9.7-A lookup table intrinsics (PR #187046)

Kerry McLaughlin via cfe-commits Wed, 08 Apr 2026 03:49:18 -0700

================
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -force-streaming -mtriple=aarch64-none-linux-gnu -mattr=+sme2p3 < %s | FileCheck %s
+
+target triple = "aarch64-none-linux-gnu"
+
+; Codegen test for llvm.aarch64.sme.luti6.zt with immediate operand 0 and a
+; single nxv16i8 operand: the expected output is one luti6 on z0 using zt0.
+define <vscale x 16 x i8> @luti6_zt_i8(<vscale x 16 x i8> %x) #0 {
+; CHECK-LABEL: luti6_zt_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 z0.b, zt0, z0
+; CHECK-NEXT:    ret
+  %lookup = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(i32 0, <vscale x 16 x i8> %x)
+  ret <vscale x 16 x i8> %lookup
+}
+
+; Codegen test for llvm.aarch64.sme.luti6.zt.x4: immediate operand 0 plus three
+; nxv16i8 operands in, a struct of four nxv16i8 values out. The expected output
+; is a single multi-vector luti6 writing z0-z3 from zt0 and z0-z2.
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @luti6_zt_i8_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: luti6_zt_i8_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.b - z3.b }, zt0, { z0 - z2 }
+; CHECK-NEXT:    ret
+  %quad = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(i32 0, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %quad
+}
+
+; Codegen test for llvm.aarch64.sme.luti6.lane.x4.nxv8i16: two nxv8i16 operands,
+; two nxv16i8 operands and a trailing immediate of 1, which shows up as the [1]
+; index on the expected luti6. All four vector arguments are distinct, so they
+; are expected to be used directly from z0-z3 with no register moves.
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_i16_x4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT:    ret
+  %quad = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %quad
+}
----------------
kmclaughlin-arm wrote:


Just a suggestion, but I find these tests a little bit hard to read when they 
are split up over multiple lines. It looks like similar tests (e.g. 
`sme2-intrinsics-luti4-lane-x4.ll`) have tried to keep lists mostly on one line 
and reused arguments. Could something like this work?

```
; Suggested compact form: %a is passed for both nxv8i16 operands and %b for both
; nxv16i8 operands of the lane.x4 intrinsic; the trailing i32 1 appears as the
; [1] index on the expected luti6. Reusing arguments is why the expected output
; contains register moves ahead of the luti6.
define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: luti6_i16_x4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z0.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    mov z4.d, z0.d
; CHECK-NEXT:    luti6 { z0.h - z3.h }, { z3.h, z4.h }, { z1, z2 }[1]
; CHECK-NEXT:    ret
  %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
         @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b, i32 1)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}
```

https://github.com/llvm/llvm-project/pull/187046
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64][clang][llvm] Add support for Armv9.7-A lookup table intrinsics (PR #187046)

Reply via email to