[clang] [llvm] [HLSL][DirectX] Add `transpose` HLSL intrinsic and DXIL lowering of `llvm.matrix.transpose` (PR #186263)

Farzon Lotfi via cfe-commits Fri, 13 Mar 2026 10:31:14 -0700

================
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 6
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
+
+; Verify that llvm.matrix.transpose is expanded to shufflevector for DXIL.
+
+declare <6 x float> @llvm.matrix.transpose.v6f32(<6 x float>, i32, i32)
+declare <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32>, i32, i32)
+declare <16 x float> @llvm.matrix.transpose.v16f32(<16 x float>, i32, i32)
+declare <4 x float> @llvm.matrix.transpose.v4f32(<4 x float>, i32, i32)
+declare <4 x half> @llvm.matrix.transpose.v4f16(<4 x half>, i32, i32)
+
+; 2x3 float -> 3x2 float
+define <6 x float> @test_transpose_float2x3(<6 x float> %m) {
+; CHECK-LABEL: define <6 x float> @test_transpose_float2x3(
+; CHECK-SAME: <6 x float> [[M:%.*]]) {
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <6 x float> [[M]], <6 x float> 
poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT:    ret <6 x float> [[TMP12]]
+;
+  %r = call <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> %m, i32 2, 
i32 3)
+  ret <6 x float> %r
+}
+
+; 4x3 int -> 3x4 int
+define <12 x i32> @test_transpose_int4x3(<12 x i32> %m) {
+; CHECK-LABEL: define <12 x i32> @test_transpose_int4x3(
+; CHECK-SAME: <12 x i32> [[M:%.*]]) {
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <12 x i32> [[M]], <12 x i32> 
poison, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 
10, i32 3, i32 7, i32 11>
+; CHECK-NEXT:    ret <12 x i32> [[TMP24]]
+;
+  %r = call <12 x i32> @llvm.matrix.transpose.v12i32(<12 x i32> %m, i32 4, i32 
3)
+  ret <12 x i32> %r
+}
+
+; 4x4 float -> 4x4 float
+define <16 x float> @test_transpose_float4x4(<16 x float> %m) {
+; CHECK-LABEL: define <16 x float> @test_transpose_float4x4(
+; CHECK-SAME: <16 x float> [[M:%.*]]) {
+; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x float> [[M]], <16 x 
float> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, 
i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
----------------
farzonl wrote:


Do we know how the scalarizer handles `shufflevector` instructions this large?
I think it is probably fine. We should just make sure it is well tested, if not
by our backend then by other ones.

https://github.com/llvm/llvm-project/pull/186263
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [HLSL][DirectX] Add `transpose` HLSL intrinsic and DXIL lowering of `llvm.matrix.transpose` (PR #186263)

Reply via email to