Author: Farzon Lotfi
Date: 2026-02-17T14:11:13-05:00
New Revision: 51d128fb1efa443765ae04474ff43905d8719595

URL: https://github.com/llvm/llvm-project/commit/51d128fb1efa443765ae04474ff43905d8719595
DIFF: https://github.com/llvm/llvm-project/commit/51d128fb1efa443765ae04474ff43905d8719595.diff

LOG: [Matrix][HLSL] Allow memory layout to change via flags (#181866)

fixes #181859

This also fixes an issue introduced in
https://github.com/llvm/llvm-project/pull/179861, where the in-memory
array-of-vectors layout for HLSL matrices was always emitted in row-major
order (as DXC would define it), regardless of the selected matrix memory
layout.

Added: 
    clang/test/CodeGenHLSL/matrix_types.hlsl

Modified: 
    clang/lib/CodeGen/CodeGenTypes.cpp
    clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
    clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
    clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
    clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
    clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
    clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
    clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
    clang/test/CodeGenHLSL/basic_types.hlsl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CodeGenTypes.cpp 
b/clang/lib/CodeGen/CodeGenTypes.cpp
index fd7a8929a9be9..f54921434986c 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -108,9 +108,16 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
     if (Context.getLangOpts().HLSL) {
       if (T->isConstantMatrixBoolType())
         IRElemTy = ConvertTypeForMem(Context.BoolTy);
-      llvm::Type *VecTy =
-          llvm::FixedVectorType::get(IRElemTy, MT->getNumColumns());
-      return llvm::ArrayType::get(VecTy, MT->getNumRows());
+
+      unsigned NumRows = MT->getNumRows();
+      unsigned NumCols = MT->getNumColumns();
+      bool IsRowMajor =
+          CGM.getContext().getLangOpts().getDefaultMatrixMemoryLayout() ==
+          LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+      unsigned VecLen = IsRowMajor ? NumCols : NumRows;
+      unsigned ArrayLen = IsRowMajor ? NumRows : NumCols;
+      llvm::Type *VecTy = llvm::FixedVectorType::get(IRElemTy, VecLen);
+      return llvm::ArrayType::get(VecTy, ArrayLen);
     }
     return llvm::ArrayType::get(IRElemTy, MT->getNumElementsFlattened());
   }

diff  --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
index f48edc19b86f7..b46d1efec1e1a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -5,8 +5,8 @@
 // CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast0u11matrix_typeILm3ELm2EfE(
 // CHECK-SAME: <6 x float> noundef nofpclass(nan inf) [[F32:%.*]]) 
#[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca [3 x <2 x float>], align 4
-// CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca [2 x <3 x float>], align 4
+// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <6 x float> [[F32]], ptr [[F32_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <6 x float>, ptr [[F32_ADDR]], align 4
 // CHECK-NEXT:    [[CONV:%.*]] = fptosi <6 x float> [[TMP0]] to <6 x i32>
@@ -22,8 +22,8 @@ int3x2 elementwise_type_cast0(float3x2 f32) {
 // CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast1u11matrix_typeILm3ELm2EsE(
 // CHECK-SAME: <6 x i16> noundef [[I16_32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[I16_32_ADDR:%.*]] = alloca [3 x <2 x i16>], align 2
-// CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT:    [[I16_32_ADDR:%.*]] = alloca [2 x <3 x i16>], align 2
+// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <6 x i16> [[I16_32]], ptr [[I16_32_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i16>, ptr [[I16_32_ADDR]], align 2
 // CHECK-NEXT:    [[CONV:%.*]] = sext <6 x i16> [[TMP0]] to <6 x i32>
@@ -39,8 +39,8 @@ int3x2 elementwise_type_cast1(int16_t3x2 i16_32) {
 // CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast2u11matrix_typeILm3ELm2ElE(
 // CHECK-SAME: <6 x i64> noundef [[I64_32:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[I64_32_ADDR:%.*]] = alloca [3 x <2 x i64>], align 8
-// CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT:    [[I64_32_ADDR:%.*]] = alloca [2 x <3 x i64>], align 8
+// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <6 x i64> [[I64_32]], ptr [[I64_32_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i64>, ptr [[I64_32_ADDR]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = trunc <6 x i64> [[TMP0]] to <6 x i32>
@@ -56,8 +56,8 @@ int3x2 elementwise_type_cast2(int64_t3x2 i64_32) {
 // CHECK-LABEL: define hidden noundef <6 x i16> 
@_Z22elementwise_type_cast3u11matrix_typeILm2ELm3EDhE(
 // CHECK-SAME: <6 x half> noundef nofpclass(nan inf) [[H23:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[H23_ADDR:%.*]] = alloca [2 x <3 x half>], align 2
-// CHECK-NEXT:    [[I23:%.*]] = alloca [2 x <3 x i16>], align 2
+// CHECK-NEXT:    [[H23_ADDR:%.*]] = alloca [3 x <2 x half>], align 2
+// CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i16>], align 2
 // CHECK-NEXT:    store <6 x half> [[H23]], ptr [[H23_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load <6 x half>, ptr [[H23_ADDR]], align 2
 // CHECK-NEXT:    [[CONV:%.*]] = fptosi <6 x half> [[TMP0]] to <6 x i16>
@@ -73,8 +73,8 @@ int16_t2x3 elementwise_type_cast3(half2x3 h23) {
 // CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast4u11matrix_typeILm3ELm2EdE(
 // CHECK-SAME: <6 x double> noundef nofpclass(nan inf) [[D32:%.*]]) #[[ATTR0]] 
{
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[D32_ADDR:%.*]] = alloca [3 x <2 x double>], align 8
-// CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT:    [[D32_ADDR:%.*]] = alloca [2 x <3 x double>], align 8
+// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <6 x double> [[D32]], ptr [[D32_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <6 x double>, ptr [[D32_ADDR]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = fptosi <6 x double> [[TMP0]] to <6 x i32>
@@ -91,7 +91,7 @@ int3x2 elementwise_type_cast4(double3x2 d32) {
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[A:%.*]] = alloca [2 x [1 x i32]], align 4
-// CHECK-NEXT:    [[B:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [2 x [1 x i32]], align 4
 // CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr 
align 4 @__const._Z5call2v.A, i32 8, i1 false)
@@ -120,7 +120,7 @@ struct S {
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
-// CHECK-NEXT:    [[A:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT:    [[A:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1
 // CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr 
align 1 @__const._Z5call3v.s, i32 8, i1 false)

diff  --git 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
index 56f816806d63f..fb32478f2cac9 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
@@ -5,7 +5,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// CHECK-NEXT:    [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
@@ -22,7 +22,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// CHECK-NEXT:    [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
@@ -56,7 +56,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
@@ -73,7 +73,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
@@ -107,7 +107,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT:    [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 4>

diff  --git 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
index b58f567eb51d3..d8738c8948f0f 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
@@ -5,7 +5,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// CHECK-NEXT:    [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
@@ -22,7 +22,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// CHECK-NEXT:    [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
@@ -56,7 +56,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
@@ -73,7 +73,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
@@ -107,7 +107,7 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// CHECK-NEXT:    [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 4>

diff  --git 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
index 2b950d8a51a38..57e4d0d6c459f 100644
--- 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
+++ 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl
@@ -108,7 +108,7 @@ void setVectorOnMatrixSwizzle(out int2x3 M, int3 V) {
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) 
[[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[MINDEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    [[MINDEX_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
@@ -139,7 +139,7 @@ void setMatrixFromMatrix(out int2x3 M, int2x3 N, int 
MIndex) {
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) 
[[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[NINDEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    [[NINDEX_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    store <6 x i32> [[N]], ptr [[N_ADDR]], align 4

diff  --git 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
index 7190b6e1148a5..97921c785dc9d 100644
--- 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
+++ 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
@@ -115,7 +115,7 @@ int3 getMatrixSwizzle2x3(out int2x3 M, int index) {
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) 
[[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    store <6 x i32> [[N]], ptr [[N_ADDR]], align 4

diff  --git 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
index efa9381b515af..735884911fc06 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
@@ -31,7 +31,7 @@ float4 getFloatVecMatrixDynamic(float4x4 M, int index) {
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float 
@_Z27getFloatScalarMatrixDynamicu11matrix_typeILm2ELm1EfEi(
 // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef 
[[INDEX:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
 // CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store <2 x float> [[M]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
@@ -50,7 +50,7 @@ float getFloatScalarMatrixDynamic(float2x1 M, int index) {
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float 
@_Z28getFloatScalarMatrixConstantu11matrix_typeILm2ELm1EfE(
 // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
 // CHECK-NEXT:    store <2 x float> [[M]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], 
i32 0
@@ -65,7 +65,7 @@ float getFloatScalarMatrixConstant(float2x1 M) {
 // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float 
@_Z29getFloatScalarMatrixConstant2u11matrix_typeILm2ELm1EfE(
 // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4
 // CHECK-NEXT:    store <2 x float> [[M]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], 
i32 1
@@ -207,7 +207,7 @@ int4 AddIntMatrixConstant(int4x4 M) {
 // CHECK-LABEL: define hidden noundef <3 x i1> 
@_Z23getBoolVecMatrixDynamicu11matrix_typeILm2ELm3EbEi(
 // CHECK-SAME: <6 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = zext <6 x i1> [[M]] to <6 x i32>
 // CHECK-NEXT:    store <6 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
@@ -255,7 +255,7 @@ bool4 getBoolVecMatrixConstant(bool4x4 M) {
 // CHECK-LABEL: define hidden noundef i1 
@_Z27getBoolScalarMatrixConstantu11matrix_typeILm3ELm1EbE(
 // CHECK-SAME: <3 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [3 x <1 x i32>], align 4
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [1 x <3 x i32>], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = zext <3 x i1> [[M]] to <3 x i32>
 // CHECK-NEXT:    store <3 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4

diff  --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
index 768c1b8e02bea..5edb8a3dd4690 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
@@ -120,7 +120,7 @@ void ExplicitIntToBoolCastThenSplat(int3 Value) {
 // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca <2 x float>, align 8
-// CHECK-NEXT:    [[M:%.*]] = alloca [2 x <3 x i32>], align 4
+// CHECK-NEXT:    [[M:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    store <2 x float> [[VALUE]], ptr [[VALUE_ADDR]], align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[VALUE_ADDR]], align 8
 // CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <2 
x float> [[TMP0]], zeroinitializer
@@ -139,7 +139,7 @@ void ExplicitFloatToBoolCastThenSplat(float2 Value) {
 // CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[M:%.*]] = alloca [3 x <2 x float>], align 4
+// CHECK-NEXT:    [[M:%.*]] = alloca [2 x <3 x float>], align 4
 // CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
 // CHECK-NEXT:    store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4

diff  --git a/clang/test/CodeGenHLSL/basic_types.hlsl 
b/clang/test/CodeGenHLSL/basic_types.hlsl
index 0aaf7a1b77797..8836126934957 100644
--- a/clang/test/CodeGenHLSL/basic_types.hlsl
+++ b/clang/test/CodeGenHLSL/basic_types.hlsl
@@ -38,22 +38,6 @@
 // CHECK: @double2_Val = external hidden addrspace(2) global <2 x double>, 
align 16
 // CHECK: @double3_Val = external hidden addrspace(2) global <3 x double>, 
align 32
 // CHECK: @double4_Val = external hidden addrspace(2) global <4 x double>, 
align 32
-// CHECK: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], 
align 4
-// CHECK: @bool1x2_Val = external hidden addrspace(2) global [1 x <2 x i32>], 
align 4
-// CHECK: @bool1x3_Val = external hidden addrspace(2) global [1 x <3 x i32>], 
align 4
-// CHECK: @bool1x4_Val = external hidden addrspace(2) global [1 x <4 x i32>], 
align 4
-// CHECK: @bool2x1_Val = external hidden addrspace(2) global [2 x <1 x i32>], 
align 4
-// CHECK: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], 
align 4
-// CHECK: @bool2x3_Val = external hidden addrspace(2) global [2 x <3 x i32>], 
align 4
-// CHECK: @bool2x4_Val = external hidden addrspace(2) global [2 x <4 x i32>], 
align 4
-// CHECK: @bool3x1_Val = external hidden addrspace(2) global [3 x <1 x i32>], 
align 4
-// CHECK: @bool3x2_Val = external hidden addrspace(2) global [3 x <2 x i32>], 
align 4
-// CHECK: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], 
align 4
-// CHECK: @bool3x4_Val = external hidden addrspace(2) global [3 x <4 x i32>], 
align 4
-// CHECK: @bool4x1_Val = external hidden addrspace(2) global [4 x <1 x i32>], 
align 4
-// CHECK: @bool4x2_Val = external hidden addrspace(2) global [4 x <2 x i32>], 
align 4
-// CHECK: @bool4x3_Val = external hidden addrspace(2) global [4 x <3 x i32>], 
align 4
-// CHECK: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], 
align 4
 
 #ifdef NAMESPACED
 #define TYPE_DECL(T)  hlsl::T T##_Val
@@ -109,20 +93,3 @@ TYPE_DECL( float4  );
 TYPE_DECL( double2 );
 TYPE_DECL( double3 );
 TYPE_DECL( double4 );
-
-TYPE_DECL( bool1x1 );
-TYPE_DECL( bool1x2 );
-TYPE_DECL( bool1x3 );
-TYPE_DECL( bool1x4 );
-TYPE_DECL( bool2x1 );
-TYPE_DECL( bool2x2 );
-TYPE_DECL( bool2x3 );
-TYPE_DECL( bool2x4 );
-TYPE_DECL( bool3x1 );
-TYPE_DECL( bool3x2 );
-TYPE_DECL( bool3x3 );
-TYPE_DECL( bool3x4 );
-TYPE_DECL( bool4x1 );
-TYPE_DECL( bool4x2 );
-TYPE_DECL( bool4x3 );
-TYPE_DECL( bool4x4 );

diff  --git a/clang/test/CodeGenHLSL/matrix_types.hlsl 
b/clang/test/CodeGenHLSL/matrix_types.hlsl
new file mode 100644
index 0000000000000..1c2f9cd316543
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix_types.hlsl
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type 
-fnative-int16-type \
+// RUN:   -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o 
- | FileCheck %s --check-prefix=CHECK-ROW-MAJOR
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type 
-fnative-int16-type \
+// RUN:   -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o 
- -DNAMESPACED| FileCheck %s --check-prefix=CHECK-ROW-MAJOR
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type 
-fnative-int16-type \
+// RUN:   -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major 
-o - | FileCheck %s --check-prefix=CHECK-COL-MAJOR
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type 
-fnative-int16-type \
+// RUN:   -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major 
-o - -DNAMESPACED| FileCheck %s --check-prefix=CHECK-COL-MAJOR
+
+// CHECK-ROW-MAJOR: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool1x2_Val = external hidden addrspace(2) global [1 x <2 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool1x3_Val = external hidden addrspace(2) global [1 x <3 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool1x4_Val = external hidden addrspace(2) global [1 x <4 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x1_Val = external hidden addrspace(2) global [2 x <1 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x3_Val = external hidden addrspace(2) global [2 x <3 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool2x4_Val = external hidden addrspace(2) global [2 x <4 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x1_Val = external hidden addrspace(2) global [3 x <1 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x2_Val = external hidden addrspace(2) global [3 x <2 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool3x4_Val = external hidden addrspace(2) global [3 x <4 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x1_Val = external hidden addrspace(2) global [4 x <1 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x2_Val = external hidden addrspace(2) global [4 x <2 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x3_Val = external hidden addrspace(2) global [4 x <3 
x i32>], align 4
+// CHECK-ROW-MAJOR: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 
x i32>], align 4
+
+// CHECK-COL-MAJOR: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool1x2_Val = external hidden addrspace(2) global [2 x <1 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool1x3_Val = external hidden addrspace(2) global [3 x <1 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool1x4_Val = external hidden addrspace(2) global [4 x <1 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x1_Val = external hidden addrspace(2) global [1 x <2 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x3_Val = external hidden addrspace(2) global [3 x <2 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool2x4_Val = external hidden addrspace(2) global [4 x <2 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x1_Val = external hidden addrspace(2) global [1 x <3 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x2_Val = external hidden addrspace(2) global [2 x <3 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool3x4_Val = external hidden addrspace(2) global [4 x <3 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x1_Val = external hidden addrspace(2) global [1 x <4 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x2_Val = external hidden addrspace(2) global [2 x <4 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x3_Val = external hidden addrspace(2) global [3 x <4 
x i32>], align 4
+// CHECK-COL-MAJOR: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 
x i32>], align 4
+
+#ifdef NAMESPACED
+#define TYPE_DECL(T)  hlsl::T T##_Val
+#else
+#define TYPE_DECL(T)  T T##_Val
+#endif
+
+TYPE_DECL( bool1x1 );
+TYPE_DECL( bool1x2 );
+TYPE_DECL( bool1x3 );
+TYPE_DECL( bool1x4 );
+TYPE_DECL( bool2x1 );
+TYPE_DECL( bool2x2 );
+TYPE_DECL( bool2x3 );
+TYPE_DECL( bool2x4 );
+TYPE_DECL( bool3x1 );
+TYPE_DECL( bool3x2 );
+TYPE_DECL( bool3x3 );
+TYPE_DECL( bool3x4 );
+TYPE_DECL( bool4x1 );
+TYPE_DECL( bool4x2 );
+TYPE_DECL( bool4x3 );
+TYPE_DECL( bool4x4 );


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to