Author: Farzon Lotfi Date: 2026-02-17T14:11:13-05:00 New Revision: 51d128fb1efa443765ae04474ff43905d8719595
URL: https://github.com/llvm/llvm-project/commit/51d128fb1efa443765ae04474ff43905d8719595 DIFF: https://github.com/llvm/llvm-project/commit/51d128fb1efa443765ae04474ff43905d8719595.diff LOG: [Matrix][HLSL] Allow memory layout to change via flags (#181866) fixes #181859 This also fixes an issue introduced in https://github.com/llvm/llvm-project/pull/179861 where we were doing the array vector layout in row major as DXC would define it. Added: clang/test/CodeGenHLSL/matrix_types.hlsl Modified: clang/lib/CodeGen/CodeGenTypes.cpp clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl clang/test/CodeGenHLSL/basic_types.hlsl Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index fd7a8929a9be9..f54921434986c 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -108,9 +108,16 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { if (Context.getLangOpts().HLSL) { if (T->isConstantMatrixBoolType()) IRElemTy = ConvertTypeForMem(Context.BoolTy); - llvm::Type *VecTy = - llvm::FixedVectorType::get(IRElemTy, MT->getNumColumns()); - return llvm::ArrayType::get(VecTy, MT->getNumRows()); + + unsigned NumRows = MT->getNumRows(); + unsigned NumCols = MT->getNumColumns(); + bool IsRowMajor = + CGM.getContext().getLangOpts().getDefaultMatrixMemoryLayout() == + LangOptions::MatrixMemoryLayout::MatrixRowMajor; + unsigned VecLen = IsRowMajor ? NumCols : NumRows; + unsigned ArrayLen = IsRowMajor ? NumRows : NumCols; + llvm::Type *VecTy = llvm::FixedVectorType::get(IRElemTy, VecLen); + return llvm::ArrayType::get(VecTy, ArrayLen); } return llvm::ArrayType::get(IRElemTy, MT->getNumElementsFlattened()); } diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl index f48edc19b86f7..b46d1efec1e1a 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl @@ -5,8 +5,8 @@ // CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast0u11matrix_typeILm3ELm2EfE( // CHECK-SAME: <6 x float> noundef nofpclass(nan inf) [[F32:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca [3 x <2 x float>], align 4 -// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4 +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca [2 x <3 x float>], align 4 +// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4 // CHECK-NEXT: store <6 x float> [[F32]], ptr [[F32_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <6 x float>, ptr [[F32_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x float> [[TMP0]] to <6 x i32> @@ -22,8 +22,8 @@ int3x2 elementwise_type_cast0(float3x2 f32) { // CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast1u11matrix_typeILm3ELm2EsE( // CHECK-SAME: <6 x i16> noundef [[I16_32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I16_32_ADDR:%.*]] = alloca [3 x <2 x i16>], align 2 -// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4 +// CHECK-NEXT: [[I16_32_ADDR:%.*]] = alloca [2 x <3 x i16>], align 2 +// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4 // CHECK-NEXT: store <6 x i16> [[I16_32]], ptr [[I16_32_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load <6 x i16>, ptr [[I16_32_ADDR]], align 2 // CHECK-NEXT: [[CONV:%.*]] = sext <6 x i16> [[TMP0]] to <6 x i32> @@ -39,8 +39,8 @@ int3x2 elementwise_type_cast1(int16_t3x2 i16_32) { // CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast2u11matrix_typeILm3ELm2ElE( // CHECK-SAME: <6 x i64> noundef [[I64_32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[I64_32_ADDR:%.*]] = alloca [3 x <2 x i64>], align 8 -// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4 +// CHECK-NEXT: [[I64_32_ADDR:%.*]] = alloca [2 x <3 x i64>], align 8 +// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4 // CHECK-NEXT: store <6 x i64> [[I64_32]], ptr [[I64_32_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[I64_32_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = trunc <6 x i64> [[TMP0]] to <6 x i32> @@ -56,8 +56,8 @@ int3x2 elementwise_type_cast2(int64_t3x2 i64_32) { // CHECK-LABEL: define hidden noundef <6 x i16> @_Z22elementwise_type_cast3u11matrix_typeILm2ELm3EDhE( // CHECK-SAME: <6 x half> noundef nofpclass(nan inf) [[H23:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[H23_ADDR:%.*]] = alloca [2 x <3 x half>], align 2 -// CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i16>], align 2 +// CHECK-NEXT: [[H23_ADDR:%.*]] = alloca [3 x <2 x half>], align 2 +// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i16>], align 2 // CHECK-NEXT: store <6 x half> [[H23]], ptr [[H23_ADDR]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load <6 x half>, ptr [[H23_ADDR]], align 2 // CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x half> [[TMP0]] to <6 x i16> @@ -73,8 +73,8 @@ int16_t2x3 elementwise_type_cast3(half2x3 h23) { // CHECK-LABEL: define hidden noundef <6 x i32> @_Z22elementwise_type_cast4u11matrix_typeILm3ELm2EdE( // CHECK-SAME: <6 x double> noundef nofpclass(nan inf) [[D32:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[D32_ADDR:%.*]] = alloca [3 x <2 x double>], align 8 -// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4 +// CHECK-NEXT: [[D32_ADDR:%.*]] = alloca [2 x <3 x double>], align 8 +// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4 // CHECK-NEXT: store <6 x double> [[D32]], ptr [[D32_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load <6 x double>, ptr [[D32_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = fptosi <6 x double> [[TMP0]] to <6 x i32> @@ -91,7 +91,7 @@ int3x2 elementwise_type_cast4(double3x2 d32) { // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[A:%.*]] = alloca [2 x [1 x i32]], align 4 -// CHECK-NEXT: [[B:%.*]] = alloca [2 x <1 x i32>], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [1 x <2 x i32>], align 4 // CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [2 x [1 x i32]], align 4 // CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @__const._Z5call2v.A, i32 8, i1 false) @@ -120,7 +120,7 @@ struct S { // CHECK-SAME: ) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1 -// CHECK-NEXT: [[A:%.*]] = alloca [2 x <1 x i32>], align 4 +// CHECK-NEXT: [[A:%.*]] = alloca [1 x <2 x i32>], align 4 // CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1 // CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z5call3v.s, i32 8, i1 false) diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl index 56f816806d63f..fb32478f2cac9 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl @@ -5,7 +5,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I34:%.*]] = alloca [3 x <4 x i32>], align 4 +// CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -22,7 +22,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I43:%.*]] = alloca [4 x <3 x i32>], align 4 +// CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14> @@ -56,7 +56,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4 +// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9> @@ -73,7 +73,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6> @@ -107,7 +107,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I21:%.*]] = alloca [2 x <1 x i32>], align 4 +// CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4> diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl index b58f567eb51d3..d8738c8948f0f 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl @@ -5,7 +5,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I34:%.*]] = alloca [3 x <4 x i32>], align 4 +// CHECK-NEXT: [[I34:%.*]] = alloca [4 x <3 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -22,7 +22,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I43:%.*]] = alloca [4 x <3 x i32>], align 4 +// CHECK-NEXT: [[I43:%.*]] = alloca [3 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14> @@ -56,7 +56,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I32:%.*]] = alloca [3 x <2 x i32>], align 4 +// CHECK-NEXT: [[I32:%.*]] = alloca [2 x <3 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9> @@ -73,7 +73,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I23:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[I23:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6> @@ -107,7 +107,7 @@ // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 -// CHECK-NEXT: [[I21:%.*]] = alloca [2 x <1 x i32>], align 4 +// CHECK-NEXT: [[I21:%.*]] = alloca [1 x <2 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4 // CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4> diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl index 2b950d8a51a38..57e4d0d6c459f 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptConstSwizzle.hlsl @@ -108,7 +108,7 @@ void setVectorOnMatrixSwizzle(out int2x3 M, int3 V) { // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[MINDEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: [[MINDEX_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4 @@ -139,7 +139,7 @@ void setMatrixFromMatrix(out int2x3 M, int2x3 N, int MIndex) { // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[NINDEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: [[NINDEX_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4 diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl index 7190b6e1148a5..97921c785dc9d 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl @@ -115,7 +115,7 @@ int3 getMatrixSwizzle2x3(out int2x3 M, int index) { // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) [[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: store <6 x i32> [[N]], ptr [[N_ADDR]], align 4 diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl index efa9381b515af..735884911fc06 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl @@ -31,7 +31,7 @@ float4 getFloatVecMatrixDynamic(float4x4 M, int index) { // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z27getFloatScalarMatrixDynamicu11matrix_typeILm2ELm1EfEi( // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4 // CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4 @@ -50,7 +50,7 @@ float getFloatScalarMatrixDynamic(float2x1 M, int index) { // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z28getFloatScalarMatrixConstantu11matrix_typeILm2ELm1EfE( // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4 // CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4 // CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], i32 0 @@ -65,7 +65,7 @@ float getFloatScalarMatrixConstant(float2x1 M) { // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z29getFloatScalarMatrixConstant2u11matrix_typeILm2ELm1EfE( // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <1 x float>], align 4 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <2 x float>], align 4 // CHECK-NEXT: store <2 x float> [[M]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[M_ADDR]], align 4 // CHECK-NEXT: [[MATRIX_ELEM:%.*]] = extractelement <2 x float> [[TMP0]], i32 1 @@ -207,7 +207,7 @@ int4 AddIntMatrixConstant(int4x4 M) { // CHECK-LABEL: define hidden noundef <3 x i1> @_Z23getBoolVecMatrixDynamicu11matrix_typeILm2ELm3EbEi( // CHECK-SAME: <6 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP0:%.*]] = zext <6 x i1> [[M]] to <6 x i32> // CHECK-NEXT: store <6 x i32> [[TMP0]], ptr [[M_ADDR]], align 4 @@ -255,7 +255,7 @@ bool4 getBoolVecMatrixConstant(bool4x4 M) { // CHECK-LABEL: define hidden noundef i1 @_Z27getBoolScalarMatrixConstantu11matrix_typeILm3ELm1EbE( // CHECK-SAME: <3 x i1> noundef [[M:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x <1 x i32>], align 4 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [1 x <3 x i32>], align 4 // CHECK-NEXT: [[TMP0:%.*]] = zext <3 x i1> [[M]] to <3 x i32> // CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[M_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4 diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl index 768c1b8e02bea..5edb8a3dd4690 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl @@ -120,7 +120,7 @@ void ExplicitIntToBoolCastThenSplat(int3 Value) { // CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca <2 x float>, align 8 -// CHECK-NEXT: [[M:%.*]] = alloca [2 x <3 x i32>], align 4 +// CHECK-NEXT: [[M:%.*]] = alloca [3 x <2 x i32>], align 4 // CHECK-NEXT: store <2 x float> [[VALUE]], ptr [[VALUE_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[VALUE_ADDR]], align 8 // CHECK-NEXT: [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <2 x float> [[TMP0]], zeroinitializer @@ -139,7 +139,7 @@ void ExplicitFloatToBoolCastThenSplat(float2 Value) { // CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[VALUE_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[M:%.*]] = alloca [3 x <2 x float>], align 4 +// CHECK-NEXT: [[M:%.*]] = alloca [2 x <3 x float>], align 4 // CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32 // CHECK-NEXT: store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4 diff --git a/clang/test/CodeGenHLSL/basic_types.hlsl b/clang/test/CodeGenHLSL/basic_types.hlsl index 0aaf7a1b77797..8836126934957 100644 --- a/clang/test/CodeGenHLSL/basic_types.hlsl +++ b/clang/test/CodeGenHLSL/basic_types.hlsl @@ -38,22 +38,6 @@ // CHECK: @double2_Val = external hidden addrspace(2) global <2 x double>, align 16 // CHECK: @double3_Val = external hidden addrspace(2) global <3 x double>, align 32 // CHECK: @double4_Val = external hidden addrspace(2) global <4 x double>, align 32 -// CHECK: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], align 4 -// CHECK: @bool1x2_Val = external hidden addrspace(2) global [1 x <2 x i32>], align 4 -// CHECK: @bool1x3_Val = external hidden addrspace(2) global [1 x <3 x i32>], align 4 -// CHECK: @bool1x4_Val = external hidden addrspace(2) global [1 x <4 x i32>], align 4 -// CHECK: @bool2x1_Val = external hidden addrspace(2) global [2 x <1 x i32>], align 4 -// CHECK: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], align 4 -// CHECK: @bool2x3_Val = external hidden addrspace(2) global [2 x <3 x i32>], align 4 -// CHECK: @bool2x4_Val = external hidden addrspace(2) global [2 x <4 x i32>], align 4 -// CHECK: @bool3x1_Val = external hidden addrspace(2) global [3 x <1 x i32>], align 4 -// CHECK: @bool3x2_Val = external hidden addrspace(2) global [3 x <2 x i32>], align 4 -// CHECK: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], align 4 -// CHECK: @bool3x4_Val = external hidden addrspace(2) global [3 x <4 x i32>], align 4 -// CHECK: @bool4x1_Val = external hidden addrspace(2) global [4 x <1 x i32>], align 4 -// CHECK: @bool4x2_Val = external hidden addrspace(2) global [4 x <2 x i32>], align 4 -// CHECK: @bool4x3_Val = external hidden addrspace(2) global [4 x <3 x i32>], align 4 -// CHECK: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], align 4 #ifdef NAMESPACED #define TYPE_DECL(T) hlsl::T T##_Val @@ -109,20 +93,3 @@ TYPE_DECL( float4 ); TYPE_DECL( double2 ); TYPE_DECL( double3 ); TYPE_DECL( double4 ); - -TYPE_DECL( bool1x1 ); -TYPE_DECL( bool1x2 ); -TYPE_DECL( bool1x3 ); -TYPE_DECL( bool1x4 ); -TYPE_DECL( bool2x1 ); -TYPE_DECL( bool2x2 ); -TYPE_DECL( bool2x3 ); -TYPE_DECL( bool2x4 ); -TYPE_DECL( bool3x1 ); -TYPE_DECL( bool3x2 ); -TYPE_DECL( bool3x3 ); -TYPE_DECL( bool3x4 ); -TYPE_DECL( bool4x1 ); -TYPE_DECL( bool4x2 ); -TYPE_DECL( bool4x3 ); -TYPE_DECL( bool4x4 ); diff --git a/clang/test/CodeGenHLSL/matrix_types.hlsl b/clang/test/CodeGenHLSL/matrix_types.hlsl new file mode 100644 index 0000000000000..1c2f9cd316543 --- /dev/null +++ b/clang/test/CodeGenHLSL/matrix_types.hlsl @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \ +// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - | FileCheck %s --check-prefix=CHECK-ROW-MAJOR +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \ +// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - -DNAMESPACED| FileCheck %s --check-prefix=CHECK-ROW-MAJOR + +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \ +// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - | FileCheck %s --check-prefix=CHECK-COL-MAJOR +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \ +// RUN: -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - -DNAMESPACED| FileCheck %s --check-prefix=CHECK-COL-MAJOR + +// CHECK-ROW-MAJOR: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool1x2_Val = external hidden addrspace(2) global [1 x <2 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool1x3_Val = external hidden addrspace(2) global [1 x <3 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool1x4_Val = external hidden addrspace(2) global [1 x <4 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool2x1_Val = external hidden addrspace(2) global [2 x <1 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool2x3_Val = external hidden addrspace(2) global [2 x <3 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool2x4_Val = external hidden addrspace(2) global [2 x <4 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool3x1_Val = external hidden addrspace(2) global [3 x <1 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool3x2_Val = external hidden addrspace(2) global [3 x <2 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool3x4_Val = external hidden addrspace(2) global [3 x <4 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool4x1_Val = external hidden addrspace(2) global [4 x <1 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool4x2_Val = external hidden addrspace(2) global [4 x <2 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool4x3_Val = external hidden addrspace(2) global [4 x <3 x i32>], align 4 +// CHECK-ROW-MAJOR: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], align 4 + +// CHECK-COL-MAJOR: @bool1x1_Val = external hidden addrspace(2) global [1 x <1 x i32>], align 4 +// CHECK-COL-MAJOR: @bool1x2_Val = external hidden addrspace(2) global [2 x <1 x i32>], align 4 +// CHECK-COL-MAJOR: @bool1x3_Val = external hidden addrspace(2) global [3 x <1 x i32>], align 4 +// CHECK-COL-MAJOR: @bool1x4_Val = external hidden addrspace(2) global [4 x <1 x i32>], align 4 +// CHECK-COL-MAJOR: @bool2x1_Val = external hidden addrspace(2) global [1 x <2 x i32>], align 4 +// CHECK-COL-MAJOR: @bool2x2_Val = external hidden addrspace(2) global [2 x <2 x i32>], align 4 +// CHECK-COL-MAJOR: @bool2x3_Val = external hidden addrspace(2) global [3 x <2 x i32>], align 4 +// CHECK-COL-MAJOR: @bool2x4_Val = external hidden addrspace(2) global [4 x <2 x i32>], align 4 +// CHECK-COL-MAJOR: @bool3x1_Val = external hidden addrspace(2) global [1 x <3 x i32>], align 4 +// CHECK-COL-MAJOR: @bool3x2_Val = external hidden addrspace(2) global [2 x <3 x i32>], align 4 +// CHECK-COL-MAJOR: @bool3x3_Val = external hidden addrspace(2) global [3 x <3 x i32>], align 4 +// CHECK-COL-MAJOR: @bool3x4_Val = external hidden addrspace(2) global [4 x <3 x i32>], align 4 +// CHECK-COL-MAJOR: @bool4x1_Val = external hidden addrspace(2) global [1 x <4 x i32>], align 4 +// CHECK-COL-MAJOR: @bool4x2_Val = external hidden addrspace(2) global [2 x <4 x i32>], align 4 +// CHECK-COL-MAJOR: @bool4x3_Val = external hidden addrspace(2) global [3 x <4 x i32>], align 4 +// CHECK-COL-MAJOR: @bool4x4_Val = external hidden addrspace(2) global [4 x <4 x i32>], align 4 + +#ifdef NAMESPACED +#define TYPE_DECL(T) hlsl::T T##_Val +#else +#define TYPE_DECL(T) T T##_Val +#endif + +TYPE_DECL( bool1x1 ); +TYPE_DECL( bool1x2 ); +TYPE_DECL( bool1x3 ); +TYPE_DECL( bool1x4 ); +TYPE_DECL( bool2x1 ); +TYPE_DECL( bool2x2 ); +TYPE_DECL( bool2x3 ); +TYPE_DECL( bool2x4 ); +TYPE_DECL( bool3x1 ); +TYPE_DECL( bool3x2 ); +TYPE_DECL( bool3x3 ); +TYPE_DECL( bool3x4 ); +TYPE_DECL( bool4x1 ); +TYPE_DECL( bool4x2 ); +TYPE_DECL( bool4x3 ); +TYPE_DECL( bool4x4 ); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
