https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/177708
>From 21287b87994c3dce5e1615c9bace88d819526324 Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Fri, 23 Jan 2026 15:13:32 -0800 Subject: [PATCH 1/4] Enable explicit cast of matrices to vectors --- clang/lib/CodeGen/CGExpr.cpp | 21 +++++- .../BasicFeatures/MatrixToVectorCast.hlsl | 70 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 76a3939cd28eb..5d595c2e3b1e5 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -7117,8 +7117,6 @@ void CodeGenFunction::FlattenAccessAndTypeLValue( while (!WorkList.empty()) { auto [LVal, T, IdxList] = WorkList.pop_back_val(); T = T.getCanonicalType().getUnqualifiedType(); - assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL"); - if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) { uint64_t Size = CAT->getZExtSize(); for (int64_t I = Size - 1; I > -1; I--) { @@ -7192,6 +7190,25 @@ void CodeGenFunction::FlattenAccessAndTypeLValue( Base.getBaseInfo(), TBAAAccessInfo()); AccessList.emplace_back(LV); } + } else if (const auto *MT = dyn_cast<ConstantMatrixType>(T)) { + // Matrices are represented as flat arrays in memory, but has a vector + // value type. So we use ConvertMatrixAddress to convert the address from + // array to vector, and extract elements similar to the vector case above. + // The order in which we iterate over the elements is sequentially in + // memory; whether the matrix is in row- or column-major order does not + // matter. + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT, + Align, "matrix.gep"); + LValue Base = MakeAddrLValue(GEP, T); + Address MatAddr = MaybeConvertMatrixAddress(Base.getAddress(), *this); + for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) { + llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I); + LValue LV = LValue::MakeMatrixElt(MatAddr, Idx, MT->getElementType(), + Base.getBaseInfo(), TBAAAccessInfo()); + AccessList.emplace_back(LV); + } } else { // a scalar/builtin type if (!IdxList.empty()) { llvm::Type *LLVMT = ConvertTypeForMem(T); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl new file mode 100644 index 0000000000000..06af2ebe72473 --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl @@ -0,0 +1,70 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK + +// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z2fnu11matrix_typeILm2ELm2EfE( +// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4 +// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3 +// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[MATRIXEXT3]], i64 3 +// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[V]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[V]], align 16 +// CHECK-NEXT: ret <4 x float> [[TMP10]] +// +float4 fn(float2x2 M) { + float4 V = (float4)M; + return V; +} + +// CHECK-LABEL: define hidden noundef <3 x i32> @_Z3fn2u11matrix_typeILm3ELm1EiE( +// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x i32], align 4 +// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16 +// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16 +// CHECK-NEXT: store <3 x i32> [[M]], ptr [[M_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4 +// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <3 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[FLATCAST_TMP]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <3 x i32> [[TMP2]], i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[MATRIXEXT]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <3 x i32> [[TMP4]], i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[MATRIXEXT1]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <3 x i32> [[TMP6]], i32 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[MATRIXEXT2]], i64 2 +// CHECK-NEXT: store <3 x i32> [[TMP7]], ptr [[V]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load <3 x i32>, ptr [[V]], align 16 +// CHECK-NEXT: ret <3 x i32> [[TMP8]] +// +int3 fn2(int3x1 M) { + int3 V = (int3)M; + return V; +} + +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// COL-CHECK: {{.*}} +// ROW-CHECK: {{.*}} >From 805aebe5ff6bd6b39f07c383d5531cc51759c25e Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Mon, 26 Jan 2026 10:17:01 -0800 Subject: [PATCH 2/4] Adjust index iteration for matrix memory layout --- clang/lib/CodeGen/CGExpr.cpp | 18 ++++++++++++++---- .../BasicFeatures/MatrixToVectorCast.hlsl | 12 +++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5d595c2e3b1e5..9a6629e4b6f9f 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -7194,17 +7194,27 @@ void CodeGenFunction::FlattenAccessAndTypeLValue( // Matrices are represented as flat arrays in memory, but has a vector // value type. So we use ConvertMatrixAddress to convert the address from // array to vector, and extract elements similar to the vector case above. - // The order in which we iterate over the elements is sequentially in - // memory; whether the matrix is in row- or column-major order does not - // matter. + // The order in which we iterate over the elements must respect the + // matrix memory layout, computing the proper index for each (row, col). llvm::Type *LLVMT = ConvertTypeForMem(T); CharUnits Align = getContext().getTypeAlignInChars(T); Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT, Align, "matrix.gep"); LValue Base = MakeAddrLValue(GEP, T); Address MatAddr = MaybeConvertMatrixAddress(Base.getAddress(), *this); + unsigned NumRows = MT->getNumRows(); + unsigned NumCols = MT->getNumColumns(); + bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() == + LangOptions::MatrixMemoryLayout::MatrixRowMajor; + llvm::MatrixBuilder MB(Builder); for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) { - llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I); + // Compute (row, col) from linear index assuming row-major iteration. + unsigned Row = I / NumCols; + unsigned Col = I % NumCols; + llvm::Value *RowIdx = llvm::ConstantInt::get(IdxTy, Row); + llvm::Value *ColIdx = llvm::ConstantInt::get(IdxTy, Col); + llvm::Value *Idx = + MB.CreateIndex(RowIdx, ColIdx, NumRows, NumCols, IsMatrixRowMajor); LValue LV = LValue::MakeMatrixElt(MatAddr, Idx, MT->getElementType(), Base.getBaseInfo(), TBAAAccessInfo()); AccessList.emplace_back(LV); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl index 06af2ebe72473..0485fb73197ad 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl @@ -1,4 +1,3 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK @@ -18,10 +17,12 @@ // CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 // CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0 // CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 +// COL-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 +// ROW-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 // CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1 // CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 +// COL-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 +// ROW-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 // CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2 // CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 // CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3 @@ -36,7 +37,7 @@ float4 fn(float2x2 M) { } // CHECK-LABEL: define hidden noundef <3 x i32> @_Z3fn2u11matrix_typeILm3ELm1EiE( -// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0]] { +// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x i32], align 4 // CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16 @@ -65,6 +66,3 @@ int3 fn2(int3x1 M) { return V; } -//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -// COL-CHECK: {{.*}} -// ROW-CHECK: {{.*}} >From 98e55a9ea34b9387600ca6d5e650d81099f6dd9c Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 27 Jan 2026 10:48:18 -0800 Subject: [PATCH 3/4] Move tests to existing VectorElementwiseCast test file --- .../BasicFeatures/MatrixToVectorCast.hlsl | 68 ------------------- .../BasicFeatures/VectorElementwiseCast.hlsl | 61 ++++++++++++++++- 2 files changed, 60 insertions(+), 69 deletions(-) delete mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl deleted file mode 100644 index 0485fb73197ad..0000000000000 --- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixToVectorCast.hlsl +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z2fnu11matrix_typeILm2ELm2EfE( -// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x float], align 4 -// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4 -// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16 -// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4 -// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4 -// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 -// COL-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 -// ROW-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1 -// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 -// COL-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 -// ROW-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 -// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2 -// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3 -// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[MATRIXEXT3]], i64 3 -// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[V]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[V]], align 16 -// CHECK-NEXT: ret <4 x float> [[TMP10]] -// -float4 fn(float2x2 M) { - float4 V = (float4)M; - return V; -} - -// CHECK-LABEL: define hidden noundef <3 x i32> @_Z3fn2u11matrix_typeILm3ELm1EiE( -// CHECK-SAME: <3 x i32> noundef [[M:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [3 x i32], align 4 -// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16 -// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4 -// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16 -// CHECK-NEXT: store <3 x i32> [[M]], ptr [[M_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4 -// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4 -// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <3 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0 -// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[FLATCAST_TMP]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <3 x i32> [[TMP2]], i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[MATRIXEXT]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <3 x i32> [[TMP4]], i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[MATRIXEXT1]], i64 1 -// CHECK-NEXT: [[TMP6:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 -// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <3 x i32> [[TMP6]], i32 2 -// CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[MATRIXEXT2]], i64 2 -// CHECK-NEXT: store <3 x i32> [[TMP7]], ptr [[V]], align 16 -// CHECK-NEXT: [[TMP8:%.*]] = load <3 x i32>, ptr [[V]], align 16 -// CHECK-NEXT: ret <3 x i32> [[TMP8]] -// -int3 fn2(int3x1 M) { - int3 V = (int3)M; - return V; -} - diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl index 26aa41aaf4626..c11c8498ada45 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK // vector flat cast from array // CHECK-LABEL: define void {{.*}}call2 @@ -121,3 +122,61 @@ struct Derived : BFields { export void call6(Derived D) { int4 A = (int4)D; } + +// vector flat cast from matrix of same size (float) +// CHECK-LABEL: call7 +// CHECK: [[M_ADDR:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[V:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store <4 x float> %M, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4 +// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[MATRIXEXT]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// COL-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 +// ROW-CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[MATRIXEXT1]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// COL-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 +// ROW-CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[MATRIXEXT2]], i64 2 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3 +// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[MATRIXEXT3]], i64 3 +// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[V]], align 16 +// CHECK-NEXT: ret void +export void call7(float2x2 M) { + float4 V = (float4)M; +} + +// vector flat cast from matrix of same size (int) +// CHECK-LABEL: call8 +// CHECK: [[M_ADDR:%.*]] = alloca [3 x i32], align 4 +// CHECK-NEXT: [[V:%.*]] = alloca <3 x i32>, align 16 +// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [3 x i32], align 4 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <3 x i32>, align 16 +// CHECK-NEXT: store <3 x i32> %M, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store <3 x i32> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 4 +// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <3 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[FLATCAST_TMP]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <3 x i32> [[TMP2]], i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[MATRIXEXT]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <3 x i32> [[TMP4]], i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[MATRIXEXT1]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = load <3 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT2:%.*]] = extractelement <3 x i32> [[TMP6]], i32 2 +// CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[MATRIXEXT2]], i64 2 +// CHECK-NEXT: store <3 x i32> [[TMP7]], ptr [[V]], align 16 +// CHECK-NEXT: ret void +export void call8(int3x1 M) { + int3 V = (int3)M; +} + >From cd13943548d84e49290501a03165113fcfbbf6a8 Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 27 Jan 2026 10:53:00 -0800 Subject: [PATCH 4/4] Fixup code comments regarding matrix element indexing --- clang/lib/CodeGen/CGExpr.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 9a6629e4b6f9f..fcd5a30d6ac19 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -7194,8 +7194,8 @@ void CodeGenFunction::FlattenAccessAndTypeLValue( // Matrices are represented as flat arrays in memory, but has a vector // value type. So we use ConvertMatrixAddress to convert the address from // array to vector, and extract elements similar to the vector case above. - // The order in which we iterate over the elements must respect the - // matrix memory layout, computing the proper index for each (row, col). + // The matrix elements are iterated over in row-major order regardless of + // the memory layout of the matrix. llvm::Type *LLVMT = ConvertTypeForMem(T); CharUnits Align = getContext().getTypeAlignInChars(T); Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT, @@ -7208,7 +7208,6 @@ void CodeGenFunction::FlattenAccessAndTypeLValue( LangOptions::MatrixMemoryLayout::MatrixRowMajor; llvm::MatrixBuilder MB(Builder); for (unsigned I = 0, E = MT->getNumElementsFlattened(); I < E; I++) { - // Compute (row, col) from linear index assuming row-major iteration. unsigned Row = I / NumCols; unsigned Col = I % NumCols; llvm::Value *RowIdx = llvm::ConstantInt::get(IdxTy, Row); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
