llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Farzon Lotfi (farzonl) <details> <summary>Changes</summary> fixes #<!-- -->184877 This change was threefold. 1. copy the padded cbuffer from memory to a local alloca 2. switch to using the new `getFlattenedIndex` helpers for index generation 3. convert row major to column major indices in codegen depending on LangOptions --- Patch is 72.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/185471.diff 12 Files Affected: - (modified) clang/lib/AST/Expr.cpp (+12-12) - (modified) clang/lib/CodeGen/CGExpr.cpp (+28-3) - (added) clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl (+37) - (modified) clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl (+12-12) - (modified) clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl (+12-12) - (modified) clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-load.hlsl (+8-8) - (modified) clang/test/CodeGenHLSL/matrix-member-one-based-swizzle-store.hlsl (+24-24) - (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-load.hlsl (+12-12) - (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-accessor-scalar-store.hlsl (+12-12) - (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-load.hlsl (+8-8) - (modified) clang/test/CodeGenHLSL/matrix-member-zero-based-swizzle-store.hlsl (+24-24) - (added) clang/test/CodeGenHLSL/resources/MatrixElement_cbuffer.hlsl (+96) ``````````diff diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 9632d88fae4e4..185e887fb05c3 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4478,8 +4478,8 @@ static MatrixAccessorFormat GetHLSLMatrixAccessorFormat(StringRef Comp) { } template <typename Fn> -static bool ForEachMatrixAccessorIndex(StringRef Comp, unsigned Rows, - unsigned Cols, Fn &&F) { +static bool ForEachMatrixAccessorIndex(StringRef Comp, + const 
ConstantMatrixType *MT, Fn &&F) { auto Format = GetHLSLMatrixAccessorFormat(Comp); for (unsigned I = 0, E = Comp.size(); I < E; I += Format.ChunkLen) { @@ -4491,8 +4491,13 @@ static bool ForEachMatrixAccessorIndex(StringRef Comp, unsigned Rows, Col = static_cast<unsigned>(Comp[I + ZeroIndexOffset + 2] - '0') - OneIndexOffset; - assert(Row < Rows && Col < Cols && "matrix swizzle index out of bounds"); - const unsigned Index = Row * Cols + Col; + assert(Row < MT->getNumRows() && Col < MT->getNumColumns() && + "matrix swizzle index out of bounds"); + // NOTE: AST layer has no access to LangOptions so we will default to row + // major b\c all other AST matrix representations are row major. + // However in codegen we need to convert to column major if the flag + // requires it. + const unsigned Index = MT->getFlattenedIndex(Row, Col, /*IsRowMajor*/ true); // Callback returns true to continue, false to stop early. if (!F(Index)) return false; @@ -4507,13 +4512,10 @@ static bool ForEachMatrixAccessorIndex(StringRef Comp, unsigned Rows, bool MatrixElementExpr::containsDuplicateElements() const { StringRef Comp = Accessor->getName(); const auto *MT = getBase()->getType()->castAs<ConstantMatrixType>(); - const unsigned Rows = MT->getNumRows(); - const unsigned Cols = MT->getNumColumns(); - const unsigned Max = Rows * Cols; - llvm::BitVector Seen(Max, /*t=*/false); + llvm::BitVector Seen(MT->getNumElementsFlattened(), /*t=*/false); bool HasDup = false; - ForEachMatrixAccessorIndex(Comp, Rows, Cols, [&](unsigned Index) -> bool { + ForEachMatrixAccessorIndex(Comp, MT, [&](unsigned Index) -> bool { if (Seen[Index]) { HasDup = true; return false; // exit early @@ -4562,9 +4564,7 @@ void MatrixElementExpr::getEncodedElementAccess( SmallVectorImpl<uint32_t> &Elts) const { StringRef Comp = Accessor->getName(); const auto *MT = getBase()->getType()->castAs<ConstantMatrixType>(); - const unsigned Rows = MT->getNumRows(); - const unsigned Cols = MT->getNumColumns(); - 
ForEachMatrixAccessorIndex(Comp, Rows, Cols, [&](unsigned Index) -> bool { + ForEachMatrixAccessorIndex(Comp, MT, [&](unsigned Index) -> bool { Elts.push_back(Index); return true; }); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index eebb36276e0eb..fe5de9f8df09d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2329,15 +2329,39 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) { E->getType().withCVRQualifiers(Base.getQuals().getCVRQualifiers()); // Encode the element access list into a vector of unsigned indices. + // getEncodedElementAccess returns row-major linearized indices. SmallVector<uint32_t, 4> Indices; E->getEncodedElementAccess(Indices); + // getEncodedElementAccess returns row-major linearized indices + // If the matrix memory layout is column-major, convert indices + // to column-major indices. + bool IsColMajor = getLangOpts().getDefaultMatrixMemoryLayout() == + LangOptions::MatrixMemoryLayout::MatrixColMajor; + if (IsColMajor) { + const auto *MT = E->getBase()->getType()->castAs<ConstantMatrixType>(); + unsigned NumCols = MT->getNumColumns(); + for (uint32_t &Idx : Indices) { + // Decompose row-major index: Row = Idx / NumCols, Col = Idx % NumCols + unsigned Row = Idx / NumCols; + unsigned Col = Idx % NumCols; + // Re-linearize as column-major + Idx = MT->getColumnMajorFlattenedIndex(Row, Col); + } + } + if (Base.isSimple()) { + RawAddress MatAddr = Base.getAddress(); + if (getLangOpts().HLSL && + E->getBase()->getType().getAddressSpace() == LangAS::hlsl_constant) + MatAddr = CGM.getHLSLRuntime().createBufferMatrixTempAddress( + Base, E->getExprLoc(), *this); + llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - return LValue::MakeExtVectorElt( - MaybeConvertMatrixAddress(Base.getAddress(), *this), CV, ResultType, - Base.getBaseInfo(), TBAAAccessInfo()); + return LValue::MakeExtVectorElt(MaybeConvertMatrixAddress(MatAddr, *this), + CV, 
ResultType, Base.getBaseInfo(), + TBAAAccessInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -2347,6 +2371,7 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const MatrixElementExpr *E) { for (unsigned Index : Indices) CElts.push_back(BaseElts->getAggregateElement(Index)); llvm::Constant *CV = llvm::ConstantVector::get(CElts); + return LValue::MakeExtVectorElt( MaybeConvertMatrixAddress(Base.getExtVectorAddress(), *this), CV, ResultType, Base.getBaseInfo(), TBAAAccessInfo()); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl new file mode 100644 index 0000000000000..fe9e69c6a6898 --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementRowColFlags.hlsl @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes \ +// RUN: -emit-llvm -finclude-default-header -fmatrix-memory-layout=column-major \ +// RUN: -o - %s | FileCheck %s --check-prefixes=CHECK,COL +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes \ +// RUN: -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major \ +// RUN: -o - %s | FileCheck %s --check-prefixes=CHECK,ROW + +// For a float3x2 matrix (3 rows, 2 columns): +// Column-major flat vector: [_11, _21, _31, _12, _22, _32] +// idx: 0 1 2 3 4 5 +// Row-major flat vector: [_11, _12, _21, _22, _31, _32] +// idx: 0 1 2 3 4 5 + + +// CHECK-LABEL: define {{.*}} @_Z16getScalarElementu11matrix_typeILm3ELm2EfE +// CHECK: load <6 x float>, ptr +// COL-NEXT: extractelement <6 x float> {{.*}}, i32 4 +// ROW-NEXT: extractelement <6 x float> {{.*}}, i32 3 +export float getScalarElement(float3x2 M) { + return M._22; +} + +// CHECK-LABEL: define {{.*}} @_Z18getSwizzleElementsu11matrix_typeILm3ELm2EfE +// CHECK: load <6 x float>, ptr +// COL-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4> 
+// ROW-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +export float4 getSwizzleElements(float3x2 M) { + return M._11_12_21_22; +} + +// CHECK-LABEL: define {{.*}} @_Z22getZeroBasedSwizzleEltu11matrix_typeILm3ELm2EfE +// CHECK: load <6 x float>, ptr +// COL-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <2 x i32> <i32 1, i32 3> +// ROW-NEXT: shufflevector <6 x float> {{.*}}, <6 x float> poison, <2 x i32> <i32 2, i32 1> +export float2 getZeroBasedSwizzleElt(float3x2 M) { + return M._m10_m01; +} \ No newline at end of file diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl index def8aa5440568..6d8a3ce6ecbb6 100644 --- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl +++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-load.hlsl @@ -23,7 +23,7 @@ int Return11(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 1 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 4 // CHECK-NEXT: ret i32 [[TMP1]] // int Return12(int4x4 A) { @@ -36,7 +36,7 @@ int Return12(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 2 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 8 // CHECK-NEXT: ret i32 [[TMP1]] // int Return13(int4x4 A) { @@ -49,7 +49,7 @@ int Return13(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr 
[[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 3 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 12 // CHECK-NEXT: ret i32 [[TMP1]] // int Return14(int4x4 A) { @@ -62,7 +62,7 @@ int Return14(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 4 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 1 // CHECK-NEXT: ret i32 [[TMP1]] // int Return21(int4x4 A) { @@ -88,7 +88,7 @@ int Return22(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 6 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 9 // CHECK-NEXT: ret i32 [[TMP1]] // int Return23(int4x4 A) { @@ -101,7 +101,7 @@ int Return23(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 7 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 13 // CHECK-NEXT: ret i32 [[TMP1]] // int Return24(int4x4 A) { @@ -114,7 +114,7 @@ int Return24(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 8 +// CHECK-NEXT: [[TMP1:%.*]] = 
extractelement <16 x i32> [[TMP0]], i32 2 // CHECK-NEXT: ret i32 [[TMP1]] // int Return31(int4x4 A) { @@ -127,7 +127,7 @@ int Return31(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 9 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 6 // CHECK-NEXT: ret i32 [[TMP1]] // int Return32(int4x4 A) { @@ -153,7 +153,7 @@ int Return33(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 11 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 14 // CHECK-NEXT: ret i32 [[TMP1]] // int Return34(int4x4 A) { @@ -166,7 +166,7 @@ int Return34(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 12 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 3 // CHECK-NEXT: ret i32 [[TMP1]] // int Return41(int4x4 A) { @@ -179,7 +179,7 @@ int Return41(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 13 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 7 // CHECK-NEXT: ret i32 [[TMP1]] // int Return42(int4x4 A) { @@ -192,7 +192,7 @@ int Return42(int4x4 A) { // CHECK-NEXT: [[A_ADDR:%.*]] = alloca [4 x <4 x 
i32>], align 4 // CHECK-NEXT: store <16 x i32> [[A]], ptr [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 14 +// CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[TMP0]], i32 11 // CHECK-NEXT: ret i32 [[TMP1]] // int Return43(int4x4 A) { diff --git a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl index fb3a46170ebe0..9ea292ecea007 100644 --- a/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl +++ b/clang/test/CodeGenHLSL/matrix-member-one-based-accessor-scalar-store.hlsl @@ -29,7 +29,7 @@ void StoreScalarAtMat11(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -46,7 +46,7 @@ void StoreScalarAtMat12(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -63,7 +63,7 @@ void StoreScalarAtMat13(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -80,7 +80,7 @@ void StoreScalarAtMat14(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 4 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 1 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -114,7 +114,7 @@ void StoreScalarAtMat22(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -131,7 +131,7 @@ void StoreScalarAtMat23(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -148,7 +148,7 @@ void 
StoreScalarAtMat24(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 2 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -165,7 +165,7 @@ void StoreScalarAtMat31(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 9 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 6 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -199,7 +199,7 @@ void StoreScalarAtMat33(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 11 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 14 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -216,7 +216,7 @@ void StoreScalarAtMat34(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 12 +// CHECK-NEXT: 
[[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 3 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -233,7 +233,7 @@ void StoreScalarAtMat41(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META4]], !align [[META5]] -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 13 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <16 x i32>, ptr [[TMP1]], i32 0, i32 7 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // @@ -250,7 +250,7 @@ void StoreScalarAtMat42(out int4x4 A, int I) { // CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/185471 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
