https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/198887

>From 656f637d6da026c6fc0491ef0bcfc2b3c644a5fb Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Mon, 4 May 2026 12:10:08 -0400
Subject: [PATCH 01/10] [Matrix][HLSL] Add codegen support for Matrix Layout
 keywords

fixes #192262

- Wrap Matrix Type in HLSLMatrixLayoutAttr
- Add Helper to know which Matrix Layout to apply in codegen or check for in 
Sema
---
 clang/include/clang/AST/MatrixUtils.h         |  71 +++++++
 clang/include/clang/Basic/Attr.td             |  16 ++
 clang/lib/AST/TypePrinter.cpp                 |  20 ++
 clang/lib/CodeGen/CGExpr.cpp                  |  17 +-
 clang/lib/CodeGen/CGExprConstant.cpp          |   4 +-
 clang/lib/CodeGen/CGExprScalar.cpp            |  22 ++-
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |   9 +-
 clang/lib/CodeGen/CodeGenTypes.cpp            |   5 +-
 clang/lib/Sema/SemaHLSL.cpp                   |  79 ++++++++
 clang/test/AST/HLSL/matrix_layout_attr.hlsl   |  11 ++
 .../matrix-layout-attr-overrides-default.hlsl | 175 ++++++++++++++++++
 11 files changed, 400 insertions(+), 29 deletions(-)
 create mode 100644 clang/include/clang/AST/MatrixUtils.h
 create mode 100644 
clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl

diff --git a/clang/include/clang/AST/MatrixUtils.h 
b/clang/include/clang/AST/MatrixUtils.h
new file mode 100644
index 0000000000000..d1b351bf4b07b
--- /dev/null
+++ b/clang/include/clang/AST/MatrixUtils.h
@@ -0,0 +1,71 @@
+//===- MatrixUtils.h - Matrix AST utilities ---------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines AST-level helper utilities for matrix types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_MATRIXUTILS_H
+#define LLVM_CLANG_AST_MATRIXUTILS_H
+
+#include "clang/AST/Type.h"
+#include "clang/Basic/AttrKinds.h"
+#include "clang/Basic/LangOptions.h"
+
+namespace clang {
+/// Returns true if matrices of \p T should be laid out in row-major order.
+///
+/// In HLSL mode, the first `HLSLRowMajor` / `HLSLColumnMajor` AttributedType
+/// in the sugar chain of \p T (imprinted by Sema when a source decl carries
+/// `[[hlsl::row_major]]` / `[[hlsl::column_major]]`) takes precedence over the
+/// `-fmatrix-memory-layout=` default carried in \p LangOpts. Otherwise the
+/// LangOptions default is used.
+inline bool isMatrixRowMajor(const LangOptions &LangOpts, QualType T) {
+  if (LangOpts.HLSL && !T.isNull()) {
+    QualType Cur = T;
+    while (const auto *AT = Cur->getAs<AttributedType>()) {
+      switch (AT->getAttrKind()) {
+      case attr::HLSLRowMajor:
+        return true;
+      case attr::HLSLColumnMajor:
+        return false;
+      default:
+        break;
+      }
+      Cur = AT->getModifiedType();
+    }
+  }
+  return LangOpts.getDefaultMatrixMemoryLayout() ==
+         LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+}
+
+/// Returns true if matrices of \p T should be laid out in column-major order.
+/// Mirrors `isMatrixRowMajor`: in HLSL mode the first layout AttributedType in
+/// the sugar chain of \p T wins over the `-fmatrix-memory-layout=` default.
+inline bool isMatrixColumnMajor(const LangOptions &LangOpts, QualType T) {
+  if (LangOpts.HLSL && !T.isNull()) {
+    QualType Cur = T;
+    while (const auto *AT = Cur->getAs<AttributedType>()) {
+      switch (AT->getAttrKind()) {
+      case attr::HLSLColumnMajor:
+        return true;
+      case attr::HLSLRowMajor:
+        return false;
+      default:
+        break;
+      }
+      Cur = AT->getModifiedType();
+    }
+  }
+  return LangOpts.getDefaultMatrixMemoryLayout() ==
+         LangOptions::MatrixMemoryLayout::MatrixColMajor;
+}
+} // namespace clang
+
+#endif // LLVM_CLANG_AST_MATRIXUTILS_H
diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 32f72e8da960e..accb3ca3c4ce7 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5299,6 +5299,22 @@ def HLSLMatrixLayout : InheritableAttr {
   let Documentation = [HLSLMatrixLayoutDocs];
 }
 
+// Internal type-attribute markers attached to matrix QualTypes by Sema when
+// a decl carries `[[hlsl::row_major]]` / `[[hlsl::column_major]]`. They let
+// CodeGen / const-eval recover the layout from any matrix-typed expression
+// without re-walking back to the source decl.
+def HLSLRowMajor : TypeAttr {
+  let Spellings = [];
+  let LangOpts = [HLSL];
+  let Documentation = [InternalOnly];
+}
+
+def HLSLColumnMajor : TypeAttr {
+  let Spellings = [];
+  let LangOpts = [HLSL];
+  let Documentation = [InternalOnly];
+}
+
 def RandomizeLayout : InheritableAttr {
   let Spellings = [GCC<"randomize_layout">];
   let Subjects = SubjectList<[Record]>;
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 80f5b90ba35c4..7a5aac8bd5cd4 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -295,6 +295,13 @@ bool TypePrinter::canPrefixQualifiers(const Type *T,
       // We still want to print the address_space before the type if it is an
       // address_space attribute.
       const auto *AttrTy = cast<AttributedType>(UnderlyingType);
+      // HLSLRowMajor / HLSLColumnMajor are internal markers with no spelling;
+      // the printer skips them entirely, so look through them when deciding
+      // qualifier placement.
+      if (AttrTy->getAttrKind() == attr::HLSLRowMajor ||
+          AttrTy->getAttrKind() == attr::HLSLColumnMajor)
+        return canPrefixQualifiers(AttrTy->getModifiedType().getTypePtr(),
+                                   NeedARCStrongQualifier);
       CanPrefixQualifiers = AttrTy->getAttrKind() == attr::AddressSpace;
       break;
     }
@@ -1915,6 +1922,15 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
   if (T->getAttrKind() == attr::ObjCInertUnsafeUnretained)
     return;
 
+  // HLSLRowMajor / HLSLColumnMajor are internal markers attached to matrix
+  // types by Sema to record an explicit `[[hlsl::row_major]]` /
+  // `[[hlsl::column_major]]` qualifier from the source decl. They have no
+  // user-visible spelling; the user-facing form is the original keyword on
+  // the decl, not on the type.
+  if (T->getAttrKind() == attr::HLSLRowMajor ||
+      T->getAttrKind() == attr::HLSLColumnMajor)
+    return;
+
   // Don't print ns_returns_retained unless it had an effect.
   if (T->getAttrKind() == attr::NSReturnsRetained &&
       !T->getEquivalentType()->castAs<FunctionType>()
@@ -1992,6 +2008,10 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
   case attr::HLSLResourceDimension:
     llvm_unreachable("HLSL resource type attributes handled separately");
 
+  case attr::HLSLRowMajor:
+  case attr::HLSLColumnMajor:
+    llvm_unreachable("HLSL matrix layout type attributes handled separately");
+
   case attr::OpenCLPrivateAddressSpace:
   case attr::OpenCLGlobalAddressSpace:
   case attr::OpenCLGlobalDeviceAddressSpace:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 325902f2127bc..52853719a4d00 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -31,6 +31,7 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/InferAlloc.h"
+#include "clang/AST/MatrixUtils.h"
 #include "clang/AST/NSAPI.h"
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/StmtVisitor.h"
@@ -2360,8 +2361,7 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const 
MatrixElementExpr *E) {
   // getEncodedElementAccess returns row-major linearized indices
   // If the matrix memory layout is column-major, convert indices
   // to column-major indices.
-  bool IsColMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                    LangOptions::MatrixMemoryLayout::MatrixColMajor;
+  bool IsColMajor = isMatrixColumnMajor(getLangOpts(), 
E->getBase()->getType());
   if (IsColMajor) {
     const auto *MT = E->getBase()->getType()->castAs<ConstantMatrixType>();
     unsigned NumCols = MT->getNumColumns();
@@ -2617,8 +2617,7 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, 
SourceLocation Loc) {
         ColIdx = ColConstsIndices->getAggregateElement(Col);
       else
         ColIdx = llvm::ConstantInt::get(Row->getType(), Col);
-      bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                              LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+      bool IsMatrixRowMajor = isMatrixRowMajor(getLangOpts(), MatTy);
       llvm::Value *EltIndex =
           MB.CreateIndex(Row, ColIdx, NumRows, NumCols, IsMatrixRowMajor);
       llvm::Value *Elt = Builder.CreateExtractElement(MatrixVec, EltIndex);
@@ -2936,8 +2935,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, 
LValue Dst,
           ColIdx = ColConstsIndices->getAggregateElement(Col);
         else
           ColIdx = llvm::ConstantInt::get(Row->getType(), Col);
-        bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                                
LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+        bool IsMatrixRowMajor = isMatrixRowMajor(getLangOpts(), Dst.getType());
         llvm::Value *EltIndex =
             MB.CreateIndex(Row, ColIdx, NumRows, NumCols, IsMatrixRowMajor);
         llvm::Value *Lane = llvm::ConstantInt::get(Builder.getInt32Ty(), Col);
@@ -5229,8 +5227,8 @@ LValue CodeGenFunction::EmitMatrixSubscriptExpr(const 
MatrixSubscriptExpr *E) {
   const auto *MatrixTy = E->getBase()->getType()->castAs<ConstantMatrixType>();
   unsigned NumCols = MatrixTy->getNumColumns();
   unsigned NumRows = MatrixTy->getNumRows();
-  bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                          LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+  bool IsMatrixRowMajor =
+      isMatrixRowMajor(getLangOpts(), E->getBase()->getType());
   llvm::Value *FinalIdx =
       MB.CreateIndex(RowIdx, ColIdx, NumRows, NumCols, IsMatrixRowMajor);
 
@@ -7453,8 +7451,7 @@ void CodeGenFunction::FlattenAccessAndTypeLValue(
       Address MatAddr = MaybeConvertMatrixAddress(Base.getAddress(), *this);
       unsigned NumRows = MT->getNumRows();
       unsigned NumCols = MT->getNumColumns();
-      bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                              LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+      bool IsMatrixRowMajor = isMatrixRowMajor(getLangOpts(), T);
       llvm::MatrixBuilder MB(Builder);
       for (unsigned Row = 0; Row < MT->getNumRows(); Row++) {
         for (unsigned Col = 0; Col < MT->getNumColumns(); Col++) {
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp 
b/clang/lib/CodeGen/CGExprConstant.cpp
index 17a10dd40eba2..ffcd3fef9cd52 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -21,6 +21,7 @@
 #include "clang/AST/APValue.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/MatrixUtils.h"
 #include "clang/AST/NSAPI.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
@@ -2645,8 +2646,7 @@ ConstantEmitter::tryEmitPrivate(const APValue &Value, 
QualType DestType,
     unsigned NumElts = NumRows * NumCols;
     SmallVector<llvm::Constant *, 16> Inits(NumElts);
 
-    bool IsRowMajor = CGM.getLangOpts().getDefaultMatrixMemoryLayout() ==
-                      LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+    bool IsRowMajor = isMatrixRowMajor(CGM.getLangOpts(), DestType);
 
     for (unsigned Row = 0; Row != NumRows; ++Row) {
       for (unsigned Col = 0; Col != NumCols; ++Col) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index c8a8ec7b6d928..ddd28cfe5372c 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -26,6 +26,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/MatrixUtils.h"
 #include "clang/AST/ParentMapContext.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
@@ -2247,8 +2248,8 @@ Value *ScalarExprEmitter::VisitMatrixSingleSubscriptExpr(
 
   for (unsigned Col = 0; Col != NumColumns; ++Col) {
     Value *ColVal = llvm::ConstantInt::get(RowIdx->getType(), Col);
-    bool IsMatrixRowMajor = CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
-                            LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+    bool IsMatrixRowMajor =
+        isMatrixRowMajor(CGF.getLangOpts(), E->getBase()->getType());
     Value *EltIdx = MB.CreateIndex(RowIdx, ColVal, NumRows, NumColumns,
                                    IsMatrixRowMajor, "matrix_row_idx");
     Value *Elt =
@@ -2274,8 +2275,8 @@ Value 
*ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
   Value *Idx;
   unsigned NumCols = MatrixTy->getNumColumns();
   unsigned NumRows = MatrixTy->getNumRows();
-  bool IsMatrixRowMajor = CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
-                          LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+  bool IsMatrixRowMajor =
+      isMatrixRowMajor(CGF.getLangOpts(), E->getBase()->getType());
   Idx = MB.CreateIndex(RowIdx, ColumnIdx, NumRows, NumCols, IsMatrixRowMajor);
 
   if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0)
@@ -2360,8 +2361,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr 
*E) {
   // column-major positions rather than inserting sequentially and shuffling.
   const ConstantMatrixType *ColMajorMT = nullptr;
   if (const auto *MT = E->getType()->getAs<ConstantMatrixType>();
-      MT && CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
-                LangOptions::MatrixMemoryLayout::MatrixColMajor)
+      MT && isMatrixColumnMajor(CGF.getLangOpts(), E->getType()))
     ColMajorMT = MT;
 
   // Loop over initializers collecting the Value for each, and remembering
@@ -2600,8 +2600,7 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction 
&CGF, LValue SrcVal,
            "Flattened type on RHS must have the same number or more elements "
            "than vector on LHS.");
 
-    bool IsRowMajor = CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
-                      LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+    bool IsRowMajor = isMatrixRowMajor(CGF.getLangOpts(), DestTy);
 
     llvm::Value *V = CGF.Builder.CreateLoad(
         CGF.CreateIRTempWithoutCast(DestTy, "flatcast.tmp"));
@@ -3180,8 +3179,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       assert(SrcMatTy && "Source type must be a matrix type.");
       assert(NumRows <= SrcMatTy->getNumRows());
       assert(NumCols <= SrcMatTy->getNumColumns());
-      bool IsRowMajor = CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
-                        LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+
+      // isMatrixRowMajor needs the full sugared QualType to find matrix layout
+      // attrs. So use E->getType() (the source QualType) rather than
+      // SrcMatTy because getAs<ConstantMatrixType>() strips the sugar.
+      bool IsRowMajor = isMatrixRowMajor(CGF.getLangOpts(), E->getType());
       for (unsigned R = 0; R < NumRows; R++)
         for (unsigned C = 0; C < NumCols; C++)
           Mask[MatTy->getFlattenedIndex(R, C, IsRowMajor)] =
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp 
b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index bf599e2d77aa8..ace00a0e4692e 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -13,6 +13,7 @@
 #include "CGBuiltin.h"
 #include "CGHLSLRuntime.h"
 #include "CodeGenFunction.h"
+#include "clang/AST/MatrixUtils.h"
 #include "llvm/IR/MatrixBuilder.h"
 
 using namespace clang;
@@ -1206,8 +1207,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     // The matrix multiply intrinsic only operates on column-major order
     // matrices. Therefore matrix memory layout transforms must be inserted
     // before and after matrix multiply intrinsics.
-    bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                      LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+    // Use whichever operand is a matrix to discover its declared layout.
+    QualType MatTy = IsMat0 ? QTy0 : QTy1;
+    bool IsRowMajor = isMatrixRowMajor(getLangOpts(), MatTy);
 
     llvm::MatrixBuilder MB(Builder);
     if (IsVec0 && IsMat1) {
@@ -1259,8 +1261,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     // For row-major, a row-major RxC matrix is equivalent to a column-major
     // CxR matrix, so transposing with swapped dimensions produces the correct
     // row-major CxR result directly.
-    bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
-                      LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+    bool IsRowMajor = isMatrixRowMajor(getLangOpts(), E->getArg(0)->getType());
     if (IsRowMajor)
       return MB.CreateMatrixTranspose(Op0, Cols, Rows);
     return MB.CreateMatrixTranspose(Op0, Rows, Cols);
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp 
b/clang/lib/CodeGen/CodeGenTypes.cpp
index e76b1e8608d33..b28a0eb82f302 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/MatrixUtils.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -111,9 +112,7 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
 
       unsigned NumRows = MT->getNumRows();
       unsigned NumCols = MT->getNumColumns();
-      bool IsRowMajor =
-          CGM.getContext().getLangOpts().getDefaultMatrixMemoryLayout() ==
-          LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+      bool IsRowMajor = isMatrixRowMajor(Context.getLangOpts(), T);
       unsigned VecLen = IsRowMajor ? NumCols : NumRows;
       unsigned ArrayLen = IsRowMajor ? NumRows : NumCols;
       llvm::Type *VecTy = llvm::FixedVectorType::get(IRElemTy, VecLen);
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 04148fcd008d5..cc613d48add42 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2704,6 +2704,35 @@ static bool isMatrixOrArrayOfMatrix(const ASTContext 
&Ctx, QualType QT) {
   return Ty->isDependentType() || Ty->isConstantMatrixType();
 }
 
+/// Wraps the matrix type of \p T (or of its array element type) in an
+/// AttributedType of kind \p AttrK; any other type is returned unchanged.
+static QualType wrapMatrixWithLayoutAttr(ASTContext &C, QualType T,
+                                         attr::Kind AttrK) {
+  if (T.isNull() || T->isDependentType())
+    return T;
+
+  if (const auto *AT = dyn_cast<ArrayType>(T.getTypePtr())) {
+    QualType Inner = wrapMatrixWithLayoutAttr(C, AT->getElementType(), AttrK);
+    if (Inner == AT->getElementType())
+      return T;
+
+    // Inner != element type: rebuild the array around the wrapped element.
+    if (const auto *CAT = dyn_cast<ConstantArrayType>(T.getTypePtr()))
+      return C.getConstantArrayType(Inner, CAT->getSize(), CAT->getSizeExpr(),
+                                    CAT->getSizeModifier(),
+                                    CAT->getIndexTypeCVRQualifiers());
+    // Note: the IncompleteArrayType case would handle something like row_major
+    // float2x3 arr[] but HLSL doesn't support this syntax, so we can't test 
it.
+    // Consider removing.
+    if (const auto *IAT = dyn_cast<IncompleteArrayType>(T.getTypePtr()))
+      return C.getIncompleteArrayType(Inner, IAT->getSizeModifier(),
+                                      IAT->getIndexTypeCVRQualifiers());
+  }
+  if (T->isConstantMatrixType())
+    return C.getAttributedType(AttrK, T, T);
+  return T;
+}
+
 static bool diagnoseMatrixLayoutOnNonMatrix(Sema &SemaRef, Decl *D,
                                             SourceLocation Loc,
                                             const IdentifierInfo *AttrName) {
@@ -2731,6 +2760,53 @@ static bool diagnoseMatrixLayoutOnNonMatrix(Sema 
&SemaRef, Decl *D,
   return true;
 }
 
+/// Wraps the matrix type of \p D (function return, value, or typedef type) in
+/// the layout type attribute chosen by \p AL, so no decl lookups are needed.
+static void applyHLSLMatrixLayoutTypeAttr(ASTContext &Ctx, Decl *D,
+                                          const ParsedAttr &AL) {
+  attr::Kind AttrK =
+      AL.getSemanticSpelling() == HLSLMatrixLayoutAttr::Keyword_row_major
+          ? attr::HLSLRowMajor
+          : attr::HLSLColumnMajor;
+
+  auto *TD = dyn_cast<TypedefNameDecl>(D);
+  auto *VD = dyn_cast<ValueDecl>(D);
+
+  if (!TD && !VD)
+    return;
+
+  if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+    const auto *FPT = FD->getType()->getAs<FunctionProtoType>();
+    if (!FPT)
+      return;
+
+    QualType NewRet =
+        wrapMatrixWithLayoutAttr(Ctx, FPT->getReturnType(), AttrK);
+    if (NewRet == FPT->getReturnType())
+      return;
+
+    FD->setType(Ctx.getFunctionType(NewRet, FPT->getParamTypes(),
+                                    FPT->getExtProtoInfo()));
+    return;
+  }
+
+  if (VD) {
+    QualType OldT = VD->getType();
+    QualType NewT = wrapMatrixWithLayoutAttr(Ctx, OldT, AttrK);
+    if (NewT != OldT)
+      VD->setType(NewT);
+    return;
+  }
+
+  if (TD) {
+    QualType OldT = TD->getUnderlyingType();
+    QualType NewT = wrapMatrixWithLayoutAttr(Ctx, OldT, AttrK);
+    if (NewT != OldT)
+      TD->setModedTypeSourceInfo(TD->getTypeSourceInfo(), NewT);
+    return;
+  }
+}
+
 void SemaHLSL::handleMatrixLayoutAttr(Decl *D, const ParsedAttr &AL) {
   // row_major and column_major are only valid on matrix types.
   if (diagnoseMatrixLayoutOnNonMatrix(SemaRef, D, AL.getLoc(),
@@ -2752,6 +2828,9 @@ void SemaHLSL::handleMatrixLayoutAttr(Decl *D, const 
ParsedAttr &AL) {
   }
 
   D->addAttr(::new (getASTContext()) HLSLMatrixLayoutAttr(getASTContext(), 
AL));
+
+  ASTContext &Ctx = getASTContext();
+  applyHLSLMatrixLayoutTypeAttr(Ctx, D, AL);
 }
 
 bool SemaHLSL::diagnoseInstantiatedMatrixLayoutAttr(
diff --git a/clang/test/AST/HLSL/matrix_layout_attr.hlsl 
b/clang/test/AST/HLSL/matrix_layout_attr.hlsl
index f1c24a3b5a954..98bcb9976c58c 100644
--- a/clang/test/AST/HLSL/matrix_layout_attr.hlsl
+++ b/clang/test/AST/HLSL/matrix_layout_attr.hlsl
@@ -19,6 +19,17 @@ struct S {
   column_major float3x3 m2;
 };
 
+
+// Check to make sure the internal type-sugar marker is invisible in the
+// printed type (it has no spelling), but the `AttributedType` node and
+// the `HLSLMatrixLayoutAttr` child still appear in the AST dump.
+
 // CHECK: TypedefDecl {{.*}} RM44 'float4x4'
+// CHECK: AttributedType {{.*}} 'float4x4' sugar
 // CHECK: HLSLMatrixLayoutAttr {{.*}} row_major
 typedef row_major float4x4 RM44;
+
+// CHECK-LABEL: TypedefDecl {{.*}} CM44 'float4x4'
+// CHECK:       AttributedType {{.*}} 'float4x4' sugar
+// CHECK:       HLSLMatrixLayoutAttr {{.*}} column_major
+typedef column_major float4x4 CM44;
diff --git a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl 
b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
new file mode 100644
index 0000000000000..8320807e1cd89
--- /dev/null
+++ b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
@@ -0,0 +1,175 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -fmatrix-memory-layout=column-major -o - | FileCheck %s
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -fmatrix-memory-layout=row-major -o - | FileCheck %s
+
+// Verifies that a per-decl `[[hlsl::row_major]]` / `[[hlsl::column_major]]`
+// (spelled `row_major` / `column_major` in HLSL) overrides the
+// `-fmatrix-memory-layout=` default at every CodeGen lowering site:
+//
+//   * `MatrixSubscriptExpr` index computation
+//   * `MatrixSingleSubscriptExpr` row extraction
+//   * `__builtin_hlsl_mul` matrix-multiply transpose insertion
+//   * `__builtin_hlsl_transpose` row/col dimension swap
+//   * `CK_HLSLMatrixTruncation` shuffle mask
+//
+// The decl-level attribute should win regardless of the TU default.
+
+// 
-----------------------------------------------------------------------------
+// MatrixSubscriptExpr indexing: row-major attr -> Row*NumCols + Col
+// 
-----------------------------------------------------------------------------
+export float subscript_rm(int row, int col, row_major float2x3 m) {
+  return m[row][col];
+}
+// CHECK-LABEL: define {{.*}} float 
@_Z12subscript_rmiiu11matrix_typeILm2ELm3EfE
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[COL:%.*]] = load i32, ptr %col.addr
+// CHECK: [[OFFSET:%.*]] = mul i32 [[ROW]], 3
+// CHECK: [[IDX:%.*]] = add i32 [[OFFSET]], [[COL]]
+// CHECK: extractelement <6 x float> %{{.*}}, i32 [[IDX]]
+
+// 
-----------------------------------------------------------------------------
+// MatrixSubscriptExpr indexing: column-major attr -> Col*NumRows + Row
+// 
-----------------------------------------------------------------------------
+export float subscript_cm(int row, int col, column_major float2x3 m) {
+  return m[row][col];
+}
+// CHECK-LABEL: define {{.*}} float 
@_Z12subscript_cmiiu11matrix_typeILm2ELm3EfE
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[COL:%.*]] = load i32, ptr %col.addr
+// CHECK: [[OFFSET:%.*]] = mul i32 [[COL]], 2
+// CHECK: [[IDX:%.*]] = add i32 [[OFFSET]], [[ROW]]
+// CHECK: extractelement <6 x float> %{{.*}}, i32 [[IDX]]
+
+// 
-----------------------------------------------------------------------------
+// MatrixSingleSubscriptExpr (row extraction): attribute selects the 
per-element
+// index formula even when the TU default disagrees.
+// 
-----------------------------------------------------------------------------
+
+// Row-major: per-column element index is Row*NumCols + Col, materialized as a
+// constant-zero / constant-one / constant-two add to (Row*3).
+export float3 row_extract_rm(int row, row_major float2x3 m) {
+  return m[row];
+}
+// CHECK-LABEL: define {{.*}} <3 x float> 
@_Z14row_extract_rmiu11matrix_typeILm2ELm3EfE
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[ROW_OFFSET0:%.*]] = mul i32 [[ROW]], 3
+// CHECK: add i32 [[ROW_OFFSET0]], 0
+// CHECK: [[ROW_OFFSET1:%.*]] = mul i32 [[ROW]], 3
+// CHECK: add i32 [[ROW_OFFSET1]], 1
+// CHECK: [[ROW_OFFSET2:%.*]] = mul i32 [[ROW]], 3
+// CHECK: add i32 [[ROW_OFFSET2]], 2
+
+// Column-major: per-column element index is Col*NumRows + Row, so we *don't*
+// see the Row*NumCols multiply; instead each column folds the constant
+// Col*NumRows into the GEP, leaving just an add of Row.
+export float3 row_extract_cm(int row, column_major float2x3 m) {
+  return m[row];
+}
+// CHECK-LABEL: define {{.*}} <3 x float> 
@_Z14row_extract_cmiu11matrix_typeILm2ELm3EfE
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: add i32 0, [[ROW]]
+// CHECK: add i32 2, [[ROW]]
+// CHECK: add i32 4, [[ROW]]
+
+// 
-----------------------------------------------------------------------------
+// __builtin_hlsl_mul (vector * matrix): row-major operand triggers a transpose
+// before the column-major matrix.multiply intrinsic.
+// 
-----------------------------------------------------------------------------
+export float3 vec_mat_rm(float2 v, row_major float2x3 m) { return mul(v, m); }
+// CHECK-LABEL: define {{.*}} <3 x float> 
@_Z10vec_mat_rmDv2_fu11matrix_typeILm2ELm3EfE
+// CHECK: [[T:%.*]] = call {{.*}} <6 x float> @llvm.matrix.transpose.v6f32(<6 
x float> %{{.*}}, i32 3, i32 2)
+// CHECK: call {{.*}} <3 x float> @llvm.matrix.multiply.v3f32.v2f32.v6f32(<2 x 
float> %{{.*}}, <6 x float> [[T]], i32 1, i32 2, i32 3)
+
+// Column-major operand: no transpose is inserted before matrix.multiply.
+export float3 vec_mat_cm(float2 v, column_major float2x3 m) { return mul(v, 
m); }
+// CHECK-LABEL: define {{.*}} <3 x float> 
@_Z10vec_mat_cmDv2_fu11matrix_typeILm2ELm3EfE
+// CHECK-NOT: @llvm.matrix.transpose
+// CHECK: call {{.*}} <3 x float> @llvm.matrix.multiply.v3f32.v2f32.v6f32(<2 x 
float> %{{.*}}, <6 x float> %{{.*}}, i32 1, i32 2, i32 3)
+
+// 
-----------------------------------------------------------------------------
+// __builtin_hlsl_transpose: row-major operand swaps Rows/Cols passed to the
+// underlying intrinsic.
+// 
-----------------------------------------------------------------------------
+
+// Row-major float2x3 transposed: passes (Cols=3, Rows=2) to the intrinsic.
+export float3x2 transpose_rm(row_major float2x3 m) { return transpose(m); }
+// CHECK-LABEL: define {{.*}} <6 x float> 
@_Z12transpose_rmu11matrix_typeILm2ELm3EfE
+// CHECK: call {{.*}} <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> 
%{{.*}}, i32 3, i32 2)
+
+// Column-major float2x3 transposed: passes (Rows=2, Cols=3) to the intrinsic.
+export float3x2 transpose_cm(column_major float2x3 m) { return transpose(m); }
+// CHECK-LABEL: define {{.*}} <6 x float> 
@_Z12transpose_cmu11matrix_typeILm2ELm3EfE
+// CHECK: call {{.*}} <6 x float> @llvm.matrix.transpose.v6f32(<6 x float> 
%{{.*}}, i32 2, i32 3)
+
+// 
-----------------------------------------------------------------------------
+// CK_HLSLMatrixTruncation: the shuffle mask that picks elements from the
+// source matrix uses the operand's per-decl layout to flatten indices.
+// 
-----------------------------------------------------------------------------
+
+// Row-major source 3x2 -> row-major dest 2x2: flat row-major mask is 
{0,1,2,3}.
+export float2x2 truncate_rm(row_major float3x2 m) { return (float2x2)m; }
+// CHECK-LABEL: define {{.*}} <4 x float> 
@_Z11truncate_rmu11matrix_typeILm3ELm2EfE
+// CHECK: shufflevector <6 x float> %{{.*}}, <6 x float> poison, <4 x i32> 
<i32 0, i32 1, i32 2, i32 3>
+
+// Column-major source 3x2 -> column-major dest 2x2: flat column-major mask is 
{0,1,3,4}.
+export float2x2 truncate_cm(column_major float3x2 m) { return (float2x2)m; }
+// CHECK-LABEL: define {{.*}} <4 x float> 
@_Z11truncate_cmu11matrix_typeILm3ELm2EfE
+// CHECK: shufflevector <6 x float> %{{.*}}, <6 x float> poison, <4 x i32> 
<i32 0, i32 1, i32 3, i32 4>
+
+// 
-----------------------------------------------------------------------------
+// Array of matrix: the per-decl layout attribute propagates through
+// ConstantArrayType sugar via wrapMatrixWithLayoutAttr, so indexing into an
+// array element still uses the correct layout.
+// 
-----------------------------------------------------------------------------
+
+// Row-major array element subscript: Row*NumCols + Col
+export float arr_subscript_rm(int row, int col, row_major float2x3 arr[2]) {
+  return arr[1][row][col];
+}
+// CHECK-LABEL: define {{.*}} float @_Z16arr_subscript_rm
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[COL:%.*]] = load i32, ptr %col.addr
+// CHECK: [[OFFSET:%.*]] = mul i32 [[ROW]], 3
+// CHECK: [[IDX:%.*]] = add i32 [[OFFSET]], [[COL]]
+// CHECK: extractelement <6 x float> %{{.*}}, i32 [[IDX]]
+
+// Column-major array element subscript: Col*NumRows + Row
+export float arr_subscript_cm(int row, int col, column_major float2x3 arr[2]) {
+  return arr[1][row][col];
+}
+// CHECK-LABEL: define {{.*}} float @_Z16arr_subscript_cm
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[COL:%.*]] = load i32, ptr %col.addr
+// CHECK: [[OFFSET:%.*]] = mul i32 [[COL]], 2
+// CHECK: [[IDX:%.*]] = add i32 [[OFFSET]], [[ROW]]
+// CHECK: extractelement <6 x float> %{{.*}}, i32 [[IDX]]
+
+// 
-----------------------------------------------------------------------------
+// Multi-dimensional array of matrix: wrapMatrixWithLayoutAttr recurses
+// through nested ConstantArrayType layers.
+// 
-----------------------------------------------------------------------------
+
+// Row-major 2D array element subscript: Row*NumCols + Col
+export float arr2d_subscript_rm(int row, int col, row_major float2x3 
arr[2][3]) {
+  return arr[0][1][row][col];
+}
+// CHECK-LABEL: define {{.*}} float @_Z18arr2d_subscript_rm
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[COL:%.*]] = load i32, ptr %col.addr
+// CHECK: [[OFFSET:%.*]] = mul i32 [[ROW]], 3
+// CHECK: [[IDX:%.*]] = add i32 [[OFFSET]], [[COL]]
+// CHECK: extractelement <6 x float> %{{.*}}, i32 [[IDX]]
+
+// Column-major 2D array element subscript: Col*NumRows + Row
+export float arr2d_subscript_cm(int row, int col, column_major float2x3 
arr[2][3]) {
+  return arr[0][1][row][col];
+}
+// CHECK-LABEL: define {{.*}} float @_Z18arr2d_subscript_cm
+// CHECK: [[ROW:%.*]] = load i32, ptr %row.addr
+// CHECK: [[COL:%.*]] = load i32, ptr %col.addr
+// CHECK: [[OFFSET:%.*]] = mul i32 [[COL]], 2
+// CHECK: [[IDX:%.*]] = add i32 [[OFFSET]], [[ROW]]
+// CHECK: extractelement <6 x float> %{{.*}}, i32 [[IDX]]

>From 124a51dbeb380cc11ba06d5f0a3db864c74bbfe0 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Mon, 11 May 2026 14:37:57 -0400
Subject: [PATCH 02/10] switch to just ConstantArrayType, HLSL does not support
 IncompleteArrayType

---
 clang/include/clang/AST/MatrixUtils.h |  2 +-
 clang/lib/Sema/SemaHLSL.cpp           | 21 ++++++---------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/AST/MatrixUtils.h 
b/clang/include/clang/AST/MatrixUtils.h
index d1b351bf4b07b..b6385100c3e4d 100644
--- a/clang/include/clang/AST/MatrixUtils.h
+++ b/clang/include/clang/AST/MatrixUtils.h
@@ -1,4 +1,4 @@
-//===- MatrixUtils.h - Matrix AST utilities ---------------------*- C++ 
-*-===//
+//===- MatrixUtils.h - Matrix AST utilities 
-------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index cc613d48add42..67bb88bf91581 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2711,22 +2711,13 @@ static QualType wrapMatrixWithLayoutAttr(ASTContext &C, 
QualType T,
   if (T.isNull() || T->isDependentType())
     return T;
 
-  if (const auto *AT = dyn_cast<ArrayType>(T.getTypePtr())) {
-    QualType Inner = wrapMatrixWithLayoutAttr(C, AT->getElementType(), AttrK);
-    if (Inner == AT->getElementType())
+  if (const auto *CAT = dyn_cast<ConstantArrayType>(T.getTypePtr())) {
+    QualType Inner = wrapMatrixWithLayoutAttr(C, CAT->getElementType(), AttrK);
+    if (Inner == CAT->getElementType())
       return T;
-
-    // fallthrough Inner != AT->getElementType()
-    if (const auto *CAT = dyn_cast<ConstantArrayType>(T.getTypePtr()))
-      return C.getConstantArrayType(Inner, CAT->getSize(), CAT->getSizeExpr(),
-                                    CAT->getSizeModifier(),
-                                    CAT->getIndexTypeCVRQualifiers());
-    // Note The IncompleteArrayType case would handle something like row_major
-    // float2x3 arr[] but HLSL doesn't support this syntax. so we can't test 
it.
-    // consider removing.
-    if (const auto *IAT = dyn_cast<IncompleteArrayType>(T.getTypePtr()))
-      return C.getIncompleteArrayType(Inner, IAT->getSizeModifier(),
-                                      IAT->getIndexTypeCVRQualifiers());
+    return C.getConstantArrayType(Inner, CAT->getSize(), CAT->getSizeExpr(),
+                                  CAT->getSizeModifier(),
+                                  CAT->getIndexTypeCVRQualifiers());
   }
   if (T->isConstantMatrixType())
     return C.getAttributedType(AttrK, T, T);

>From 753477b7b5cd36bfc3981e50261ceab21e18bba5 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Thu, 14 May 2026 16:05:40 -0400
Subject: [PATCH 03/10] Make Matrix multiply check both matrix types for
 attribute type sugar of matrix layout

---
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 15 ++++++-------
 .../matrix-layout-attr-overrides-default.hlsl | 21 +++++++++++++++++++
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp 
b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index ace00a0e4692e..4a8e3a22fb80b 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1208,8 +1208,8 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     // matrices. Therefore matrix memory layout transforms must be inserted
     // before and after matrix multiply intrinsics.
     // Use whichever operand is a matrix to discover its declared layout.
-    QualType MatTy = IsMat0 ? QTy0 : QTy1;
-    bool IsRowMajor = isMatrixRowMajor(getLangOpts(), MatTy);
+    bool IsRowMajorMat0 = IsMat0 && isMatrixRowMajor(getLangOpts(), QTy0);
+    bool IsRowMajorMat1 = IsMat1 && isMatrixRowMajor(getLangOpts(), QTy1);
 
     llvm::MatrixBuilder MB(Builder);
     if (IsVec0 && IsMat1) {
@@ -1218,7 +1218,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
       unsigned Rows = MatTy->getNumRows();
       unsigned Cols = MatTy->getNumColumns();
       assert(N == Rows && "vector length must match matrix row count");
-      if (IsRowMajor)
+      if (IsRowMajorMat1)
         Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows, Cols);
       return MB.CreateMatrixMultiply(Op0, Op1, 1, N, Cols, "hlsl.mul");
     }
@@ -1228,7 +1228,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
       unsigned Cols = MatTy->getNumColumns();
       assert(QTy1->castAs<VectorType>()->getNumElements() == Cols &&
              "vector length must match matrix column count");
-      if (IsRowMajor)
+      if (IsRowMajorMat0)
         Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows, Cols);
       return MB.CreateMatrixMultiply(Op0, Op1, Rows, Cols, 1, "hlsl.mul");
     }
@@ -1241,13 +1241,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
     unsigned Cols1 = MatTy1->getNumColumns();
     assert(Cols0 == Rows1 &&
            "inner matrix dimensions must match for multiplication");
-    if (IsRowMajor) {
+    if (IsRowMajorMat0)
       Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows0, Cols0);
+    if (IsRowMajorMat1)
       Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows1, Cols1);
-    }
+
     Value *Result =
         MB.CreateMatrixMultiply(Op0, Op1, Rows0, Cols0, Cols1, "hlsl.mul");
-    if (IsRowMajor)
+    if (IsRowMajorMat0 || IsRowMajorMat1)
       Result = MB.CreateColumnMajorToRowMajorTransform(Result, Rows0, Cols1);
     return Result;
   }
diff --git a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl 
b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
index 8320807e1cd89..4e8196c885da9 100644
--- a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
@@ -89,6 +89,27 @@ export float3 vec_mat_cm(float2 v, column_major float2x3 m) 
{ return mul(v, m);
 // CHECK-NOT: @llvm.matrix.transpose
 // CHECK: call {{.*}} <3 x float> @llvm.matrix.multiply.v3f32.v2f32.v6f32(<2 x 
float> %{{.*}}, <6 x float> %{{.*}}, i32 1, i32 2, i32 3)
 
+// 
-----------------------------------------------------------------------------
+// __builtin_hlsl_mul (matrix * matrix): mixed per-decl layouts cause a
+// transpose only on the row-major operand.
+// 
-----------------------------------------------------------------------------
+
+// LHS row-major, RHS column-major: only LHS is transposed.
+export float2x2 mat_mat_rm_cm(row_major float2x3 a, column_major float3x2 b) { 
return mul(a, b); }
+// CHECK-LABEL: define {{.*}} <4 x float> @_Z13mat_mat_rm_cm
+// CHECK: [[AMat:%.*]] = load <6 x float>, ptr %a.addr, align 4
+// CHECK: [[BMat:%.*]] = load <6 x float>, ptr %b.addr, align 4
+// CHECK: [[T:%.*]] = call {{.*}} <6 x float> @llvm.matrix.transpose.v6f32(<6 
x float> [[AMat]], i32 3, i32 2)
+// CHECK: call {{.*}} <4 x float> @llvm.matrix.multiply.v4f32.v6f32.v6f32(<6 x 
float> [[T]], <6 x float> [[BMat]], i32 2, i32 3, i32 2)
+
+// LHS column-major, RHS row-major: only RHS is transposed.
+export float2x2 mat_mat_cm_rm(column_major float2x3 a, row_major float3x2 b) { 
return mul(a, b); }
+// CHECK-LABEL: define {{.*}} <4 x float> @_Z13mat_mat_cm_rm
+// CHECK: [[AMat:%.*]] = load <6 x float>, ptr %a.addr, align 4
+// CHECK: [[BMat:%.*]] = load <6 x float>, ptr %b.addr, align 4
+// CHECK: [[T:%.*]] = call {{.*}} <6 x float> @llvm.matrix.transpose.v6f32(<6 
x float> [[BMat]], i32 2, i32 3)
+// CHECK: call {{.*}} <4 x float> @llvm.matrix.multiply.v4f32.v6f32.v6f32(<6 x 
float> [[AMat]], <6 x float> [[T]], i32 2, i32 3, i32 2)
+
 // 
-----------------------------------------------------------------------------
 // __builtin_hlsl_transpose: row-major operand swaps Rows/Cols passed to the
 // underlying intrinsic.

>From a496b17fbbe5b7cdfb13840fb89d80cf0167fc59 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Wed, 20 May 2026 09:08:51 -0400
Subject: [PATCH 04/10] remove isMatrixColumnMajor

---
 clang/include/clang/AST/MatrixUtils.h | 22 ----------------------
 clang/lib/CodeGen/CGExpr.cpp          |  4 ++--
 clang/lib/CodeGen/CGExprScalar.cpp    |  2 +-
 3 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/clang/include/clang/AST/MatrixUtils.h 
b/clang/include/clang/AST/MatrixUtils.h
index b6385100c3e4d..ef6cbba6ba7c0 100644
--- a/clang/include/clang/AST/MatrixUtils.h
+++ b/clang/include/clang/AST/MatrixUtils.h
@@ -44,28 +44,6 @@ inline bool isMatrixRowMajor(const LangOptions &LangOpts, 
QualType T) {
   return LangOpts.getDefaultMatrixMemoryLayout() ==
          LangOptions::MatrixMemoryLayout::MatrixRowMajor;
 }
-
-/// Returns true if matrices of \p T should be laid out in column-major order.
-/// Mirrors `isMatrixRowMajor`; per-decl HLSL attributes win over the
-/// `-fmatrix-memory-layout=` default.
-inline bool isMatrixColumnMajor(const LangOptions &LangOpts, QualType T) {
-  if (LangOpts.HLSL && !T.isNull()) {
-    QualType Cur = T;
-    while (const auto *AT = Cur->getAs<AttributedType>()) {
-      switch (AT->getAttrKind()) {
-      case attr::HLSLColumnMajor:
-        return true;
-      case attr::HLSLRowMajor:
-        return false;
-      default:
-        break;
-      }
-      Cur = AT->getModifiedType();
-    }
-  }
-  return LangOpts.getDefaultMatrixMemoryLayout() ==
-         LangOptions::MatrixMemoryLayout::MatrixColMajor;
-}
 } // namespace clang
 
 #endif // LLVM_CLANG_AST_MATRIXUTILS_H
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 52853719a4d00..6d08473dd24d9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2361,8 +2361,8 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const 
MatrixElementExpr *E) {
   // getEncodedElementAccess returns row-major linearized indices
   // If the matrix memory layout is column-major, convert indices
   // to column-major indices.
-  bool IsColMajor = isMatrixColumnMajor(getLangOpts(), 
E->getBase()->getType());
-  if (IsColMajor) {
+  bool IsRowlMajor = isMatrixRowMajor(getLangOpts(), E->getBase()->getType());
+  if (!IsRowlMajor) {
     const auto *MT = E->getBase()->getType()->castAs<ConstantMatrixType>();
     unsigned NumCols = MT->getNumColumns();
     for (uint32_t &Idx : Indices) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index ddd28cfe5372c..c1d2e8a104fe7 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2361,7 +2361,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr 
*E) {
   // column-major positions rather than inserting sequentially and shuffling.
   const ConstantMatrixType *ColMajorMT = nullptr;
   if (const auto *MT = E->getType()->getAs<ConstantMatrixType>();
-      MT && isMatrixColumnMajor(CGF.getLangOpts(), E->getType()))
+      MT && !isMatrixRowMajor(CGF.getLangOpts(), E->getType()))
     ColMajorMT = MT;
 
   // Loop over initializers collecting the Value for each, and remembering

>From b7dd228c6d2ff4bce041925974a537e77bf2fd4b Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Wed, 20 May 2026 16:02:26 -0400
Subject: [PATCH 05/10] remove decl attribute, switch entirely to type attribute

Assisted by Claude Opus 4.7
---
 clang/include/clang/Basic/Attr.td             |  22 +--
 clang/include/clang/Sema/SemaHLSL.h           |   6 +-
 clang/lib/AST/TypePrinter.cpp                 |  27 +--
 clang/lib/Sema/SemaDeclAttr.cpp               |   3 -
 clang/lib/Sema/SemaHLSL.cpp                   | 164 ++++++------------
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |   7 -
 clang/lib/Sema/SemaType.cpp                   |   7 +
 clang/lib/Sema/TreeTransform.h                |  10 ++
 clang/test/AST/HLSL/matrix_layout_attr.hlsl   |  28 +--
 clang/test/SemaHLSL/matrix_layout_attr.hlsl   |   3 -
 10 files changed, 96 insertions(+), 181 deletions(-)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index accb3ca3c4ce7..782aec69622ff 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5292,28 +5292,20 @@ def HLSLVkLocation : HLSLAnnotationAttr {
   let Documentation = [HLSLVkLocationDocs];
 }
 
-def HLSLMatrixLayout : InheritableAttr {
-  let Spellings = [CustomKeyword<"row_major">, CustomKeyword<"column_major">];
-  let Subjects = SubjectList<[Var, TypedefName, Field, Function]>;
-  let LangOpts = [HLSL];
-  let Documentation = [HLSLMatrixLayoutDocs];
-}
-
-// Internal type-attribute markers attached to matrix QualTypes by Sema when
-// a decl carries `[[hlsl::row_major]]` / `[[hlsl::column_major]]`. They let
-// CodeGen / const-eval recover the layout from any matrix-typed expression
-// without re-walking back to the source decl.
+// `row_major` / `column_major` are HLSL keywords that select the in-memory
+// layout of a matrix-typed declaration. 
 def HLSLRowMajor : TypeAttr {
-  let Spellings = [];
+  let Spellings = [CustomKeyword<"row_major">];
   let LangOpts = [HLSL];
-  let Documentation = [InternalOnly];
+  let Documentation = [HLSLMatrixLayoutDocs];
 }
 
 def HLSLColumnMajor : TypeAttr {
-  let Spellings = [];
+  let Spellings = [CustomKeyword<"column_major">];
   let LangOpts = [HLSL];
-  let Documentation = [InternalOnly];
+  let Documentation = [HLSLMatrixLayoutDocs];
 }
+def : MutualExclusions<[HLSLRowMajor, HLSLColumnMajor]>;
 
 def RandomizeLayout : InheritableAttr {
   let Spellings = [GCC<"randomize_layout">];
diff --git a/clang/include/clang/Sema/SemaHLSL.h 
b/clang/include/clang/Sema/SemaHLSL.h
index e65de5d4aa4c3..6cdb158e86e61 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -187,9 +187,9 @@ class SemaHLSL : public SemaBase {
   void handleShaderAttr(Decl *D, const ParsedAttr &AL);
   void handleResourceBindingAttr(Decl *D, const ParsedAttr &AL);
   void handleParamModifierAttr(Decl *D, const ParsedAttr &AL);
-  void handleMatrixLayoutAttr(Decl *D, const ParsedAttr &AL);
-  bool diagnoseInstantiatedMatrixLayoutAttr(Decl *D,
-                                            const HLSLMatrixLayoutAttr *Attr);
+  Attr *buildMatrixLayoutTypeAttr(QualType T, const ParsedAttr &AL);
+  bool diagnoseMatrixLayoutInstantiation(attr::Kind K, QualType T,
+                                         SourceLocation Loc);
   bool handleResourceTypeAttr(QualType T, const ParsedAttr &AL);
 
   template <typename T>
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 7a5aac8bd5cd4..d85ed244f643a 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -295,13 +295,6 @@ bool TypePrinter::canPrefixQualifiers(const Type *T,
       // We still want to print the address_space before the type if it is an
       // address_space attribute.
       const auto *AttrTy = cast<AttributedType>(UnderlyingType);
-      // HLSLRowMajor / HLSLColumnMajor are internal markers with no spelling;
-      // the printer skips them entirely, so look through them when deciding
-      // qualifier placement.
-      if (AttrTy->getAttrKind() == attr::HLSLRowMajor ||
-          AttrTy->getAttrKind() == attr::HLSLColumnMajor)
-        return canPrefixQualifiers(AttrTy->getModifiedType().getTypePtr(),
-                                   NeedARCStrongQualifier);
       CanPrefixQualifiers = AttrTy->getAttrKind() == attr::AddressSpace;
       break;
     }
@@ -1922,15 +1915,6 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
   if (T->getAttrKind() == attr::ObjCInertUnsafeUnretained)
     return;
 
-  // HLSLRowMajor / HLSLColumnMajor are internal markers attached to matrix
-  // types by Sema to record an explicit `[[hlsl::row_major]]` /
-  // `[[hlsl::column_major]]` qualifier from the source decl. They have no
-  // user-visible spelling; the user-facing form is the original keyword on
-  // the decl, not on the type.
-  if (T->getAttrKind() == attr::HLSLRowMajor ||
-      T->getAttrKind() == attr::HLSLColumnMajor)
-    return;
-
   // Don't print ns_returns_retained unless it had an effect.
   if (T->getAttrKind() == attr::NSReturnsRetained &&
       !T->getEquivalentType()->castAs<FunctionType>()
@@ -2008,10 +1992,6 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
   case attr::HLSLResourceDimension:
     llvm_unreachable("HLSL resource type attributes handled separately");
 
-  case attr::HLSLRowMajor:
-  case attr::HLSLColumnMajor:
-    llvm_unreachable("HLSL matrix layout type attributes handled separately");
-
   case attr::OpenCLPrivateAddressSpace:
   case attr::OpenCLGlobalAddressSpace:
   case attr::OpenCLGlobalDeviceAddressSpace:
@@ -2069,6 +2049,13 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
     OS << "ns_returns_retained";
     break;
 
+  case attr::HLSLRowMajor:
+    OS << "row_major";
+    break;
+  case attr::HLSLColumnMajor:
+    OS << "column_major";
+    break;
+
   // FIXME: When Sema learns to form this AttributedType, avoid printing the
   // attribute again in printFunctionProtoAfter.
   case attr::AnyX86NoCfCheck: OS << "nocf_check"; break;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ae04d3855f01c..fa93ef90a6505 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -8193,9 +8193,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, 
const ParsedAttr &AL,
   case ParsedAttr::AT_HLSLParamModifier:
     S.HLSL().handleParamModifierAttr(D, AL);
     break;
-  case ParsedAttr::AT_HLSLMatrixLayout:
-    S.HLSL().handleMatrixLayoutAttr(D, AL);
-    break;
   case ParsedAttr::AT_HLSLUnparsedSemantic:
     S.HLSL().handleSemanticAttr(D, AL);
     break;
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 67bb88bf91581..70e0feab29e8f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2704,130 +2704,74 @@ static bool isMatrixOrArrayOfMatrix(const ASTContext 
&Ctx, QualType QT) {
   return Ty->isDependentType() || Ty->isConstantMatrixType();
 }
 
-/// Embeds the row/column-major layout ( \p AttrK ) directly into the
-/// matrix QualType of \p T, preserving any surrounding type sugar.
-static QualType wrapMatrixWithLayoutAttr(ASTContext &C, QualType T,
-                                         attr::Kind AttrK) {
-  if (T.isNull() || T->isDependentType())
-    return T;
-
-  if (const auto *CAT = dyn_cast<ConstantArrayType>(T.getTypePtr())) {
-    QualType Inner = wrapMatrixWithLayoutAttr(C, CAT->getElementType(), AttrK);
-    if (Inner == CAT->getElementType())
-      return T;
-    return C.getConstantArrayType(Inner, CAT->getSize(), CAT->getSizeExpr(),
-                                  CAT->getSizeModifier(),
-                                  CAT->getIndexTypeCVRQualifiers());
-  }
-  if (T->isConstantMatrixType())
-    return C.getAttributedType(AttrK, T, T);
-  return T;
-}
-
-static bool diagnoseMatrixLayoutOnNonMatrix(Sema &SemaRef, Decl *D,
-                                            SourceLocation Loc,
-                                            const IdentifierInfo *AttrName) {
-  QualType Ty;
-  if (auto *VD = dyn_cast<ValueDecl>(D))
-    Ty = VD->getType();
-  else if (auto *TD = dyn_cast<TypedefNameDecl>(D))
-    Ty = TD->getUnderlyingType();
-
-  if (Ty.isNull() || Ty->isDependentType())
-    return false;
-
-  // For functions, the qualifier can apply to the return type or any 
parameter.
-  if (const auto *FPT = Ty->getAs<FunctionProtoType>()) {
-    if (isMatrixOrArrayOfMatrix(SemaRef.getASTContext(), FPT->getReturnType()))
-      return false;
-    SemaRef.Diag(Loc, diag::err_hlsl_matrix_layout_non_matrix) << AttrName;
-    return true;
+/// Walks the existing AttributedType sugar of \p T looking for a previously
+/// applied HLSLRowMajor/HLSLColumnMajor marker. If one is found, populates
+/// \p ExistingKind with its attr::Kind and returns true.
+static bool findExistingMatrixLayoutMarker(QualType T,
+                                           attr::Kind &ExistingKind) {
+  QualType Cur = T;
+  while (const auto *AT = Cur->getAs<AttributedType>()) {
+    attr::Kind K = AT->getAttrKind();
+    if (K == attr::HLSLRowMajor || K == attr::HLSLColumnMajor) {
+      ExistingKind = K;
+      return true;
+    }
+    Cur = AT->getModifiedType();
   }
-
-  if (isMatrixOrArrayOfMatrix(SemaRef.getASTContext(), Ty))
-    return false;
-
-  SemaRef.Diag(Loc, diag::err_hlsl_matrix_layout_non_matrix) << AttrName;
-  return true;
+  return false;
 }
 
-/// Embeds layout attributes into the matrix type so no decl lookups are
-/// needed.
-static void applyHLSLMatrixLayoutTypeAttr(ASTContext &Ctx, Decl *D,
-                                          const ParsedAttr &AL) {
-  attr::Kind AttrK =
-      AL.getSemanticSpelling() == HLSLMatrixLayoutAttr::Keyword_row_major
-          ? attr::HLSLRowMajor
-          : attr::HLSLColumnMajor;
-
-  auto *TD = dyn_cast<TypedefNameDecl>(D);
-  auto *VD = dyn_cast<ValueDecl>(D);
-
-  if (!TD && !VD)
-    return;
-
-  if (auto *FD = dyn_cast<FunctionDecl>(D)) {
-    const auto *FPT = FD->getType()->getAs<FunctionProtoType>();
-    if (!FPT)
-      return;
-
-    QualType NewRet =
-        wrapMatrixWithLayoutAttr(Ctx, FPT->getReturnType(), AttrK);
-    if (NewRet == FPT->getReturnType())
-      return;
-
-    FD->setType(Ctx.getFunctionType(NewRet, FPT->getParamTypes(),
-                                    FPT->getExtProtoInfo()));
-    return;
-  }
+Attr *SemaHLSL::buildMatrixLayoutTypeAttr(QualType T, const ParsedAttr &AL) {
+  ASTContext &Ctx = getASTContext();
+  attr::Kind AttrK = AL.getKind() == ParsedAttr::AT_HLSLRowMajor
+                         ? attr::HLSLRowMajor
+                         : attr::HLSLColumnMajor;
 
-  if (VD) {
-    QualType OldT = VD->getType();
-    QualType NewT = wrapMatrixWithLayoutAttr(Ctx, OldT, AttrK);
-    if (NewT != OldT)
-      VD->setType(NewT);
-    return;
-  }
+  if (T.isNull())
+    return nullptr;
 
-  if (TD) {
-    QualType OldT = TD->getUnderlyingType();
-    QualType NewT = wrapMatrixWithLayoutAttr(Ctx, OldT, AttrK);
-    if (NewT != OldT)
-      TD->setModedTypeSourceInfo(TD->getTypeSourceInfo(), NewT);
-    return;
+  // For non-dependent types, the operand must be a matrix (or array of
+  // matrices).
+  if (!T->isDependentType() && !isMatrixOrArrayOfMatrix(Ctx, T)) {
+    Diag(AL.getLoc(), diag::err_hlsl_matrix_layout_non_matrix)
+        << AL.getAttrName();
+    AL.setInvalid();
+    return nullptr;
   }
-}
-
-void SemaHLSL::handleMatrixLayoutAttr(Decl *D, const ParsedAttr &AL) {
-  // row_major and column_major are only valid on matrix types.
-  if (diagnoseMatrixLayoutOnNonMatrix(SemaRef, D, AL.getLoc(),
-                                      AL.getAttrName()))
-    return;
 
-  // Check for conflicting or duplicate matrix layout attributes.
-  if (const auto *Existing = D->getAttr<HLSLMatrixLayoutAttr>()) {
-    if (Existing->getSemanticSpelling() != AL.getSemanticSpelling()) {
-      Diag(AL.getLoc(), diag::err_hlsl_matrix_layout_conflict)
-          << AL.getAttrName() << Existing->getAttrName();
-      Diag(Existing->getLoc(), diag::note_conflicting_attribute);
-    } else {
+  // Conflict / duplicate detection by walking existing sugar.
+  attr::Kind ExistingKind;
+  if (findExistingMatrixLayoutMarker(T, ExistingKind)) {
+    if (ExistingKind == AttrK) {
       Diag(AL.getLoc(), diag::warn_duplicate_attribute_exact)
           << AL.getAttrName();
-      Diag(Existing->getLoc(), diag::note_previous_attribute);
+      return nullptr;
     }
-    return;
+    IdentifierInfo *ExistingII = &Ctx.Idents.get(
+        ExistingKind == attr::HLSLRowMajor ? "row_major" : "column_major");
+    Diag(AL.getLoc(), diag::err_hlsl_matrix_layout_conflict)
+        << AL.getAttrName() << ExistingII;
+    AL.setInvalid();
+    return nullptr;
   }
 
-  D->addAttr(::new (getASTContext()) HLSLMatrixLayoutAttr(getASTContext(), 
AL));
-
-  ASTContext &Ctx = getASTContext();
-  applyHLSLMatrixLayoutTypeAttr(Ctx, D, AL);
+  if (AttrK == attr::HLSLRowMajor)
+    return ::new (Ctx) HLSLRowMajorAttr(Ctx, AL);
+  return ::new (Ctx) HLSLColumnMajorAttr(Ctx, AL);
 }
 
-bool SemaHLSL::diagnoseInstantiatedMatrixLayoutAttr(
-    Decl *D, const HLSLMatrixLayoutAttr *Attr) {
-  return diagnoseMatrixLayoutOnNonMatrix(SemaRef, D, Attr->getLoc(),
-                                         Attr->getAttrName());
+bool SemaHLSL::diagnoseMatrixLayoutInstantiation(attr::Kind K, QualType T,
+                                                 SourceLocation Loc) {
+  if (K != attr::HLSLRowMajor && K != attr::HLSLColumnMajor)
+    return false;
+  if (T.isNull() || T->isDependentType())
+    return false;
+  if (isMatrixOrArrayOfMatrix(getASTContext(), T))
+    return false;
+  IdentifierInfo *II = &getASTContext().Idents.get(
+      K == attr::HLSLRowMajor ? "row_major" : "column_major");
+  Diag(Loc, diag::err_hlsl_matrix_layout_non_matrix) << II;
+  return true;
 }
 
 namespace {
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp 
b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index c9bc613a7c4ea..324d6bf3857c7 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1044,13 +1044,6 @@ void Sema::InstantiateAttrs(const 
MultiLevelTemplateArgumentList &TemplateArgs,
       continue;
     }
 
-    if (auto *A = dyn_cast<HLSLMatrixLayoutAttr>(TmplAttr)) {
-      if (!HLSL().diagnoseInstantiatedMatrixLayoutAttr(New, A) &&
-          !New->hasAttr<HLSLMatrixLayoutAttr>())
-        New->addAttr(A->clone(Context));
-      continue;
-    }
-
     assert(!TmplAttr->isPackExpansion());
     if (TmplAttr->isLateParsed() && LateAttrs) {
       // Late parsed attributes must be instantiated and attached after the
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 0e6532a6e2178..6283fe4f41f36 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -9129,6 +9129,13 @@ static void processTypeAttrs(TypeProcessingState &state, 
QualType &type,
       }
       attr.setUsedAsTypeAttr();
       break;
+    case ParsedAttr::AT_HLSLRowMajor:
+    case ParsedAttr::AT_HLSLColumnMajor:
+      if (Attr *A =
+              state.getSema().HLSL().buildMatrixLayoutTypeAttr(type, attr))
+        type = state.getAttributedType(A, type, type);
+      attr.setUsedAsTypeAttr();
+      break;
     OBJC_POINTER_TYPE_ATTRS_CASELIST:
       if (!handleObjCPointerTypeAttr(state, attr, type))
         distributeObjCPointerTypeAttr(state, attr, type);
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 70cdc47e42d5d..77c8f17439a1a 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -39,6 +39,7 @@
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenACC.h"
@@ -7673,6 +7674,15 @@ QualType 
TreeTransform<Derived>::TransformAttributedType(TypeLocBuilder &TLB,
   if (modifiedType.isNull())
     return QualType();
 
+  // HLSL: re-validate matrix-layout markers after substitution. If the
+  // post-substitution type is no longer a matrix, diagnose now.
+  if (SemaRef.getLangOpts().HLSL &&
+      SemaRef.HLSL().diagnoseMatrixLayoutInstantiation(
+          oldType->getAttrKind(), modifiedType,
+          TL.getAttr() ? TL.getAttr()->getLocation()
+                       : TL.getModifiedLoc().getBeginLoc()))
+    return QualType();
+
   // oldAttr can be null if we started with a QualType rather than a TypeLoc.
   const Attr *oldAttr = TL.getAttr();
   const Attr *newAttr = oldAttr ? getDerived().TransformAttr(oldAttr) : 
nullptr;
diff --git a/clang/test/AST/HLSL/matrix_layout_attr.hlsl 
b/clang/test/AST/HLSL/matrix_layout_attr.hlsl
index 98bcb9976c58c..5e5084feb143f 100644
--- a/clang/test/AST/HLSL/matrix_layout_attr.hlsl
+++ b/clang/test/AST/HLSL/matrix_layout_attr.hlsl
@@ -1,35 +1,23 @@
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library 
-finclude-default-header \
 // RUN:   -std=hlsl202x -ast-dump -x hlsl %s | FileCheck %s
 
-// CHECK: VarDecl {{.*}} rm_mat 'hlsl_constant float3x3'
-// CHECK-NEXT: HLSLMatrixLayoutAttr {{.*}} row_major
+// CHECK: VarDecl {{.*}} rm_mat 'float3x3 hlsl_constant 
__attribute__((row_major))':'matrix<float, 3, 3> hlsl_constant'
 row_major float3x3 rm_mat;
-
-// CHECK: VarDecl {{.*}} cm_mat 'hlsl_constant float4x4'
-// CHECK-NEXT: HLSLMatrixLayoutAttr {{.*}} column_major
+// CHECK: VarDecl {{.*}} cm_mat 'float4x4 hlsl_constant 
__attribute__((column_major))':'matrix<float, 4, 4> hlsl_constant'
 column_major float4x4 cm_mat;
 
 // CHECK: CXXRecordDecl {{.*}} struct S definition
 struct S {
-  // CHECK: FieldDecl {{.*}} m1 'float2x2'
-  // CHECK-NEXT: HLSLMatrixLayoutAttr {{.*}} row_major
+  // CHECK: FieldDecl {{.*}} m1 'float2x2 
__attribute__((row_major))':'matrix<float, 2, 2>'
   row_major float2x2 m1;
-  // CHECK: FieldDecl {{.*}} m2 'float3x3'
-  // CHECK-NEXT: HLSLMatrixLayoutAttr {{.*}} column_major
+  // CHECK: FieldDecl {{.*}} m2 'float3x3 
__attribute__((column_major))':'matrix<float, 3, 3>'
   column_major float3x3 m2;
 };
 
-
-// Check to make sure the internal type-sugar marker is invisible in the
-//printed type (it has no spelling), but the `AttributedType` node and
-// the `HLSLMatrixLayoutAttr` child still appear in the AST dump.
-
-// CHECK: TypedefDecl {{.*}} RM44 'float4x4'
-// CHECK: AttributedType {{.*}} 'float4x4' sugar
-// CHECK: HLSLMatrixLayoutAttr {{.*}} row_major
+// CHECK-LABEL: TypedefDecl {{.*}} RM44 'float4x4 
__attribute__((row_major))':'matrix<float, 4, 4>'
+// CHECK-NEXT:  AttributedType {{.*}} 'float4x4 __attribute__((row_major))' 
sugar
 typedef row_major float4x4 RM44;
 
-// CHECK-LABEL: TypedefDecl {{.*}} CM44 'float4x4'
-// CHECK:       AttributedType {{.*}} 'float4x4' sugar
-// CHECK:       HLSLMatrixLayoutAttr {{.*}} column_major
+// CHECK-LABEL: TypedefDecl {{.*}} CM44 'float4x4 
__attribute__((column_major))':'matrix<float, 4, 4>'
+// CHECK-NEXT:  AttributedType {{.*}} 'float4x4 __attribute__((column_major))' 
sugar
 typedef column_major float4x4 CM44;
diff --git a/clang/test/SemaHLSL/matrix_layout_attr.hlsl 
b/clang/test/SemaHLSL/matrix_layout_attr.hlsl
index 3e953f07557e6..a5c3cc38027b0 100644
--- a/clang/test/SemaHLSL/matrix_layout_attr.hlsl
+++ b/clang/test/SemaHLSL/matrix_layout_attr.hlsl
@@ -102,16 +102,13 @@ struct S2 {
 typedef row_major float ScalarRM;
 
 // Invalid: conflicting row_major and column_major on same decl.
-// expected-note@+2 {{conflicting attribute is here}}
 // expected-error@+1 {{'column_major' and 'row_major' attributes are not 
compatible}}
 row_major column_major float3x3 conflict_mat;
 
 // Invalid: duplicate row_major.
-// expected-note@+2 {{previous attribute is here}}
 // expected-warning@+1 {{attribute 'row_major' is already applied}}
 row_major row_major float3x3 dup_rm_mat;
 
 // Invalid: duplicate column_major.
-// expected-note@+2 {{previous attribute is here}}
 // expected-warning@+1 {{attribute 'column_major' is already applied}}
 column_major column_major float4x4 dup_cm_mat;

>From fb40fc8553c62146f1dfb0061a2aef66d894f9cd Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Wed, 20 May 2026 18:10:36 -0400
Subject: [PATCH 06/10] address obvious PR comments

---
 clang/lib/CodeGen/CGExpr.cpp         | 4 ++--
 clang/lib/CodeGen/CGExprScalar.cpp   | 9 +++++----
 clang/lib/CodeGen/CGHLSLBuiltins.cpp | 4 +++-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6d08473dd24d9..19b5c61395b34 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2361,8 +2361,8 @@ LValue CodeGenFunction::EmitMatrixElementExpr(const 
MatrixElementExpr *E) {
   // getEncodedElementAccess returns row-major linearized indices
   // If the matrix memory layout is column-major, convert indices
   // to column-major indices.
-  bool IsRowlMajor = isMatrixRowMajor(getLangOpts(), E->getBase()->getType());
-  if (!IsRowlMajor) {
+  bool IsRowMajor = isMatrixRowMajor(getLangOpts(), E->getBase()->getType());
+  if (!IsRowMajor) {
     const auto *MT = E->getBase()->getType()->castAs<ConstantMatrixType>();
     unsigned NumCols = MT->getNumColumns();
     for (uint32_t &Idx : Indices) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index c1d2e8a104fe7..8170fa41d1ea1 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2246,10 +2246,11 @@ Value 
*ScalarExprEmitter::VisitMatrixSingleSubscriptExpr(
   auto *ResultTy = llvm::FixedVectorType::get(ElemTy, NumColumns);
   Value *RowVec = llvm::PoisonValue::get(ResultTy);
 
+  bool IsMatrixRowMajor =
+      isMatrixRowMajor(CGF.getLangOpts(), E->getBase()->getType());
+
   for (unsigned Col = 0; Col != NumColumns; ++Col) {
     Value *ColVal = llvm::ConstantInt::get(RowIdx->getType(), Col);
-    bool IsMatrixRowMajor =
-        isMatrixRowMajor(CGF.getLangOpts(), E->getBase()->getType());
     Value *EltIdx = MB.CreateIndex(RowIdx, ColVal, NumRows, NumColumns,
                                    IsMatrixRowMajor, "matrix_row_idx");
     Value *Elt =
@@ -3181,8 +3182,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       assert(NumCols <= SrcMatTy->getNumColumns());
 
       // isMatrixRowMajor needs the full sugared QualType to find matrix layout
-      // attrs. So use Use E->getType() (the source QualType) rather than
-      // SrcMatTy b\c getAs<ConstantMatrixType>() strips the sugar.
+      // attrs. So use E->getType() (the source QualType) rather than
+      // SrcMatTy b/c getAs<ConstantMatrixType>() strips the sugar.
       bool IsRowMajor = isMatrixRowMajor(CGF.getLangOpts(), E->getType());
       for (unsigned R = 0; R < NumRows; R++)
         for (unsigned C = 0; C < NumCols; C++)
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp 
b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 4a8e3a22fb80b..e9511f5729f15 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1248,7 +1248,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,
 
     Value *Result =
         MB.CreateMatrixMultiply(Op0, Op1, Rows0, Cols0, Cols1, "hlsl.mul");
-    if (IsRowMajorMat0 || IsRowMajorMat1)
+
+    bool IsResultRowMajor = isMatrixRowMajor(getLangOpts(), E->getType());
+    if (IsResultRowMajor)
       Result = MB.CreateColumnMajorToRowMajorTransform(Result, Rows0, Cols1);
     return Result;
   }

>From 9788f7e5f80d3d5c4bf52e8834d76be349250c4a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Wed, 20 May 2026 21:37:30 -0400
Subject: [PATCH 07/10] keep type sugar in matrix truncation so we can look up
 DestTy matrix layout type.

---
 clang/lib/CodeGen/CGExprScalar.cpp            | 13 +++----
 clang/lib/Sema/SemaExprCXX.cpp                |  8 +++--
 .../matrix-layout-attr-overrides-default.hlsl | 35 +++++++++++++++++--
 .../BuiltIns/logical-mat-operator-errors.hlsl |  8 ++---
 .../MatrixElementOverloadResolution.hlsl      | 26 +++++++-------
 .../MatrixImplicitTruncCastWarnings.hlsl      | 14 ++++----
 6 files changed, 69 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 8170fa41d1ea1..3a3dff7bec347 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3181,14 +3181,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       assert(NumRows <= SrcMatTy->getNumRows());
       assert(NumCols <= SrcMatTy->getNumColumns());
 
-      // isMatrixRowMajor needs the full sugared QualType to find matrix layout
-      // attrs. So use E->getType() (the source QualType) rather than
-      // SrcMatTy b/c getAs<ConstantMatrixType>() strips the sugar.
-      bool IsRowMajor = isMatrixRowMajor(CGF.getLangOpts(), E->getType());
+      // isMatrixRowMajor needs the full sugared QualType to find matrix layout
+      // attrs, so query E->getType() and DestTy rather than SrcMatTy and MatTy,
+      // because getAs<ConstantMatrixType>() strips the sugar.
+      bool IsSrcRowMajor = isMatrixRowMajor(CGF.getLangOpts(), E->getType());
+      bool IsDstRowMajor = isMatrixRowMajor(CGF.getLangOpts(), DestTy);
       for (unsigned R = 0; R < NumRows; R++)
         for (unsigned C = 0; C < NumCols; C++)
-          Mask[MatTy->getFlattenedIndex(R, C, IsRowMajor)] =
-              SrcMatTy->getFlattenedIndex(R, C, IsRowMajor);
+          Mask[MatTy->getFlattenedIndex(R, C, IsDstRowMajor)] =
+              SrcMatTy->getFlattenedIndex(R, C, IsSrcRowMajor);
 
       return Builder.CreateShuffleVector(Mat, Mask, "trunc");
     }
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 00c873833c8a7..4e1652462b3ae 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5344,9 +5344,11 @@ Sema::PerformImplicitConversion(Expr *From, QualType 
ToType,
     case ICK_HLSL_Matrix_Truncation: {
       auto *FromMat = From->getType()->castAs<ConstantMatrixType>();
       QualType TruncTy = FromMat->getElementType();
-      if (auto *ToMat = ToType->getAs<ConstantMatrixType>())
-        TruncTy = Context.getConstantMatrixType(TruncTy, ToMat->getNumRows(),
-                                                ToMat->getNumColumns());
+      // Preserve any sugar (e.g. `row_major`/`column_major` HLSL TypeAttrs) on
+      // `ToType` so that downstream CodeGen can query the destination layout
+      // from the cast node itself rather than falling back to the TU default.
+      if (ToType->getAs<ConstantMatrixType>())
+        TruncTy = ToType;
       From = ImpCastExprToType(From, TruncTy, CK_HLSLMatrixTruncation,
                                From->getValueKind())
                  .get();
diff --git a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl 
b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
index 4e8196c885da9..6536a4341d3e9 100644
--- a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
@@ -130,16 +130,47 @@ export float3x2 transpose_cm(column_major float2x3 m) { 
return transpose(m); }
 // source matrix uses the operand's per-decl layout to flatten indices.
 // 
-----------------------------------------------------------------------------
 
+typedef row_major    float2x2 RM22;
+typedef column_major float2x2 CM22;
+typedef row_major    float3x3 RM33;
+typedef column_major float3x3 CM33;
+
 // Row-major source 3x2 -> row-major dest 2x2: flat row-major mask is 
{0,1,2,3}.
-export float2x2 truncate_rm(row_major float3x2 m) { return (float2x2)m; }
+export row_major float2x2 truncate_rm(row_major float3x2 m) { return (RM22)m; }
 // CHECK-LABEL: define {{.*}} <4 x float> 
@_Z11truncate_rmu11matrix_typeILm3ELm2EfE
 // CHECK: shufflevector <6 x float> %{{.*}}, <6 x float> poison, <4 x i32> 
<i32 0, i32 1, i32 2, i32 3>
 
 // Column-major source 3x2 -> column-major dest 2x2: flat column-major mask is 
{0,1,3,4}.
-export float2x2 truncate_cm(column_major float3x2 m) { return (float2x2)m; }
+export column_major float2x2 truncate_cm(column_major float3x2 m) { return 
(CM22)m; }
 // CHECK-LABEL: define {{.*}} <4 x float> 
@_Z11truncate_cmu11matrix_typeILm3ELm2EfE
 // CHECK: shufflevector <6 x float> %{{.*}}, <6 x float> poison, <4 x i32> 
<i32 0, i32 1, i32 3, i32 4>
 
+// 
-----------------------------------------------------------------------------
+// CK_HLSLMatrixTruncation cross-layout: when source and destination carry
+// different layout keywords, `IsSrcRowMajor` and `IsDstRowMajor` differ. The
+// source indices flatten using the source layout while the destination
+// positions flatten using the destination layout. This is independent of the
+// `-fmatrix-memory-layout=` default.
+// 
-----------------------------------------------------------------------------
+
+// Row-major src 3x4 -> column-major dst 3x3.
+// src idx (R,C) = R*4+C; dst slot (R,C) = C*3+R.
+//   (0,0)->mask[0]=0  (0,1)->mask[3]=1  (0,2)->mask[6]=2
+//   (1,0)->mask[1]=4  (1,1)->mask[4]=5  (1,2)->mask[7]=6
+//   (2,0)->mask[2]=8  (2,1)->mask[5]=9  (2,2)->mask[8]=10
+export column_major float3x3 truncate_rm_to_cm(row_major float3x4 m) { return 
(CM33)m; }
+// CHECK-LABEL: define {{.*}} <9 x float> 
@_Z17truncate_rm_to_cmu11matrix_typeILm3ELm4EfE
+// CHECK: shufflevector <12 x float> %{{.*}}, <12 x float> poison, <9 x i32> 
<i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10>
+
+// Column-major src 3x4 -> row-major dst 3x3.
+// src idx (R,C) = C*3+R; dst slot (R,C) = R*3+C.
+//   (0,0)->mask[0]=0  (0,1)->mask[1]=3  (0,2)->mask[2]=6
+//   (1,0)->mask[3]=1  (1,1)->mask[4]=4  (1,2)->mask[5]=7
+//   (2,0)->mask[6]=2  (2,1)->mask[7]=5  (2,2)->mask[8]=8
+export row_major float3x3 truncate_cm_to_rm(column_major float3x4 m) { return 
(RM33)m; }
+// CHECK-LABEL: define {{.*}} <9 x float> 
@_Z17truncate_cm_to_rmu11matrix_typeILm3ELm4EfE
+// CHECK: shufflevector <12 x float> %{{.*}}, <12 x float> poison, <9 x i32> 
<i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
+
 // 
-----------------------------------------------------------------------------
 // Array of matrix: the per-decl layout attribute propagates through
 // ConstantArrayType sugar via wrapMatrixWithLayoutAttr, so indexing into an
diff --git a/clang/test/SemaHLSL/BuiltIns/logical-mat-operator-errors.hlsl 
b/clang/test/SemaHLSL/BuiltIns/logical-mat-operator-errors.hlsl
index bd7f01dbd768f..693c49c497824 100644
--- a/clang/test/SemaHLSL/BuiltIns/logical-mat-operator-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/logical-mat-operator-errors.hlsl
@@ -4,19 +4,19 @@
 bool2x2 test_mismatched_args(bool2x2 a, bool3x3 b)
 {
   return TEST_FUNC(a, b);
-  // expected-warning@-1 {{implicit conversion truncates matrix: 'bool3x3' 
(aka 'matrix<bool, 3, 3>') to 'matrix<bool, 2, 2>'}}
+  // expected-warning@-1 {{implicit conversion truncates matrix: 'bool3x3' 
(aka 'matrix<bool, 3, 3>') to 'bool2x2' (aka 'matrix<bool, 2, 2>')}}
 }
 
 bool3x3 test_mismatched_args(bool3x3 a, bool2x2 b)
 {
   return TEST_FUNC(a, b);
-  // expected-error@-1 {{cannot initialize return object of type 'bool3x3' 
(aka 'matrix<bool, 3, 3>') with an rvalue of type 'matrix<bool, 2, 2>'}}
+  // expected-error@-1 {{cannot initialize return object of type 
'matrix<[...], 3, 3>' with an rvalue of type 'matrix<[...], 2, 2>'}}
 }
 
 bool2x2 test_mismatched_args2(bool3x3 a, bool2x2 b)
 {
   return TEST_FUNC(a, b);
-  // expected-warning@-1 {{implicit conversion truncates matrix: 'bool3x3' 
(aka 'matrix<bool, 3, 3>') to 'matrix<bool, 2, 2>'}}
+  // expected-warning@-1 {{implicit conversion truncates matrix: 'bool3x3' 
(aka 'matrix<bool, 3, 3>') to 'bool2x2' (aka 'matrix<bool, 2, 2>')}}
 }
 
 bool3x3 test_mismatched_return_larger(bool2x2 a, bool2x2 b)
@@ -28,5 +28,5 @@ bool3x3 test_mismatched_return_larger(bool2x2 a, bool2x2 b)
 bool2x2 test_mismatched_return_smaller(bool3x3 a, bool3x3 b)
 {
   return TEST_FUNC(a, b);
-  // expected-warning@-1 {{implicit conversion truncates matrix: 'bool3x3' 
(aka 'matrix<bool, 3, 3>') to 'matrix<bool, 2, 2>'}}
+  // expected-warning@-1 {{implicit conversion truncates matrix: 'bool3x3' 
(aka 'matrix<bool, 3, 3>') to 'bool2x2' (aka 'matrix<bool, 2, 2>')}}
 }
diff --git a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl 
b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
index 011f2a19cb847..e0619c2987fbc 100644
--- a/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
+++ b/clang/test/SemaHLSL/MatrixElementOverloadResolution.hlsl
@@ -238,10 +238,10 @@ void matOrVec3(float4x4 F) {}
 
 export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, float3x2 f32) {
   int2x2 i22 = f23;
-  // expected-warning@-1{{implicit conversion truncates matrix: 'float2x3' 
(aka 'matrix<float, 2, 3>') to 'matrix<int, 2, 2>'}}
-  // expected-warning@-2{{implicit conversion turns floating-point number into 
integer: 'float2x3' (aka 'matrix<float, 2, 3>') to 'matrix<int, 2, 2>'}}
+  // expected-warning@-1{{implicit conversion truncates matrix: 'float2x3' 
(aka 'matrix<float, 2, 3>') to 'int2x2' (aka 'matrix<int, 2, 2>')}}
+  // expected-warning@-2{{implicit conversion turns floating-point number into 
integer: 'float2x3' (aka 'matrix<float, 2, 3>') to 'int2x2' (aka 'matrix<int, 
2, 2>')}}
   //CHECK: VarDecl {{.*}} i22 'int2x2':'matrix<int, 2, 2>' cinit
-  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<int, 2, 2>' 
<HLSLMatrixTruncation>
+  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2x2':'matrix<int, 2, 2>' 
<HLSLMatrixTruncation>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<int, 2, 3>' 
<FloatingToIntegral>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' 
<LValueToRValue>
 #ifdef ERROR
@@ -250,9 +250,9 @@ export void Case8(float2x3 f23, float4x4 f44, float3x3 f33, 
float3x2 f32) {
 #endif
   
   fn2x2(f23);
-  // expected-warning@-1{{implicit conversion truncates matrix: 'float2x3' 
(aka 'matrix<float, 2, 3>') to 'matrix<float, 2, 2>'}}
+  // expected-warning@-1{{implicit conversion truncates matrix: 'float2x3' 
(aka 'matrix<float, 2, 3>') to 'float2x2' (aka 'matrix<float, 2, 2>')}}
   //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 'fn2x2' 
'void (float2x2)'
-  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' 
<HLSLMatrixTruncation>
+  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x2':'matrix<float, 2, 2>' 
<HLSLMatrixTruncation>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' 
<LValueToRValue>
 
 #ifdef ERROR
@@ -261,15 +261,15 @@ export void Case8(float2x3 f23, float4x4 f44, float3x3 
f33, float3x2 f32) {
 #endif
 
   matOrVec(f23);
-  // expected-warning@-1{{implicit conversion truncates matrix: 'float2x3' 
(aka 'matrix<float, 2, 3>') to 'matrix<float, 2, 2>'}}
+  // expected-warning@-1{{implicit conversion truncates matrix: 'float2x3' 
(aka 'matrix<float, 2, 3>') to 'float2x2' (aka 'matrix<float, 2, 2>')}}
   //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 
'matOrVec' 'void (float2x2)'
-  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' 
<HLSLMatrixTruncation>
+  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x2':'matrix<float, 2, 2>' 
<HLSLMatrixTruncation>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' 
<LValueToRValue>
 
   matOrVec(f44);
-  // expected-warning@-1{{implicit conversion truncates matrix: 'float4x4' 
(aka 'matrix<float, 4, 4>') to 'matrix<float, 2, 2>'}}
+  // expected-warning@-1{{implicit conversion truncates matrix: 'float4x4' 
(aka 'matrix<float, 4, 4>') to 'float2x2' (aka 'matrix<float, 2, 2>')}}
   //CHECK: DeclRefExpr {{.*}} 'void (float2x2)' lvalue Function {{.*}} 
'matOrVec' 'void (float2x2)'
-  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 2>' 
<HLSLMatrixTruncation>
+  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x2':'matrix<float, 2, 2>' 
<HLSLMatrixTruncation>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' 
<LValueToRValue>
 
 #ifdef ERROR
@@ -284,15 +284,15 @@ export void Case8(float2x3 f23, float4x4 f44, float3x3 
f33, float3x2 f32) {
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' 
<LValueToRValue>
 
   matOrVec2(f44);
-  // expected-warning@-1{{implicit conversion truncates matrix: 'float4x4' 
(aka 'matrix<float, 4, 4>') to 'matrix<float, 2, 3>'}}
+  // expected-warning@-1{{implicit conversion truncates matrix: 'float4x4' 
(aka 'matrix<float, 4, 4>') to 'float2x3' (aka 'matrix<float, 2, 3>')}}
   //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 
'matOrVec2' 'void (float2x3)'
-  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 3>' 
<HLSLMatrixTruncation>
+  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' 
<HLSLMatrixTruncation>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float4x4':'matrix<float, 4, 4>' 
<LValueToRValue>
 
   matOrVec2(f33);
-  // expected-warning@-1{{implicit conversion truncates matrix: 'float3x3' 
(aka 'matrix<float, 3, 3>') to 'matrix<float, 2, 3>'}}
+  // expected-warning@-1{{implicit conversion truncates matrix: 'float3x3' 
(aka 'matrix<float, 3, 3>') to 'float2x3' (aka 'matrix<float, 2, 3>')}}
   //CHECK: DeclRefExpr {{.*}} 'void (float2x3)' lvalue Function {{.*}} 
'matOrVec2' 'void (float2x3)'
-  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'matrix<float, 2, 3>' 
<HLSLMatrixTruncation>
+  //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2x3':'matrix<float, 2, 3>' 
<HLSLMatrixTruncation>
   //CHECK-NEXT: ImplicitCastExpr {{.*}} 'float3x3':'matrix<float, 3, 3>' 
<LValueToRValue>
   
 #ifdef ERROR
diff --git 
a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl 
b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
index 2c50b957578ec..6ac3feeb1ea03 100644
--- 
a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
+++ 
b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixImplicitTruncCastWarnings.hlsl
@@ -2,43 +2,43 @@
 
 export int3x4 trunc_cast(int4x4 i44) {
     int3x4 i34 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 3, 4>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int3x4' (aka 'matrix<int, 3, 4>')}}
     return i34;
 }
 
 export int4x3 trunc_cast0(int4x4 i44) {
     int4x3 i43 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 4, 3>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int4x3' (aka 'matrix<int, 4, 3>')}}
     return i43;
 }
 
 export int3x3 trunc_cast1(int4x4 i44) {
     int3x3 i33 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 3, 3>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int3x3' (aka 'matrix<int, 3, 3>')}}
     return i33;
 }
 
 export int3x2 trunc_cast2(int4x4 i44) {
     int3x2 i32 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 3, 2>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int3x2' (aka 'matrix<int, 3, 2>')}}
     return i32;
 }
 
 export int2x3 trunc_cast3(int4x4 i44) {
     int2x3 i23 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 2, 3>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int2x3' (aka 'matrix<int, 2, 3>')}}
     return i23;
 }
 
 export int2x2 trunc_cast4(int4x4 i44) {
     int2x2 i22 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 2, 2>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int2x2' (aka 'matrix<int, 2, 2>')}}
     return i22;
 }
 
 export int2x1 trunc_cast5(int4x4 i44) {
     int2x1 i21 = i44;
-    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'matrix<int, 2, 1>'}}
+    // expected-warning@-1{{implicit conversion truncates matrix: 'int4x4' 
(aka 'matrix<int, 4, 4>') to 'int2x1' (aka 'matrix<int, 2, 1>')}}
     return i21;
 }
 

>From a036f2502bb619a9f7fa261168b8f8ef1f676fcb Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Tue, 2 Jun 2026 10:36:51 -0400
Subject: [PATCH 08/10] address pr comments

---
 clang/include/clang/Basic/Attr.td           | 1 -
 clang/lib/Sema/SemaHLSL.cpp                 | 2 ++
 clang/test/SemaHLSL/matrix_layout_attr.hlsl | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 782aec69622ff..e53d6dd20f824 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5305,7 +5305,6 @@ def HLSLColumnMajor : TypeAttr {
   let LangOpts = [HLSL];
   let Documentation = [HLSLMatrixLayoutDocs];
 }
-def : MutualExclusions<[HLSLRowMajor, HLSLColumnMajor]>;
 
 def RandomizeLayout : InheritableAttr {
   let Spellings = [GCC<"randomize_layout">];
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 70e0feab29e8f..42621ae87da19 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2745,12 +2745,14 @@ Attr *SemaHLSL::buildMatrixLayoutTypeAttr(QualType T, 
const ParsedAttr &AL) {
     if (ExistingKind == AttrK) {
       Diag(AL.getLoc(), diag::warn_duplicate_attribute_exact)
           << AL.getAttrName();
+      Diag(AL.getLoc(), diag::note_previous_attribute);
       return nullptr;
     }
     IdentifierInfo *ExistingII = &Ctx.Idents.get(
         ExistingKind == attr::HLSLRowMajor ? "row_major" : "column_major");
     Diag(AL.getLoc(), diag::err_hlsl_matrix_layout_conflict)
         << AL.getAttrName() << ExistingII;
+    Diag(AL.getLoc(), diag::note_conflicting_attribute);
     AL.setInvalid();
     return nullptr;
   }
diff --git a/clang/test/SemaHLSL/matrix_layout_attr.hlsl 
b/clang/test/SemaHLSL/matrix_layout_attr.hlsl
index a5c3cc38027b0..3e953f07557e6 100644
--- a/clang/test/SemaHLSL/matrix_layout_attr.hlsl
+++ b/clang/test/SemaHLSL/matrix_layout_attr.hlsl
@@ -102,13 +102,16 @@ struct S2 {
 typedef row_major float ScalarRM;
 
 // Invalid: conflicting row_major and column_major on same decl.
+// expected-note@+2 {{conflicting attribute is here}}
 // expected-error@+1 {{'column_major' and 'row_major' attributes are not 
compatible}}
 row_major column_major float3x3 conflict_mat;
 
 // Invalid: duplicate row_major.
+// expected-note@+2 {{previous attribute is here}}
 // expected-warning@+1 {{attribute 'row_major' is already applied}}
 row_major row_major float3x3 dup_rm_mat;
 
 // Invalid: duplicate column_major.
+// expected-note@+2 {{previous attribute is here}}
 // expected-warning@+1 {{attribute 'column_major' is already applied}}
 column_major column_major float4x4 dup_cm_mat;

>From ade7b3ccf22f1260db80b3ae387f95c1d296b4d3 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Tue, 2 Jun 2026 21:11:43 -0400
Subject: [PATCH 09/10] address pr comments

---
 clang/lib/Sema/SemaHLSL.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 42621ae87da19..e5fbbb6094ba0 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2722,14 +2722,14 @@ static bool findExistingMatrixLayoutMarker(QualType T,
 }
 
 Attr *SemaHLSL::buildMatrixLayoutTypeAttr(QualType T, const ParsedAttr &AL) {
+  if (T.isNull())
+    return nullptr;
+
   ASTContext &Ctx = getASTContext();
   attr::Kind AttrK = AL.getKind() == ParsedAttr::AT_HLSLRowMajor
                          ? attr::HLSLRowMajor
                          : attr::HLSLColumnMajor;
 
-  if (T.isNull())
-    return nullptr;
-
   // For non-dependent types, the operand must be a matrix (or array of
   // matrices).
   if (!T->isDependentType() && !isMatrixOrArrayOfMatrix(Ctx, T)) {
@@ -2762,6 +2762,12 @@ Attr *SemaHLSL::buildMatrixLayoutTypeAttr(QualType T, 
const ParsedAttr &AL) {
   return ::new (Ctx) HLSLColumnMajorAttr(Ctx, AL);
 }
 
+// Re-validates an HLSL `row_major` / `column_major` attribute after template
+// substitution. The parse-time check in `buildMatrixLayoutTypeAttr` is skipped
+// for dependent types; `TransformAttributedType` calls this once the type is
+// concrete. Returns `true` (and emits a diagnostic) if the substituted type is
+// not a matrix or array of matrices, signaling the caller to abort the
+// transform.
 bool SemaHLSL::diagnoseMatrixLayoutInstantiation(attr::Kind K, QualType T,
                                                  SourceLocation Loc) {
   if (K != attr::HLSLRowMajor && K != attr::HLSLColumnMajor)

>From e24bf9590c75c3dc60be45607eb7c87e12eb4b62 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Wed, 3 Jun 2026 09:55:40 -0400
Subject: [PATCH 10/10] update matrix-layout-attr-overrides-default.hlsl to be
 singular separate run lines.

---
 .../CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl 
b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
index 6536a4341d3e9..8f2bace5740d9 100644
--- a/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
+++ b/clang/test/CodeGenHLSL/matrix-layout-attr-overrides-default.hlsl
@@ -1,9 +1,5 @@
-// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -fmatrix-memory-layout=column-major -o - | FileCheck %s
-// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -fmatrix-memory-layout=row-major -o - | FileCheck %s
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple 
dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes 
-fmatrix-memory-layout=column-major -o - | FileCheck %s
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple 
dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes 
-fmatrix-memory-layout=row-major -o - | FileCheck %s
 
 // Verifies that a per-decl `[[hlsl::row_major]]` / `[[hlsl::column_major]]`
 // (spelled `row_major` / `column_major` in HLSL) overrides the

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to