[clang] [HLSL][Matrix] Add support for Matrix element and trunc Casts (PR #168915)

Farzon Lotfi via cfe-commits Thu, 20 Nov 2025 10:06:23 -0800

https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/168915


>From 068d85c8c0542d434a0b6a006329d143fdc7535b Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Thu, 20 Nov 2025 12:16:26 -0500
Subject: [PATCH 1/2] [HLSL][Matrix] Add support for Matrix element and trunc
 Casts

fixes #168737
fixes #168755

This change adds support for Matrix truncations
via the ICK_HLSL_Matrix_Truncation enum. That ends up being
most of the files changed.

It also allows Matrix as an HLSL Elementwise cast as long as the
cast does not perform a shape transformation, i.e. 3x2 to 2x3.

Tests for the new elementwise and truncation behavior were added,
as well as sema tests to make sure we error on the shape transformation
cast.

I am punting right now on the ConstExpr Matrix support.
That will need to be addressed later. Will file a separate issue for
that if reviewers agree it can wait.
---
 clang/include/clang/AST/OperationKinds.def    |   3 +
 clang/include/clang/Sema/Overload.h           |   3 +
 clang/lib/AST/Expr.cpp                        |   1 +
 clang/lib/AST/ExprConstant.cpp                |  13 ++
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          |   2 +
 clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp   |   1 +
 clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp  |   1 +
 clang/lib/CodeGen/CGExpr.cpp                  |   1 +
 clang/lib/CodeGen/CGExprAgg.cpp               |   3 +-
 clang/lib/CodeGen/CGExprComplex.cpp           |   1 +
 clang/lib/CodeGen/CGExprConstant.cpp          |   1 +
 clang/lib/CodeGen/CGExprScalar.cpp            |  35 +++-
 clang/lib/Sema/SemaExprCXX.cpp                |  22 ++-
 clang/lib/Sema/SemaHLSL.cpp                   |   5 +-
 clang/lib/Sema/SemaOverload.cpp               |  75 ++++++-
 clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp |   1 +
 .../BasicFeatures/MatrixElementTypeCast.hlsl  | 186 ++++++++++++++++++
 .../BasicFeatures/MatrixTruncation.hlsl       | 156 +++++++++++++++
 .../Types/BuiltinMatrix/MatrixCastErrors.hlsl |  21 ++
 19 files changed, 516 insertions(+), 15 deletions(-)
 create mode 100644 
clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
 create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
 create mode 100644 
clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl

diff --git a/clang/include/clang/AST/OperationKinds.def 
b/clang/include/clang/AST/OperationKinds.def
index c2dca895e8411..8a13ad988403b 100644
--- a/clang/include/clang/AST/OperationKinds.def
+++ b/clang/include/clang/AST/OperationKinds.def
@@ -364,6 +364,9 @@ CAST_OPERATION(IntToOCLSampler)
 // Truncate a vector type by dropping elements from the end (HLSL only).
 CAST_OPERATION(HLSLVectorTruncation)
 
+// Truncate a matrix type by dropping elements from the end (HLSL only).
+CAST_OPERATION(HLSLMatrixTruncation)
+
 // Non-decaying array RValue cast (HLSL only).
 CAST_OPERATION(HLSLArrayRValue)
 
diff --git a/clang/include/clang/Sema/Overload.h 
b/clang/include/clang/Sema/Overload.h
index 59bbd0fbd9e95..1ad52cb9da517 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -198,6 +198,9 @@ class Sema;
     /// HLSL vector truncation.
     ICK_HLSL_Vector_Truncation,
 
+    /// HLSL matrix truncation.
+    ICK_HLSL_Matrix_Truncation,
+
     /// HLSL non-decaying array rvalue cast.
     ICK_HLSL_Array_RValue,
 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 1d914fa876759..159ea4867857d 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1937,6 +1937,7 @@ bool CastExpr::CastConsistency() const {
   case CK_FixedPointToBoolean:
   case CK_HLSLArrayRValue:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
   CheckNoBasePath:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 74f6e3acb6b39..b7ea213679d2a 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11773,6 +11773,10 @@ bool VectorExprEvaluator::VisitCastExpr(const CastExpr 
*E) {
       Elements.push_back(Val.getVectorElt(I));
     return Success(Elements, E);
   }
+  case CK_HLSLMatrixTruncation: {
+    // TODO: support Expr Constant for Matrix Truncation
+    return Error(E);
+  }
   case CK_HLSLAggregateSplatCast: {
     APValue Val;
     QualType ValTy;
@@ -18011,6 +18015,10 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr 
*E) {
       return Error(E);
     return Success(Val.getVectorElt(0), E);
   }
+  case CK_HLSLMatrixTruncation: {
+    // TODO: support Expr Constant for Matrix Truncation
+    return Error(E);
+  }
   case CK_HLSLElementwiseCast: {
     SmallVector<APValue> SrcVals;
     SmallVector<QualType> SrcTypes;
@@ -18604,6 +18612,10 @@ bool FloatExprEvaluator::VisitCastExpr(const CastExpr 
*E) {
       return Error(E);
     return Success(Val.getVectorElt(0), E);
   }
+  case CK_HLSLMatrixTruncation: {
+    // TODO: support Expr Constant for Matrix Truncation
+    return Error(E);
+  }
   case CK_HLSLElementwiseCast: {
     SmallVector<APValue> SrcVals;
     SmallVector<QualType> SrcTypes;
@@ -18761,6 +18773,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr 
*E) {
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
     llvm_unreachable("invalid cast kind for complex value");
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 8607558c1cf7d..abfbca16cd60b 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -188,6 +188,7 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr 
*expr,
     case CK_HLSLArrayRValue:
     case CK_HLSLElementwiseCast:
     case CK_HLSLVectorTruncation:
+    case CK_HLSLMatrixTruncation:
     case CK_IntToOCLSampler:
     case CK_IntegralCast:
     case CK_IntegralComplexCast:
@@ -1279,6 +1280,7 @@ LValue CIRGenFunction::emitCastLValue(const CastExpr *e) {
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLArrayRValue:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
index 9ed920085c8c6..fe06f8cc2c430 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -534,6 +534,7 @@ mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr 
*op,
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLArrayRValue:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 6af87a0159f0a..7ce02f9b42af4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -1012,6 +1012,7 @@ class ConstExprEmitter
     case CK_MatrixCast:
     case CK_HLSLArrayRValue:
     case CK_HLSLVectorTruncation:
+    case CK_HLSLMatrixTruncation:
     case CK_HLSLElementwiseCast:
     case CK_HLSLAggregateSplatCast:
       return {};
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f2451b16e78be..1737301c67021 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5744,6 +5744,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) 
{
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLArrayRValue:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 67b5f919d1b2a..7cc4d6c8f06f6 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -1036,7 +1036,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
   case CK_ZeroToOCLOpaqueType:
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
-
+  case CK_HLSLMatrixTruncation:
   case CK_IntToOCLSampler:
   case CK_FloatingToFixedPoint:
   case CK_FixedPointToFloating:
@@ -1550,6 +1550,7 @@ static bool castPreservesZero(const CastExpr *CE) {
   case CK_NonAtomicToAtomic:
   case CK_AtomicToNonAtomic:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
     return true;
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp 
b/clang/lib/CodeGen/CGExprComplex.cpp
index f8a946a76554a..e6683d4c931b8 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -621,6 +621,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, 
Expr *Op,
   case CK_IntegralToFixedPoint:
   case CK_MatrixCast:
   case CK_HLSLVectorTruncation:
+  case CK_HLSLMatrixTruncation:
   case CK_HLSLArrayRValue:
   case CK_HLSLElementwiseCast:
   case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp 
b/clang/lib/CodeGen/CGExprConstant.cpp
index 6407afc3d9447..0eec4dba4824a 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1333,6 +1333,7 @@ class ConstExprEmitter
     case CK_ZeroToOCLOpaqueType:
     case CK_MatrixCast:
     case CK_HLSLVectorTruncation:
+    case CK_HLSLMatrixTruncation:
     case CK_HLSLArrayRValue:
     case CK_HLSLElementwiseCast:
     case CK_HLSLAggregateSplatCast:
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 714192db1b15c..a9e2ebdffa59a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2422,9 +2422,27 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction 
&CGF, LValue SrcVal,
     }
     return V;
   }
+  if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
+    assert(LoadList.size() >= MatTy->getNumElementsFlattened() &&
+           "Flattened type on RHS must have the same number or more elements "
+           "than vector on LHS.");
+    llvm::Value *V =
+        CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
+    // write to V.
+    for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
+      RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
+      assert(RVal.isScalar() &&
+             "All flattened source values should be scalars.");
+      llvm::Value *Cast =
+          CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
+                                   MatTy->getElementType(), Loc);
+      V = CGF.Builder.CreateInsertElement(V, Cast, I);
+    }
+    return V;
+  }
   // if its a builtin just do an extract element or load.
   assert(DestTy->isBuiltinType() &&
-         "Destination type must be a vector or builtin type.");
+         "Destination type must be a vector, matrix, or builtin type.");
   RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc);
   assert(RVal.isScalar() && "All flattened source values should be scalars.");
   return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(),
@@ -2954,6 +2972,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
     return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc");
   }
+  case CK_HLSLMatrixTruncation: {
+    assert((DestTy->isMatrixType() || DestTy->isBuiltinType()) &&
+           "Destination type must be a matrix or builtin type.");
+    Value *Mat = Visit(E);
+    if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
+      SmallVector<int> Mask;
+      unsigned NumElts = MatTy->getNumElementsFlattened();
+      for (unsigned I = 0; I != NumElts; ++I)
+        Mask.push_back(I);
+
+      return Builder.CreateShuffleVector(Mat, Mask, "trunc");
+    }
+    llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy);
+    return Builder.CreateExtractElement(Mat, Zero, "cast.mtrunc");
+  }
   case CK_HLSLElementwiseCast: {
     RValue RV = CGF.EmitAnyExpr(E);
     SourceLocation Loc = CE->getExprLoc();
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index dc7ed4e9a48bc..be3ac296f2597 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -5197,6 +5197,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType 
ToType,
   case ICK_Incompatible_Pointer_Conversion:
   case ICK_HLSL_Array_RValue:
   case ICK_HLSL_Vector_Truncation:
+  case ICK_HLSL_Matrix_Truncation:
   case ICK_HLSL_Vector_Splat:
     llvm_unreachable("Improper second standard conversion");
   }
@@ -5204,12 +5205,10 @@ Sema::PerformImplicitConversion(Expr *From, QualType 
ToType,
   if (SCS.Dimension != ICK_Identity) {
     // If SCS.Element is not ICK_Identity the To and From types must be HLSL
     // vectors or matrices.
-
-    // TODO: Support HLSL matrices.
-    assert((!From->getType()->isMatrixType() && !ToType->isMatrixType()) &&
-           "Dimension conversion for matrix types is not implemented yet.");
-    assert((ToType->isVectorType() || ToType->isBuiltinType()) &&
-           "Dimension conversion output must be vector or scalar type.");
+    assert(
+        (ToType->isVectorType() || ToType->isConstantMatrixType() ||
+         ToType->isBuiltinType()) &&
+        "Dimension conversion output must be vector, matrix, or scalar type.");
     switch (SCS.Dimension) {
     case ICK_HLSL_Vector_Splat: {
       // Vector splat from any arithmetic type to a vector.
@@ -5235,6 +5234,17 @@ Sema::PerformImplicitConversion(Expr *From, QualType 
ToType,
 
       break;
     }
+    case ICK_HLSL_Matrix_Truncation: {
+      auto *FromMat = From->getType()->castAs<ConstantMatrixType>();
+      QualType TruncTy = FromMat->getElementType();
+      if (auto *ToMat = ToType->getAs<ConstantMatrixType>())
+        TruncTy = Context.getConstantMatrixType(TruncTy, ToMat->getNumRows(),
+                                                ToMat->getNumColumns());
+      From = ImpCastExprToType(From, TruncTy, CK_HLSLMatrixTruncation,
+                               From->getValueKind())
+                 .get();
+      break;
+    }
     case ICK_Identity:
     default:
       llvm_unreachable("Improper element standard conversion");
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 5555916c2536f..168bfc3da99e0 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3728,7 +3728,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, 
QualType DestTy) {
 }
 
 // Can we perform an HLSL Elementwise cast?
-// TODO: update this code when matrices are added; see issue #88060
 bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
 
   // Don't handle casts where LHS and RHS are any combination of scalar/vector
@@ -3741,6 +3740,10 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, 
QualType DestTy) {
       (DestTy->isScalarType() || DestTy->isVectorType()))
     return false;
 
+  if (SrcTy->isConstantMatrixType() &&
+      (DestTy->isScalarType() || DestTy->isConstantMatrixType()))
+    return false;
+
   llvm::SmallVector<QualType> DestTypes;
   BuildFlattenedTypeList(DestTy, DestTypes);
   llvm::SmallVector<QualType> SrcTypes;
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 37f351174e3d0..f912c2431fc6f 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -162,6 +162,7 @@ ImplicitConversionRank 
clang::GetConversionRank(ImplicitConversionKind Kind) {
       ICR_C_Conversion_Extension,
       ICR_Conversion,
       ICR_HLSL_Dimension_Reduction,
+      ICR_HLSL_Dimension_Reduction,
       ICR_Conversion,
       ICR_HLSL_Scalar_Widening,
   };
@@ -224,6 +225,7 @@ static const char 
*GetImplicitConversionName(ImplicitConversionKind Kind) {
       "Incompatible pointer conversion",
       "Fixed point conversion",
       "HLSL vector truncation",
+      "HLSL matrix truncation",
       "Non-decaying array conversion",
       "HLSL vector splat",
   };
@@ -2046,9 +2048,10 @@ static bool IsFloatingPointConversion(Sema &S, QualType 
FromType,
   return true;
 }
 
-static bool IsVectorElementConversion(Sema &S, QualType FromType,
-                                      QualType ToType,
-                                      ImplicitConversionKind &ICK, Expr *From) 
{
+static bool IsVectorOrMatrixElementConversion(Sema &S, QualType FromType,
+                                              QualType ToType,
+                                              ImplicitConversionKind &ICK,
+                                              Expr *From) {
   if (S.Context.hasSameUnqualifiedType(FromType, ToType))
     return true;
 
@@ -2088,6 +2091,59 @@ static bool IsVectorElementConversion(Sema &S, QualType 
FromType,
   return false;
 }
 
+/// Determine whether the conversion from FromType to ToType is a valid HLSL
+/// matrix conversion (matrix to matrix, or matrix to a non-matrix type).
+/// \param ICK Forwarded to IsVectorOrMatrixElementConversion when the element
+/// types differ.
+/// \param ElConv Set to ICK_Identity or ICK_HLSL_Matrix_Truncation.
+static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
+                               ImplicitConversionKind &ICK,
+                               ImplicitConversionKind &ElConv, Expr *From,
+                               bool InOverloadResolution, bool CStyle) {
+  // Matrix conversion rules outside of HLSL are not clear; only handle HLSL.
+  if (!S.getLangOpts().HLSL)
+    return false;
+
+  auto *ToMatrixType = ToType->getAs<ConstantMatrixType>();
+  auto *FromMatrixType = FromType->getAs<ConstantMatrixType>();
+
+  // If both types are matrices, handle possible matrix truncation and
+  // element conversion.
+  if (ToMatrixType && FromMatrixType) {
+    unsigned FromCols = FromMatrixType->getNumColumns();
+    unsigned ToCols = ToMatrixType->getNumColumns();
+    if (FromCols < ToCols)
+      return false;
+
+    unsigned FromRows = FromMatrixType->getNumRows();
+    unsigned ToRows = ToMatrixType->getNumRows();
+    if (FromRows < ToRows)
+      return false;
+
+    unsigned FromElts = FromMatrixType->getNumElementsFlattened();
+    unsigned ToElts = ToMatrixType->getNumElementsFlattened();
+    if (FromElts == ToElts)
+      ElConv = ICK_Identity;
+    else
+      ElConv = ICK_HLSL_Matrix_Truncation;
+
+    QualType FromElTy = FromMatrixType->getElementType();
+    QualType ToElTy = ToMatrixType->getElementType();
+    if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
+      return true;
+    return IsVectorOrMatrixElementConversion(S, FromElTy, ToElTy, ICK, From);
+  }
+  if (FromMatrixType && !ToMatrixType) {
+    ElConv = ICK_HLSL_Matrix_Truncation;
+    QualType FromElTy = FromMatrixType->getElementType();
+    if (S.Context.hasSameUnqualifiedType(FromElTy, ToType))
+      return true;
+    return IsVectorOrMatrixElementConversion(S, FromElTy, ToType, ICK, From);
+  }
+
+  return false;
+}
+
 /// Determine whether the conversion from FromType to ToType is a valid
 /// vector conversion.
 ///
@@ -2127,14 +2183,14 @@ static bool IsVectorConversion(Sema &S, QualType 
FromType, QualType ToType,
       QualType ToElTy = ToExtType->getElementType();
       if (S.Context.hasSameUnqualifiedType(FromElTy, ToElTy))
         return true;
-      return IsVectorElementConversion(S, FromElTy, ToElTy, ICK, From);
+      return IsVectorOrMatrixElementConversion(S, FromElTy, ToElTy, ICK, From);
     }
     if (FromExtType && !ToExtType) {
       ElConv = ICK_HLSL_Vector_Truncation;
       QualType FromElTy = FromExtType->getElementType();
       if (S.Context.hasSameUnqualifiedType(FromElTy, ToType))
         return true;
-      return IsVectorElementConversion(S, FromElTy, ToType, ICK, From);
+      return IsVectorOrMatrixElementConversion(S, FromElTy, ToType, ICK, From);
     }
     // Fallthrough for the case where ToType is a vector and FromType is not.
   }
@@ -2161,7 +2217,8 @@ static bool IsVectorConversion(Sema &S, QualType 
FromType, QualType ToType,
       if (S.getLangOpts().HLSL) {
         ElConv = ICK_HLSL_Vector_Splat;
         QualType ToElTy = ToExtType->getElementType();
-        return IsVectorElementConversion(S, FromType, ToElTy, ICK, From);
+        return IsVectorOrMatrixElementConversion(S, FromType, ToElTy, ICK,
+                                                 From);
       }
       ICK = ICK_Vector_Splat;
       return true;
@@ -2460,6 +2517,11 @@ static bool IsStandardConversion(Sema &S, Expr* From, 
QualType ToType,
     SCS.Second = SecondICK;
     SCS.Dimension = DimensionICK;
     FromType = ToType.getUnqualifiedType();
+  } else if (IsMatrixConversion(S, FromType, ToType, SecondICK, DimensionICK,
+                                From, InOverloadResolution, CStyle)) {
+    SCS.Second = SecondICK;
+    SCS.Dimension = DimensionICK;
+    FromType = ToType.getUnqualifiedType();
   } else if (!S.getLangOpts().CPlusPlus &&
              S.Context.typesAreCompatible(ToType, FromType)) {
     // Compatible conversions (Clang extension for C function overloading)
@@ -6237,6 +6299,7 @@ static bool CheckConvertedConstantConversions(Sema &S,
   case ICK_Incompatible_Pointer_Conversion:
   case ICK_Fixed_Point_Conversion:
   case ICK_HLSL_Vector_Truncation:
+  case ICK_HLSL_Matrix_Truncation:
     return false;
 
   case ICK_Lvalue_To_Rvalue:
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 4ddf8fd5b4b0f..db27c06cd18a3 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -560,6 +560,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const 
Expr *Ex,
       case CK_VectorSplat:
       case CK_HLSLElementwiseCast:
       case CK_HLSLAggregateSplatCast:
+      case CK_HLSLMatrixTruncation:
       case CK_HLSLVectorTruncation: {
         QualType resultType = CastE->getType();
         if (CastE->isGLValue())
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
new file mode 100644
index 0000000000000..081b8013efcbc
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl
@@ -0,0 +1,186 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// RUN: %clang_cc1 -finclude-default-header -triple 
dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes 
-fnative-half-type -fnative-int16-type -o - %s | FileCheck %s
+
+
+// CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast0u11matrix_typeILm3ELm2EfE(
+// CHECK-SAME: <6 x float> noundef nofpclass(nan inf) [[F32:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca [6 x float], align 4
+// CHECK-NEXT:    [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <6 x float> [[F32]], ptr [[F32_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <6 x float>, ptr [[F32_ADDR]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi <6 x float> [[TMP0]] to <6 x i32>
+// CHECK-NEXT:    store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT:    ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast0(float3x2 f32) {
+    int3x2 i32 = (int3x2)f32;
+    return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast1u11matrix_typeILm3ELm2EsE(
+// CHECK-SAME: <6 x i16> noundef [[I16_32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I16_32_ADDR:%.*]] = alloca [6 x i16], align 2
+// CHECK-NEXT:    [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <6 x i16> [[I16_32]], ptr [[I16_32_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i16>, ptr [[I16_32_ADDR]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = sext <6 x i16> [[TMP0]] to <6 x i32>
+// CHECK-NEXT:    store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT:    ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast1(int16_t3x2 i16_32) {
+    int3x2 i32 = (int3x2)i16_32;
+    return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast2u11matrix_typeILm3ELm2ElE(
+// CHECK-SAME: <6 x i64> noundef [[I64_32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I64_32_ADDR:%.*]] = alloca [6 x i64], align 8
+// CHECK-NEXT:    [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <6 x i64> [[I64_32]], ptr [[I64_32_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i64>, ptr [[I64_32_ADDR]], align 8
+// CHECK-NEXT:    [[CONV:%.*]] = trunc <6 x i64> [[TMP0]] to <6 x i32>
+// CHECK-NEXT:    store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT:    ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast2(int64_t3x2 i64_32) {
+    int3x2 i32 = (int3x2)i64_32;
+    return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i16> 
@_Z22elementwise_type_cast3u11matrix_typeILm2ELm3EDhE(
+// CHECK-SAME: <6 x half> noundef nofpclass(nan inf) [[H23:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[H23_ADDR:%.*]] = alloca [6 x half], align 2
+// CHECK-NEXT:    [[I23:%.*]] = alloca [6 x i16], align 2
+// CHECK-NEXT:    store <6 x half> [[H23]], ptr [[H23_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load <6 x half>, ptr [[H23_ADDR]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi <6 x half> [[TMP0]] to <6 x i16>
+// CHECK-NEXT:    store <6 x i16> [[CONV]], ptr [[I23]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i16>, ptr [[I23]], align 2
+// CHECK-NEXT:    ret <6 x i16> [[TMP1]]
+//
+int16_t2x3 elementwise_type_cast3(half2x3 h23) {
+    int16_t2x3 i23 = (int16_t2x3)h23;
+    return i23;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z22elementwise_type_cast4u11matrix_typeILm3ELm2EdE(
+// CHECK-SAME: <6 x double> noundef nofpclass(nan inf) [[D32:%.*]]) #[[ATTR0]] 
{
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[D32_ADDR:%.*]] = alloca [6 x double], align 8
+// CHECK-NEXT:    [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <6 x double> [[D32]], ptr [[D32_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <6 x double>, ptr [[D32_ADDR]], align 8
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi <6 x double> [[TMP0]] to <6 x i32>
+// CHECK-NEXT:    store <6 x i32> [[CONV]], ptr [[I32]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT:    ret <6 x i32> [[TMP1]]
+//
+int3x2 elementwise_type_cast4(double3x2 d32) {
+    int3x2 i32 = (int3x2)d32;
+    return i32;
+}
+
+// CHECK-LABEL: define hidden void @_Z5call2v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca [2 x [1 x i32]], align 4
+// CHECK-NEXT:    [[B:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [2 x [1 x i32]], align 4
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr 
align 4 @__const._Z5call2v.A, i32 8, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_TEMP]], 
ptr align 4 [[A]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr 
[[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr 
[[AGG_TEMP]], i32 0, i32 1, i32 0
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP0]], i32 
[[TMP1]], i64 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> [[TMP2]], i32 
[[TMP3]], i64 1
+// CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[B]], align 4
+// CHECK-NEXT:    ret void
+//
+void call2() {
+  int A[2][1] = {{1},{2}};
+  int2x1 B = (int2x1)A;
+}
+
+struct S {
+  int X;
+  float Y;
+};
+
+// CHECK-LABEL: define hidden void @_Z5call3v(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <2 x i32>, align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr 
align 1 @__const._Z5call3v.s, i32 8, i1 false)
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], 
ptr align 1 [[S]], i32 8, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP0]], i32 
[[TMP1]], i64 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[GEP1]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP3]] to i32
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> [[TMP2]], i32 
[[CONV]], i64 1
+// CHECK-NEXT:    store <2 x i32> [[TMP4]], ptr [[A]], align 4
+// CHECK-NEXT:    ret void
+//
+void call3() {
+  S s = {1, 2.0};
+  int2x1 A = (int2x1)s;
+}
+
+struct BFields {
+  double D;
+  int E: 15;
+  int : 8;
+  float F;
+};
+
+struct Derived : BFields {
+  int G;
+};
+
+// CHECK-LABEL: define hidden void @_Z5call47Derived(
+// CHECK-SAME: ptr noundef byval([[STRUCT_DERIVED:%.*]]) align 1 [[D:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT:    [[AGG_TEMP:%.*]] = alloca [[STRUCT_DERIVED]], align 1
+// CHECK-NEXT:    [[FLATCAST_TMP:%.*]] = alloca <4 x i32>, align 4
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], 
ptr align 1 [[D]], i32 19, i1 false)
+// CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], ptr 
[[AGG_TEMP]], i32 0, i32 0
+// CHECK-NEXT:    [[E:%.*]] = getelementptr inbounds nuw 
[[STRUCT_BFIELDS:%.*]], ptr [[GEP]], i32 0, i32 1
+// CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], 
ptr [[AGG_TEMP]], i32 0, i32 0, i32 0
+// CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], 
ptr [[AGG_TEMP]], i32 0, i32 0, i32 2
+// CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [[STRUCT_DERIVED]], 
ptr [[AGG_TEMP]], i32 0, i32 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[FLATCAST_TMP]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[GEP1]], align 8
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP0]], i32 
[[CONV]], i64 0
+// CHECK-NEXT:    [[BF_LOAD:%.*]] = load i24, ptr [[E]], align 1
+// CHECK-NEXT:    [[BF_SHL:%.*]] = shl i24 [[BF_LOAD]], 9
+// CHECK-NEXT:    [[BF_ASHR:%.*]] = ashr i24 [[BF_SHL]], 9
+// CHECK-NEXT:    [[BF_CAST:%.*]] = sext i24 [[BF_ASHR]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 
[[BF_CAST]], i64 1
+// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4
+// CHECK-NEXT:    [[CONV4:%.*]] = fptosi float [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 
[[CONV4]], i64 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[GEP3]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 
[[TMP6]], i64 3
+// CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[A]], align 4
+// CHECK-NEXT:    ret void
+//
+void call4(Derived D) {
+  int2x2 A = (int2x2)D;
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
new file mode 100644
index 0000000000000..f16d01e1d12ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixTruncation.hlsl
@@ -0,0 +1,156 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes 
-emit-llvm -finclude-default-header -o - %s | FileCheck %s
+
+// CHECK-LABEL: define hidden noundef <12 x i32> 
@_Z10trunc_castu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I34:%.*]] = alloca [12 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
+// CHECK-NEXT:    ret <12 x i32> [[TMP1]]
+//
+ int3x4 trunc_cast(int4x4 i44) {
+    int3x4 i34 = (int3x4)i44;
+    return i34;
+}
+
+// CHECK-LABEL: define hidden noundef <12 x i32> 
@_Z11trunc_cast0u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I43:%.*]] = alloca [12 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
+// CHECK-NEXT:    ret <12 x i32> [[TMP1]]
+//
+ int4x3 trunc_cast0(int4x4 i44) {
+    int4x3 i43 = (int4x3)i44;
+    return i43;
+}
+
+// CHECK-LABEL: define hidden noundef <9 x i32> 
@_Z11trunc_cast1u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I33:%.*]] = alloca [9 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 
i32 8>
+// CHECK-NEXT:    store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
+// CHECK-NEXT:    ret <9 x i32> [[TMP1]]
+//
+ int3x3 trunc_cast1(int4x4 i44) {
+    int3x3 i33 = (int3x3)i44;
+    return i33;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z11trunc_cast2u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I32:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT:    store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
+// CHECK-NEXT:    ret <6 x i32> [[TMP1]]
+//
+ int3x2 trunc_cast2(int4x4 i44) {
+    int3x2 i32 = (int3x2)i44;
+    return i32;
+}
+
+// CHECK-LABEL: define hidden noundef <6 x i32> 
@_Z11trunc_cast3u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I23:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT:    store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
+// CHECK-NEXT:    ret <6 x i32> [[TMP1]]
+//
+ int2x3 trunc_cast3(int4x4 i44) {
+    int2x3 i23 = (int2x3)i44;
+    return i23;
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i32> 
@_Z11trunc_cast4u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I22:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+ int2x2 trunc_cast4(int4x4 i44) {
+    int2x2 i22 = (int2x2)i44;
+    return i22;
+}
+
+// CHECK-LABEL: define hidden noundef <2 x i32> 
@_Z11trunc_cast5u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I21:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
+// CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+//
+ int2x1 trunc_cast5(int4x4 i44) {
+    int2x1 i21 = (int2x1)i44;
+    return i21;
+}
+
+// CHECK-LABEL: define hidden noundef i32 
@_Z11trunc_cast6u11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[CAST_MTRUNC:%.*]] = extractelement <16 x i32> [[TMP0]], 
i32 0
+// CHECK-NEXT:    store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+ int trunc_cast6(int4x4 i44) {
+    int i1 = (int)i44;
+    return i1;
+}
+
+// CHECK-LABEL: define hidden noundef i32 
@_Z16trunc_multi_castu11matrix_typeILm4ELm4EiE(
+// CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[I1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
+// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    [[CAST_MTRUNC:%.*]] = extractelement <12 x i32> [[TRUNC]], 
i32 0
+// CHECK-NEXT:    store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+ int trunc_multi_cast(int4x4 i44) {
+    int i1 = (int)(int3x4)i44;
+    return i1;
+}
diff --git a/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl 
b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl
new file mode 100644
index 0000000000000..59d432cd3eb00
--- /dev/null
+++ b/clang/test/SemaHLSL/Types/BuiltinMatrix/MatrixCastErrors.hlsl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library 
-finclude-default-header -std=hlsl202x -verify %s
+
+// Note column is too large
+export int3x2 shape_cast_error(float2x3 f23) {
+    int3x2 i32 = (int3x2)f23;
+    // expected-error@-1 {{conversion between matrix types 'int3x2' (aka 
'matrix<int, 3, 2>') and 'matrix<float, 2, 3>' of different size is not 
allowed}}
+    return i32;
+}
+// Note row is too large
+export int2x3 shape_cast_error2(float3x2 f32) {
+    int2x3 i23 = (int2x3)f32;
+    // expected-error@-1 {{conversion between matrix types 'int2x3' (aka 
'matrix<int, 2, 3>') and 'matrix<float, 3, 2>' of different size is not 
allowed}}
+    return i23;
+}
+
+// Note: changing the element type separately from the shape should still error
+export int2x3 shape_cast_error3(float3x2 f32) {
+    int2x3 i23 = (int3x2)f32;
+    // expected-error@-1 {{cannot initialize a variable of type 'matrix<[...], 
2, 3>' with an rvalue of type 'matrix<[...], 3, 2>}}
+    return i23;
+}

>From 398fbf7cb7f359559f945ee4a5d32a283f6aa7d5 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <[email protected]>
Date: Thu, 20 Nov 2025 13:05:55 -0500
Subject: [PATCH 2/2] fix ObjC warning

---
 clang/lib/Edit/RewriteObjCFoundationAPI.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp 
b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
index 40f8348241ecc..e8d4660fd36b2 100644
--- a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
+++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp
@@ -1085,6 +1085,7 @@ static bool rewriteToNumericBoxedExpression(const 
ObjCMessageExpr *Msg,
       llvm_unreachable("OpenCL-specific cast in Objective-C?");
 
     case CK_HLSLVectorTruncation:
+    case CK_HLSLMatrixTruncation:
     case CK_HLSLElementwiseCast:
     case CK_HLSLAggregateSplatCast:
       llvm_unreachable("HLSL-specific cast in Objective-C?");

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL][Matrix] Add support for Matrix element and trunc Casts (PR #168915)

Reply via email to