Author: Helena Kotas Date: 2025-11-25T12:50:25-08:00 New Revision: c475f8e5d809e2109a67c8523472eb28cd7bbb2a
URL: https://github.com/llvm/llvm-project/commit/c475f8e5d809e2109a67c8523472eb28cd7bbb2a DIFF: https://github.com/llvm/llvm-project/commit/c475f8e5d809e2109a67c8523472eb28cd7bbb2a.diff LOG: [HLSL] Update vector swizzle elements individually (#169090) When individual elements of a vector are updated via vector swizzle, it needs to be handled as separate store operations to the individual vector elements. Clang treats vectors as one unit, so if a part of a vector needs to be updated, the whole vector is loaded, some elements modified, and then the whole vector is stored. In HLSL vector elements are handled separately. We need to avoid this load/modify/store sequence to prevent overwriting other vector elements that might be getting updated in parallel. Fixes #152815 Added: clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl Modified: clang/lib/CodeGen/CGExpr.cpp clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index b33772919b8c8..221d0d6016a9c 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2801,18 +2801,49 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst) { llvm::Value *SrcVal = Src.getScalarVal(); Address DstAddr = Dst.getExtVectorAddress(); + const llvm::Constant *Elts = Dst.getExtVectorElts(); if (DstAddr.getElementType()->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits()) SrcVal = Builder.CreateZExt( SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType())); - // HLSL allows storing to scalar values through ExtVector component LValues. - // To support this we need to handle the case where the destination address is - // a scalar. - if (!DstAddr.getElementType()->isVectorTy()) { - assert(!Dst.getType()->isVectorType() && - "this should only occur for non-vector l-values"); - Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified()); + if (getLangOpts().HLSL) { + llvm::Type *DestAddrTy = DstAddr.getElementType(); + // HLSL allows storing to scalar values through ExtVector component LValues. + // To support this we need to handle the case where the destination address + // is a scalar. + if (!DestAddrTy->isVectorTy()) { + assert(!Dst.getType()->isVectorType() && + "this should only occur for non-vector l-values"); + Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified()); + return; + } + + // HLSL allows direct access to vector elements, so storing to individual + // elements of a vector through ExtVector is handled as separate store + // instructions. + // If we are updating multiple elements, Dst and Src are vectors; for + // a single element update they are scalars. + const VectorType *VTy = Dst.getType()->getAs<VectorType>(); + unsigned NumSrcElts = VTy ? VTy->getNumElements() : 1; + CharUnits ElemAlign = CharUnits::fromQuantity( + CGM.getDataLayout().getPrefTypeAlign(DestAddrTy->getScalarType())); + llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0); + + for (unsigned I = 0; I != NumSrcElts; ++I) { + llvm::Value *Val = VTy ? Builder.CreateExtractElement( + SrcVal, llvm::ConstantInt::get(Int32Ty, I)) + : SrcVal; + unsigned FieldNo = getAccessedFieldNo(I, Elts); + Address DstElemAddr = Address::invalid(); + if (FieldNo == 0) + DstElemAddr = DstAddr.withAlignment(ElemAlign); + else + DstElemAddr = Builder.CreateGEP( + DstAddr, {Zero, llvm::ConstantInt::get(Int32Ty, FieldNo)}, + DestAddrTy, ElemAlign); + Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified()); + } return; } @@ -2820,7 +2851,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, // value now. llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified()); llvm::Type *VecTy = Vec->getType(); - const llvm::Constant *Elts = Dst.getExtVectorElts(); if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) { unsigned NumSrcElts = VTy->getNumElements(); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl index d0ba8f447b732..ec03804ad1a4c 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl @@ -101,10 +101,16 @@ void funky(inout int3 X) { // Call the function with the temporary. // CHECK: call void {{.*}}funky{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[ArgTmp]]) -// Shuffle it back. +// Write it back. // CHECK: [[RetVal:%.*]] = load <3 x i32>, ptr [[ArgTmp]] -// CHECK: [[Vxyz:%.*]] = shufflevector <3 x i32> [[RetVal]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1> -// CHECK: store <3 x i32> [[Vxyz]], ptr [[V]] +// CHECK: [[Src0:%.*]] = extractelement <3 x i32> [[RetVal]], i32 0 +// CHECK: [[PtrY:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 1 +// CHECK: store i32 [[Src0]], ptr [[PtrY]], align 4 +// CHECK: [[Src1:%.*]] = extractelement <3 x i32> [[RetVal]], i32 1 +// CHECK: [[PtrZ:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 2 +// CHECK: store i32 [[Src1]], ptr [[PtrZ]], align 4 +// CHECK: [[Src2:%.*]] = extractelement <3 x i32> [[RetVal]], i32 2 +// CHECK: store i32 [[Src2]], ptr %V, align 4 // OPT: ret <3 x i32> <i32 3, i32 1, i32 2> export int3 case4() { diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl index 7804239edccae..270598265c660 100644 --- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl @@ -259,9 +259,8 @@ bool AssignBool(bool V) { // CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4 // CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1 // CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32 -// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8 -// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1 -// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8 +// CHECK-NEXT: [[C:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1 +// CHECK-NEXT: store i32 [[D]], ptr [[C]], align 4 // CHECK-NEXT: ret void void AssignBool2(bool V) { bool2 X = true.xx; @@ -277,10 +276,13 @@ void AssignBool2(bool V) { // CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8 // CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1> // CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32> -// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8 -// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1> -// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8 +// CHECK-NEXT: [[V1:%.*]] = extractelement <2 x i32> [[B]], i32 0 +// CHECK-NEXT: store i32 [[V1]], ptr [[X]], align 4 +// CHECK-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[B]], i32 1 +// CHECK-NEXT: [[X2:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1 +// CHECK-NEXT: store i32 [[V2]], ptr [[X2]], align 4 // CHECK-NEXT: ret void + void AssignBool3(bool2 V) { bool2 X = {true,true}; X.xy = V; @@ -313,10 +315,13 @@ bool2 AccessBools() { // CHECK-NEXT: [[L1:%.*]] = shufflevector <1 x i32> [[L0]], <1 x i32> poison, <3 x i32> zeroinitializer // CHECK-NEXT: [[TruncV:%.*]] = trunc <3 x i32> [[L1]] to <3 x i1> // CHECK-NEXT: [[L2:%.*]] = zext <3 x i1> [[TruncV]] to <3 x i32> -// CHECK-NEXT: [[L3:%.*]] = load <4 x i32>, ptr [[B]], align 16 -// CHECK-NEXT: [[L4:%.*]] = shufflevector <3 x i32> [[L2]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> -// CHECK-NEXT: [[L5:%.*]] = shufflevector <4 x i32> [[L3]], <4 x i32> [[L4]], <4 x i32> <i32 4, i32 5, i32 6, i32 3> -// CHECK-NEXT: store <4 x i32> [[L5]], ptr [[B]], align 16 +// CHECK-NEXT: [[V1:%.*]] = extractelement <3 x i32> [[L2]], i32 0 +// CHECK-NEXT: store i32 [[V1]], ptr %B, align 4 +// CHECK-NEXT: [[V2:%.*]] = extractelement <3 x i32> [[L2]], i32 1 +// CHECK-NEXT: [[B2:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 1 +// CHECK-NEXT: store i32 [[V2]], ptr [[B2]], align 4 +// CHECK-NEXT: [[V3:%.*]] = extractelement <3 x i32> [[L2]], i32 2 +// CHECK-NEXT: [[B3:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 2 void BoolSizeMismatch() { bool4 B = {true,true,true,true}; B.xyz = false.xxx; diff --git a/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl new file mode 100644 index 0000000000000..c632e795098ea --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl @@ -0,0 +1,96 @@ +// RUN: %clang_cc1 -finclude-default-header -fnative-half-type \ +// RUN: -triple dxil-pc-shadermodel6.3-library %s -disable-llvm-passes \ +// RUN: -emit-llvm -o - | FileCheck %s + +// CHECK-LABEL: Single + +// Setup local vars. +// CHECK: [[VecAddr:%.*]] = alloca <3 x i64>, align 32 +// CHECK-NEXT: [[AAddr:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store <3 x i64> %vec, ptr [[VecAddr]], align 32 +// CHECK-NEXT: store i64 %a, ptr [[AAddr]], align 8 + +// Update single element of the vector. +// CHECK-NEXT: [[A:%.*]] = load i64, ptr [[AAddr]], align 8 +// CHECK-NEXT: [[Vy:%.*]] = getelementptr <3 x i64>, ptr [[VecAddr]], i32 0, i32 1 +// CHECK-NEXT: store i64 [[A]], ptr [[Vy]], align 8 + +// Return. +// CHECK-NEXT: [[RetVal:%.*]] = load <3 x i64>, ptr [[VecAddr]], align 32 +// CHECK-NEXT: ret <3 x i64> [[RetVal]] +uint64_t3 Single(uint64_t3 vec, uint64_t a){ + vec.y = a; + return vec; +} + +// CHECK-LABEL: Double + +// Setup local vars. +// CHECK: [[VecAddr:%.*]] = alloca <3 x float>, align 16 +// CHECK-NEXT: [[AAddr:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[BAddr:%.*]] = alloca float, align 4 +// CHECK-NEXT: store <3 x float> %vec, ptr [[VecAddr]], align 16 +// CHECK-NEXT: store float %a, ptr [[AAddr]], align 4 +// CHECK-NEXT: store float %b, ptr [[BAddr]], align 4 + +// Create temporary vector {a, b}. +// CHECK-NEXT: [[A:%.*]] = load float, ptr [[AAddr]], align 4 +// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0 +// CHECK-NEXT: [[B:%.*]] = load float, ptr [[BAddr]], align 4 +// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <2 x float> [[TmpVec0]], float [[B]], i32 1 + +// Update two elements of the vector from temporary vector. +// CHECK-NEXT: [[TmpX:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 0 +// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 2 +// CHECK-NEXT: store float [[TmpX]], ptr [[VecZ]], align 4 +// CHECK-NEXT: [[TmpY:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 1 +// CHECK-NEXT: [[VecY:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 1 +// CHECK-NEXT: store float [[TmpY]], ptr [[VecY]], align 4 + +// Return. +// CHECK-NEXT: [[RetVal:%.*]] = load <3 x float>, ptr [[VecAddr]], align 16 +// CHECK-NEXT: ret <3 x float> [[RetVal]] +float3 Double(float3 vec, float a, float b) { + vec.zy = {a, b}; + return vec; +} + +// CHECK-LABEL: Shuffle + +// Setup local vars. +// CHECK: [[VecAddr:%.*]] = alloca <4 x half>, align 8 +// CHECK-NEXT: [[AAddr:%.*]] = alloca half, align 2 +// CHECK-NEXT: [[BAddr:%.*]] = alloca half, align 2 +// CHECK-NEXT: store <4 x half> %vec, ptr [[VecAddr]], align 8 +// CHECK-NEXT: store half %a, ptr [[AAddr]], align 2 +// CHECK-NEXT: store half %b, ptr [[BAddr]], align 2 + +// Create temporary vector {a, b, 13.74, a}. +// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2 +// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[B:%.*]] = load half, ptr [[BAddr]], align 2 +// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <4 x half> [[TmpVec0]], half [[B]], i32 1 +// CHECK-NEXT: [[TmpVec2:%.*]] = insertelement <4 x half> %vecinit1, half 0xH4ADF, i32 2 +// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2 +// CHECK-NEXT: [[TmpVec3:%.*]] = insertelement <4 x half> [[TmpVec2]], half [[A]], i32 3 + +// Update four elements of the vector via mixed up swizzle from the temporary vector. +// CHECK-NEXT: [[TmpX:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 0 +// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 2 +// CHECK-NEXT: store half [[TmpX]], ptr [[VecZ]], align 2 +// CHECK-NEXT: [[TmpY:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 1 +// CHECK-NEXT: [[VecW:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 3 +// CHECK-NEXT: store half [[TmpY]], ptr [[VecW]], align 2 +// CHECK-NEXT: [[TmpZ:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 2 +// CHECK-NEXT: store half [[TmpZ]], ptr [[VecAddr]], align 2 +// CHECK-NEXT: [[TmpW:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 3 +// CHECK-NEXT: [[VecY:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 1 +// CHECK-NEXT: store half [[TmpW]], ptr [[VecY]], align 2 + +// Return. +// CHECK-NEXT: [[RetVal:%.*]] = load <4 x half>, ptr [[VecAddr]], align 8 +// CHECK-NEXT: ret <4 x half> [[RetVal]] +half4 Shuffle(half4 vec, half a, half b) { + vec.zwxy = {a, b, 13.74, a}; + return vec; +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
