https://github.com/tcorringham updated https://github.com/llvm/llvm-project/pull/172469
>From 6f2b5ae0758b3f2903801e96b468a6c7b9a353b1 Mon Sep 17 00:00:00 2001 From: Tim Corringham <[email protected]> Date: Mon, 8 Dec 2025 17:49:06 +0000 Subject: [PATCH 1/2] [HLSL] Implement f32tof16() intrinsic Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related tests. Fixes #99113 --- clang/include/clang/Basic/Builtins.td | 6 + clang/lib/CodeGen/CGHLSLBuiltins.cpp | 56 ++++++++ .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 21 +++ clang/lib/Sema/SemaHLSL.cpp | 23 +++ .../builtins/f32tof16-builtin.hlsl | 27 ++++ .../CodeGenHLSL/builtins/f32tof16-builtin.ll | 67 +++++++++ clang/test/CodeGenHLSL/builtins/f32tof16.hlsl | 27 ++++ clang/test/CodeGenHLSL/builtins/f32tof16.ll | 67 +++++++++ .../SemaHLSL/BuiltIns/f32tof16-errors.hlsl | 134 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 + llvm/lib/Target/DirectX/DXIL.td | 9 ++ .../DirectX/DirectXTargetTransformInfo.cpp | 2 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 3 + llvm/test/CodeGen/DirectX/f32tof16.ll | 57 ++++++++ .../SPIRV/opencl/packhalf2x16-error.ll | 10 ++ llvm/test/CodeGen/SPIRV/packhalf2x16.ll | 15 ++ 17 files changed, 529 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl create mode 100644 clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll create mode 100644 clang/test/CodeGenHLSL/builtins/f32tof16.hlsl create mode 100644 clang/test/CodeGenHLSL/builtins/f32tof16.ll create mode 100644 clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl create mode 100644 llvm/test/CodeGen/DirectX/f32tof16.ll create mode 100644 llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll create mode 100644 llvm/test/CodeGen/SPIRV/packhalf2x16.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index aab2418511399..e5fe97f952d06 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5272,6 +5272,12 @@ def HLSLF16ToF32 : 
LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLF32ToF16 : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_f32tof16"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"]; let Attributes = [NoThrow, Const, CustomTypeChecking]; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 317e64d595243..e0ee9ab47f92f 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -211,6 +211,59 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture"); } +static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF, + const CallExpr *E) { + Value *Op0 = CGF.EmitScalarExpr(E->getArg(0)); + QualType Op0Ty = E->getArg(0)->getType(); + llvm::Type *ResType = CGF.IntTy; + uint64_t NumElements = 0; + if (Op0->getType()->isVectorTy()) { + NumElements = + E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements(); + ResType = + llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements)); + } + if (!Op0Ty->hasFloatingRepresentation()) + llvm_unreachable( + "f32tof16 operand must have a float representation"); + + if (CGF.CGM.getTriple().isDXIL()) + return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16, + ArrayRef<Value *>{Op0}, nullptr, + "hlsl.f32tof16"); + + if (CGF.CGM.getTriple().isSPIRV()) { + // We use the SPIRV PackHalf2x16 operation to avoid the need for the + // Int16 and Float16 capabilities + auto PackType = + llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2)); + if (NumElements == 0) { + // a scalar input - simply insert the scalar in the first element + // of the 2 element float vector + Value *Float2 = Constant::getNullValue(PackType); + Float2 = 
CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0); + Value *Result = CGF.Builder.CreateIntrinsic( + ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2}); + return Result; + } else { + // a vector input - build a congruent output vector by iterating through + // the input vector calling packhalf2x16 for each element + Value *Result = PoisonValue::get(ResType); + for (uint64_t i = 0; i < NumElements; i++) { + Value *Float2 = Constant::getNullValue(PackType); + Value *InVal = CGF.Builder.CreateExtractElement(Op0, i); + Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0); + Value *Res = CGF.Builder.CreateIntrinsic( + CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2}); + Result = CGF.Builder.CreateInsertElement(Result, Res, i); + } + return Result; + } + } + + llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture"); +} + static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr, LValue &Stride) { // Figure out the stride of the buffer elements from the handle type. 
@@ -676,6 +729,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, case Builtin::BI__builtin_hlsl_elementwise_f16tof32: { return handleElementwiseF16ToF32(*this, E); } + case Builtin::BI__builtin_hlsl_elementwise_f32tof16: { + return handleElementwiseF32ToF16(*this, E); + } case Builtin::BI__builtin_hlsl_elementwise_frac: { Value *Op0 = EmitScalarExpr(E->getArg(0)); if (!E->getArg(0)->getType()->hasFloatingRepresentation()) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index f58150ed61106..8905dd9f897ca 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -1073,6 +1073,27 @@ float3 f16tof32(uint3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32) float4 f16tof32(uint4); +//===----------------------------------------------------------------------===// +// f32tof16 builtins +//===----------------------------------------------------------------------===// + +/// \fn uint f32tof16(float x) +/// \brief Returns the float arg value converted to half in the low 16 bits of +/// the uint return value +/// \param x The float to be converted to half. +/// +/// The return value is a uint containing the converted half value in the low +/// 16 bits. 
+ +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16) +uint f32tof16(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16) +uint2 f32tof16(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16) +uint3 f32tof16(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16) +uint4 f32tof16(float4); + //===----------------------------------------------------------------------===// // firstbitlow builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 28744ff0ff42e..b8c558c3e73ac 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2877,6 +2877,20 @@ static bool CheckAllArgTypesAreCorrect( return false; } +static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc, + int ArgOrdinal, + clang::QualType PassedType) { + clang::QualType BaseType = + PassedType->isVectorType() + ? PassedType->castAs<clang::VectorType>()->getElementType() + : PassedType; + if (!BaseType->isFloat32Type()) + return S->Diag(Loc, diag::err_builtin_invalid_arg_type) + << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0 + << /* float */ 1 << PassedType; + return false; +} + static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc, int ArgOrdinal, clang::QualType PassedType) { @@ -3564,6 +3578,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy); break; } + case Builtin::BI__builtin_hlsl_elementwise_f32tof16: { + if (SemaRef.checkArgCount(TheCall, 1)) + return true; + if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, + CheckFloatRepresentation)) + return true; + SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().UnsignedIntTy); + break; + } } return false; } diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl new file 
mode 100644 index 0000000000000..ede6d5c0f3236 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0) +// CHECK: ret i32 %hlsl.f32tof16 +// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1 +uint test_scalar(float p0) { return __builtin_hlsl_elementwise_f32tof16(p0); } + +// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0) +// CHECK: ret <2 x i32> %hlsl.f32tof16 +// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1 +uint2 test_uint2(float2 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); } + +// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0) +// CHECK: ret <3 x i32> %hlsl.f32tof16 +// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1 +uint3 test_uint3(float3 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); } + +// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0) +// CHECK: ret <4 x i32> %hlsl.f32tof16 +// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1 +uint4 test_uint4(float4 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll new file 
mode 100644 index 0000000000000..047423a1ed9a6 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.ll @@ -0,0 +1,67 @@ +; ModuleID = 'clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl' +source_filename = "clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl" +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64" +target triple = "dxilv1.3-pc-shadermodel6.3-library" + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca float, align 4 + store float %p0, ptr %p0.addr, align 4 + %0 = load float, ptr %p0.addr, align 4 + %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0) + ret i32 %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare i32 @llvm.dx.legacyf32tof16.f32(float) #1 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca <2 x float>, align 8 + store <2 x float> %p0, ptr %p0.addr, align 8 + %0 = load <2 x float>, ptr %p0.addr, align 8 + %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0) + ret <2 x i32> %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca <3 x float>, align 16 + store <3 x float> %p0, ptr %p0.addr, align 16 + %0 = load <3 x float>, ptr %p0.addr, align 16 + %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0) + ret <3 x i32> %hlsl.f32tof16 
+} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca <4 x float>, align 16 + store <4 x float> %p0, ptr %p0.addr, align 16 + %0 = load <4 x float>, ptr %p0.addr, align 16 + %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0) + ret <4 x i32> %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1 + +attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } + +!llvm.module.flags = !{!0} +!dx.valver = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 8} +!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git 082f296ac09ca7c183aac27883cc6489250db75d)"} diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl new file mode 100644 index 0000000000000..008f495ef869c --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0) +// CHECK: ret i32 %hlsl.f32tof16 +// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1 +uint 
test_scalar(float p0) { return f32tof16(p0); } + +// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0) +// CHECK: ret <2 x i32> %hlsl.f32tof16 +// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1 +uint2 test_uint2(float2 p0) { return f32tof16(p0); } + +// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0) +// CHECK: ret <3 x i32> %hlsl.f32tof16 +// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1 +uint3 test_uint3(float3 p0) { return f32tof16(p0); } + +// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 { +// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0) +// CHECK: ret <4 x i32> %hlsl.f32tof16 +// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1 +uint4 test_uint4(float4 p0) { return f32tof16(p0); } diff --git a/clang/test/CodeGenHLSL/builtins/f32tof16.ll b/clang/test/CodeGenHLSL/builtins/f32tof16.ll new file mode 100644 index 0000000000000..2e92b73ee5cfb --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/f32tof16.ll @@ -0,0 +1,67 @@ +; ModuleID = 'f32tof16.hlsl' +source_filename = "f32tof16.hlsl" +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64-v48:16:16-v96:32:32-v192:64:64" +target triple = "dxilv1.3-pc-shadermodel6.3-library" + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca float, align 4 + store float %p0, ptr %p0.addr, align 4 + %0 = load float, ptr %p0.addr, align 4 + %hlsl.f32tof16 = call i32 
@llvm.dx.legacyf32tof16.f32(float %0) + ret i32 %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare i32 @llvm.dx.legacyf32tof16.f32(float) #1 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca <2 x float>, align 8 + store <2 x float> %p0, ptr %p0.addr, align 8 + %0 = load <2 x float>, ptr %p0.addr, align 8 + %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0) + ret <2 x i32> %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca <3 x float>, align 16 + store <3 x float> %p0, ptr %p0.addr, align 16 + %0 = load <3 x float>, ptr %p0.addr, align 16 + %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0) + ret <3 x i32> %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 { +entry: + %p0.addr = alloca <4 x float>, align 16 + store <4 x float> %p0, ptr %p0.addr, align 16 + %0 = load <4 x float>, ptr %p0.addr, align 16 + %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0) + ret <4 x i32> %hlsl.f32tof16 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1 + +attributes #0 = { alwaysinline convergent 
mustprogress norecurse nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } + +!llvm.module.flags = !{!0} +!dx.valver = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 8} +!2 = !{!"clang version 22.0.0git (https://github.com/llvm/llvm-project.git a5e8e77f7ccd15945eb432a3619e57f9600c142a)"} diff --git a/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl new file mode 100644 index 0000000000000..cd95602b413c5 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify + +uint builtin_f32tof16_too_few_arg() { + return __builtin_hlsl_elementwise_f32tof16(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}} +} + +uint builtin_f32tof16_too_many_arg(uint p0) { + return __builtin_hlsl_elementwise_f32tof16(p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}} +} + +uint builtin_f32tof16_bool(bool p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}} +} + +uint builtin_f32tof16_bool4(bool4 p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool4' (aka 'vector<bool, 4>')}} +} + +uint 
builtin_f32tof16_short(short p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}} +} + +uint builtin_f32tof16_unsigned_short(unsigned short p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}} +} + +uint builtin_f32tof16_int(int p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}} +} + +uint builtin_f32tof16_int64_t(long p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}} +} + +uint2 builtin_f32tof16_int2_to_float2_promotion(int2 p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int2' (aka 'vector<int, 2>'))}} +} + +uint builtin_f32tof16_half(half p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half')}} +} + +uint builtin_f32tof16_half4(half4 p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half4' (aka 'vector<half, 4>'))}} +} + +uint builtin_f32tof16_float(unsigned int p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned int')}} +} + +uint builtin_f32tof16_double(double p0) { + return __builtin_hlsl_elementwise_f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'double')}} +} + +uint f32tof16_too_few_arg() { + return f32tof16(); + // expected-error@-1 {{no matching 
function for call to 'f32tof16'}} +} + +uint f32tof16_too_many_arg(uint p0) { + return f32tof16(p0, p0); + // expected-error@-1 {{no matching function for call to 'f32tof16'}} +} + +uint f32tof16_bool(bool p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}} +} + +uint f32tof16_bool3(bool3 p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool3' (aka 'vector<bool, 3>'))}} +} + + +uint f32tof16_int16_t(short p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}} +} + +uint f32tof16_int16_t(unsigned short p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}} +} + +uint f32tof16_int(int p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}} +} + +uint f32tof16_int64_t(long p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}} +} + +uint2 f32tof16_int2_to_float2_promotion(int3 p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int3' (aka 'vector<int, 3>'))}} +} + +uint f32tof16_half(half p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half')}} +} + +uint f32tof16_half2(half2 p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half2' (aka 'vector<half, 2>'))}} +} + +uint f32tof16_float(uint p0) { + return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'uint' (aka 'unsigned int'))}} +} + +uint f32tof16_double(double p0) { + 
return f32tof16(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'double')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 8ca93731ffa04..8b2537f7649f1 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -143,6 +143,9 @@ def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1 def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>], [llvm_anyint_ty], [IntrNoMem]>; +def int_dx_legacyf32tof16 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], + [llvm_anyfloat_ty], [IntrNoMem]>; + def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 6ca6af4a8622e..14d21f2098350 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -209,5 +209,7 @@ def int_spv_resource_nonuniformindex [IntrNoMem, NoUndef<RetIndex>]>; def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_spv_packhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; + } diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index b221fa2d7fe87..d117c5dd1a491 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -1124,6 +1124,15 @@ def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> { let stages = [Stages<DXIL1_0, [all_stages]>]; } +def LegacyF32ToF16 : DXILOp<130, legacyF32ToF16> { + let Doc = "converts the float stored in the first element of the float2 " + "to a half and stores it in the low 16 bits of the output uint"; + let intrinsics = [IntrinSelect<int_dx_legacyf32tof16>]; + let arguments = [FloatTy]; + let 
result = Int32Ty; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def WaveAllBitCount : DXILOp<135, waveAllOp> { let Doc = "returns the count of bits set to 1 across the wave"; let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>]; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index f54b48b91265e..23f45b5fe2270 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -35,6 +35,7 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, case Intrinsic::dx_isinf: case Intrinsic::dx_isnan: case Intrinsic::dx_legacyf16tof32: + case Intrinsic::dx_legacyf32tof16: return OpdIdx == 0; default: return OpdIdx == -1; @@ -52,6 +53,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_isinf: case Intrinsic::dx_isnan: case Intrinsic::dx_legacyf16tof32: + case Intrinsic::dx_legacyf32tof16: case Intrinsic::dx_rsqrt: case Intrinsic::dx_saturate: case Intrinsic::dx_splitdouble: diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 2e4563795e8f0..3af0f45d09ace 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -3855,6 +3855,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_unpackhalf2x16: { return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16); } + case Intrinsic::spv_packhalf2x16: { + return selectExtInst(ResVReg, ResType, I, GL::PackHalf2x16); + } case Intrinsic::spv_ddx: return selectDerivativeInst(ResVReg, ResType, I, SPIRV::OpDPdx); case Intrinsic::spv_ddy: diff --git a/llvm/test/CodeGen/DirectX/f32tof16.ll b/llvm/test/CodeGen/DirectX/f32tof16.ll new file mode 100644 index 0000000000000..5012fc0b2a3db --- /dev/null +++ b/llvm/test/CodeGen/DirectX/f32tof16.ll @@ -0,0 +1,57 @@ 
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s + +define hidden noundef i32 @_Z11test_scalarj(float noundef %p0) local_unnamed_addr #0 { +entry: + ; CHECK : [[UINT:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float %p0) + ; CHECK : ret i32 [[UINT]] + %hlsl.f32tof16 = tail call i32 @llvm.dx.legacyf32tof16.i32(float %p0) + ret i32 %hlsl.f32tof16 +} + +define hidden noundef <2 x i32> @_Z10test_uint2Dv2_j(<2 x float> noundef %p0) local_unnamed_addr #0 { +entry: + ; CHECK: [[FLOAT2_0:%.*]] = extractelement <2 x float> %p0, i64 0 + ; CHECK: [[UINT_0:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT2_0]]) + ; CHECK: [[FLOAT2_1:%.*]] = extractelement <2 x float> %p0, i64 1 + ; CHECK: [[UINT_1:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT2_1]]) + ; CHECK: [[UINT2_0:%.*]] = insertelement <2 x i32> poison, i32 [[UINT_0]], i64 0 + ; CHECK: [[UINT2_1:%.*]] = insertelement <2 x i32> [[UINT2_0]], i32 [[UINT_1]], i64 1 + ; CHECK : ret <2 x i32> [[UINT2_1]] + %hlsl.f32tof16 = tail call <2 x i32> @llvm.dx.legacyf32tof16.v2i32(<2 x float> %p0) + ret <2 x i32> %hlsl.f32tof16 +} + +define hidden noundef <3 x i32> @_Z10test_uint3Dv3_j(<3 x float> noundef %p0) local_unnamed_addr #0 { +entry: + ; CHECK: [[FLOAT3_0:%.*]] = extractelement <3 x float> %p0, i64 0 + ; CHECK: [[UINT_0:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT3_0]]) + ; CHECK: [[FLOAT3_1:%.*]] = extractelement <3 x float> %p0, i64 1 + ; CHECK: [[UINT_1:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT3_1]]) + ; CHECK: [[FLOAT3_2:%.*]] = extractelement <3 x float> %p0, i64 2 + ; CHECK: [[UINT_2:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT3_2]]) + ; CHECK: [[UINT3_0:%.*]] = insertelement <3 x i32> poison, i32 [[UINT_0]], i64 0 + ; CHECK: [[UINT3_1:%.*]] = insertelement <3 x i32> [[UINT3_0]], i32 [[UINT_1]], i64 1 + ; CHECK: [[UINT3_2:%.*]] = insertelement <3 x i32> 
[[UINT3_1]], i32 [[UINT_2]], i64 2 + ; CHECK : ret <3 x i32> [[UINT3_2]] + %hlsl.f32tof16 = tail call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %p0) + ret <3 x i32> %hlsl.f32tof16 +} + +define hidden noundef <4 x i32> @_Z10test_uint4Dv4_j(<4 x float> noundef %p0) local_unnamed_addr #0 { +entry: + ; CHECK: [[FLOAT4_0:%.*]] = extractelement <4 x float> %p0, i64 0 + ; CHECK: [[UINT_0:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_0]]) + ; CHECK: [[FLOAT4_1:%.*]] = extractelement <4 x float> %p0, i64 1 + ; CHECK: [[UINT_1:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_1]]) + ; CHECK: [[FLOAT4_2:%.*]] = extractelement <4 x float> %p0, i64 2 + ; CHECK: [[UINT_2:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_2]]) + ; CHECK: [[FLOAT4_3:%.*]] = extractelement <4 x float> %p0, i64 3 + ; CHECK: [[UINT_3:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_3]]) + ; CHECK: [[UINT4_0:%.*]] = insertelement <4 x i32> poison, i32 [[UINT_0]], i64 0 + ; CHECK: [[UINT4_1:%.*]] = insertelement <4 x i32> [[UINT4_0]], i32 [[UINT_1]], i64 1 + ; CHECK: [[UINT4_2:%.*]] = insertelement <4 x i32> [[UINT4_1]], i32 [[UINT_2]], i64 2 + ; CHECK: [[UINT4_3:%.*]] = insertelement <4 x i32> [[UINT4_2]], i32 [[UINT_3]], i64 3 + ; CHECK : ret <4 x i32> [[UINT4_3]] + %hlsl.f32tof16 = tail call <4 x i32> @llvm.dx.legacyf32tof16.v4i32(<4 x float> %p0) + ret <4 x i32> %hlsl.f32tof16 +} diff --git a/llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll b/llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll new file mode 100644 index 0000000000000..371d51d68e8b8 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll @@ -0,0 +1,10 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: %6:id(s64) = G_INTRINSIC 
intrinsic(@llvm.spv.packhalf2x16), %0:vfid(<2 x s64>) is only supported with the GLSL extended instruction set. + +define hidden spir_func noundef i32 @_Z9test_funcj(<2 x float> noundef %0) local_unnamed_addr #0 { + %2 = tail call i32 @llvm.spv.packhalf2x16.i32(<2 x float> %0) + ret i32 %2 +} + diff --git a/llvm/test/CodeGen/SPIRV/packhalf2x16.ll b/llvm/test/CodeGen/SPIRV/packhalf2x16.ll new file mode 100644 index 0000000000000..14a9b8f8412f8 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/packhalf2x16.ll @@ -0,0 +1,15 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450" +; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32 +; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2 +; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0 + +; CHECK: [[P0:%.*]] = OpFunctionParameter [[FLOAT2]] +; CHECK: [[PACK:%.*]] = OpExtInst [[UINT]] [[SET]] PackHalf2x16 [[P0]] +; CHECK: OpReturnValue [[PACK]] +define hidden spir_func noundef i32 @_Z9test_funcj(<2 x float> noundef %0) local_unnamed_addr #0 { + %2 = tail call i32 @llvm.spv.packhalf2x16.i32(<2 x float> %0) + ret i32 %2 +} >From 5f6848d0f2fc78b38ac13463e97d3b5b5654d073 Mon Sep 17 00:00:00 2001 From: Tim Corringham <[email protected]> Date: Tue, 16 Dec 2025 16:57:20 +0000 Subject: [PATCH 2/2] Fix code formatting Fix git-clang-format errors --- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 5 ++--- clang/lib/Sema/SemaHLSL.cpp | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index e0ee9ab47f92f..e0cb04e05933d 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -224,8 +224,7 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF, llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements)); } if 
(!Op0Ty->hasFloatingRepresentation()) - llvm_unreachable( - "f32tof16 operand must have a float representation"); + llvm_unreachable("f32tof16 operand must have a float representation"); if (CGF.CGM.getTriple().isDXIL()) return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16, @@ -254,7 +253,7 @@ static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF, Value *InVal = CGF.Builder.CreateExtractElement(Op0, i); Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0); Value *Res = CGF.Builder.CreateIntrinsic( - CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2}); + CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2}); Result = CGF.Builder.CreateInsertElement(Result, Res, i); } return Result; diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index b8c558c3e73ac..8e3fa0e1b6d95 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3581,10 +3581,10 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case Builtin::BI__builtin_hlsl_elementwise_f32tof16: { if (SemaRef.checkArgCount(TheCall, 1)) return true; - if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, - CheckFloatRepresentation)) + if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, CheckFloatRepresentation)) return true; - SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().UnsignedIntTy); + SetElementTypeAsReturnType(&SemaRef, TheCall, + getASTContext().UnsignedIntTy); break; } } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
