llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir Author: Stanislav Mekhanoshin (rampitec) <details> <summary>Changes</summary> --- Patch is 43.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151482.diff 16 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+27) - (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+4) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+1) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+3) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll (+64) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+40) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+40) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+45) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+36) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+30) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 172ac467f7cad..5b1c14ec5a17e 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -698,6 +698,7 @@ TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_bf16_f32, "V2yffi", "nc", "bf16-cvt-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_f16_f32, "V2hffi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 1c67fc3879bff..77d56739b9eb6 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -282,6 +282,33 @@ void test_cvt_sr_pk_bf16_f32(global bfloat2* out, float a, float b, uint sr) *out = __builtin_amdgcn_cvt_sr_pk_bf16_f32(a, b, sr); } +// CHECK-LABEL: @test_cvt_sr_pk_f16_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr +// CHECK-NEXT: [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float [[TMP0]], float [[TMP1]], i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x half> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_sr_pk_f16_f32(global half2* out, float a, float b, uint sr) +{ + *out = __builtin_amdgcn_cvt_sr_pk_f16_f32(a, b, sr); +} + // CHECK-LABEL: @test_cvt_f16_fp8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 4a50558ca8e86..e57f9b3b44bfc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -593,6 +593,10 @@ def int_amdgcn_tanh : DefaultAttrsIntrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +def int_amdgcn_cvt_sr_pk_f16_f32 : DefaultAttrsIntrinsic< + [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] +>, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_f16_f32">; + def int_amdgcn_cvt_sr_pk_bf16_f32 : DefaultAttrsIntrinsic< [llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_pk_bf16_f32">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 6bca2fec17c6d..c3b14abb736f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4574,6 +4574,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_pknorm_u16: case Intrinsic::amdgcn_cvt_pk_i16: case Intrinsic::amdgcn_cvt_pk_u16: + case Intrinsic::amdgcn_cvt_sr_pk_f16_f32: case Intrinsic::amdgcn_cvt_sr_pk_bf16_f32: case Intrinsic::amdgcn_cvt_pk_f16_fp8: case Intrinsic::amdgcn_cvt_pk_f16_bf8: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index efcc88e564e65..c5931fcd5d909 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2928,6 +2928,7 @@ def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>; def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>; def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>; def VOP_V2BF16_F32_F32_I32 : VOPProfile <[v2bf16, f32, f32, i32]>; +def VOP_V2F16_F32_F32_I32 : VOPProfile <[v2f16, f32, f32, i32]>; def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>; def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>; def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 96fe503c369ad..cfc5fe519a3c1 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1650,6 +1650,8 @@ def VOP3_CVT_SR_F8_F16_Fake16_Profile : VOP3_Profile_Fake16<VOP3_CVT_SR_F8_F16_P let SubtargetPredicate = isGFX1250Plus in { let ReadsModeReg = 0 in { + defm V_CVT_SR_PK_F16_F32 : VOP3Inst<"v_cvt_sr_pk_f16_f32", VOP3_Profile<VOP_V2F16_F32_F32_I32>, int_amdgcn_cvt_sr_pk_f16_f32>; + // These instructions have non-standard use of op_sel. They are using bits 2 and 3 of opsel // to select a byte in the vdst. Bits 0 and 1 are unused. let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { @@ -2067,6 +2069,7 @@ defm V_ASHR_PK_U8_I32 : VOP3Only_Realtriple_gfx1250<0x291>; defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>; defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>; defm V_CVT_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x36f>; +defm V_CVT_SR_PK_F16_F32 : VOP3Only_Realtriple_gfx1250<0x370>; defm V_CVT_PK_FP8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x372, "v_cvt_pk_fp8_f16">; defm V_CVT_PK_BF8_F16_gfx1250 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x373, "v_cvt_pk_bf8_f16">; defm V_CVT_SR_FP8_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx1250<0x374>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll new file mode 100644 index 0000000000000..2179800f9d317 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.f16.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s + +declare <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float, float, i32) #0 + +define amdgpu_ps float @cvt_sr_pk_f16_f32_vvv(float %src0, float %src1, i32 %src2) #1 { +; GCN-LABEL: cvt_sr_pk_f16_f32_vvv: +; GCN: ; %bb.0: +; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, v0, v1, v2 +; GCN-NEXT: ; return to shader part epilog + %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 %src2) #0 + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @cvt_sr_pk_f16_f32_sss(float inreg %src0, float inreg %src1, i32 inreg %src2) #1 { +; GCN-LABEL: cvt_sr_pk_f16_f32_sss: +; GCN: ; %bb.0: +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, s0, s1, v0 +; GCN-NEXT: ; return to shader part epilog + %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 %src2) #0 + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @cvt_sr_pk_f16_f32_vvi(float %src0, float %src1) #1 { +; GCN-LABEL: cvt_sr_pk_f16_f32_vvi: +; GCN: ; %bb.0: +; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, v0, v1, 0x10002 +; GCN-NEXT: ; return to shader part epilog + %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 65538) #0 + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @cvt_sr_pk_f16_f32_vvi_mods(float %src0, float %src1) #1 { +; GCN-LABEL: cvt_sr_pk_f16_f32_vvi_mods: +; GCN: ; %bb.0: +; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, -v0, |v1|, 1 +; GCN-NEXT: ; return to shader part epilog + %s0 = fneg float %src0 + %s1 = call float @llvm.fabs.f32(float %src1) #0 + %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %s0, float %s1, i32 1) #0 + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +define amdgpu_ps float @cvt_sr_pk_f16_f32_ssi(float inreg %src0, float inreg %src1) #1 { +; GCN-LABEL: cvt_sr_pk_f16_f32_ssi: +; GCN: ; %bb.0: +; GCN-NEXT: v_cvt_sr_pk_f16_f32 v0, s0, s1, 1 +; GCN-NEXT: ; return to shader part epilog + %cvt = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float %src0, float %src1, i32 1) #0 + %ret = bitcast <2 x half> %cvt to float + ret float %ret +} + +declare float @llvm.fabs.f32(float) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index 7d4f28f28b4c5..70090ac6bc3a4 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -512,6 +512,51 @@ v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 // GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] +v_cvt_sr_pk_f16_f32 v5, v1, v2, s3 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x70,0xd7,0x01,0x05,0x0e,0x00] + +v_cvt_sr_pk_f16_f32 v5, v255, s2, s105 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x70,0xd7,0xff,0x05,0xa4,0x01] + +v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi +// GFX1250: v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x70,0xd7,0x01,0xfe,0xff,0x01] + +v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo +// GFX1250: v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x70,0xd7,0x69,0xd2,0xf8,0x01] + +v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x70,0xd7,0x6a,0xf6,0x0c,0x04] + +v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x70,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x70,0xd7,0x7b,0xfa,0xed,0x61] + +v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x70,0xd7,0x7d,0xe0,0xf5,0x01] + +v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi +// GFX1250: v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x70,0xd7,0x7e,0x82,0xad,0x01] + +v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x70,0xd7,0x7f,0xf8,0xa8,0x21] + +v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x70,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x70,0xd7,0xc1,0xfe,0xf4,0x43] + +v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x70,0xd7,0xf0,0xfa,0xc0,0x4b] + +v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x70,0xd7,0xfd,0xd4,0x04,0x33] + +v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX1250: v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x70,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + v_cvt_sr_bf8_f16 v1, v2, v3 // GFX1250: v_cvt_sr_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index f910d85e3b9b1..8e6f238f48d2e 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -512,6 +512,51 @@ v_cvt_pk_f16_f32 v5, src_scc, vcc_lo mul:4 v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 // GFX1250: v_cvt_pk_f16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6f,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] +v_cvt_sr_pk_f16_f32 v5, v1, v2, s3 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x70,0xd7,0x01,0x05,0x0e,0x00] + +v_cvt_sr_pk_f16_f32 v5, v255, s2, s105 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x70,0xd7,0xff,0x05,0xa4,0x01] + +v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi +// GFX1250: v_cvt_sr_pk_f16_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x70,0xd7,0x01,0xfe,0xff,0x01] + +v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo +// GFX1250: v_cvt_sr_pk_f16_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x70,0xd7,0x69,0xd2,0xf8,0x01] + +v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x70,0xd7,0x6a,0xf6,0x0c,0x04] + +v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x70,0xd7,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + +v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|ttmp15|, -|src_scc|, ttmp15 ; encoding: [0x05,0x03,0x70,0xd7,0x7b,0xfa,0xed,0x61] + +v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x70,0xd7,0x7d,0xe0,0xf5,0x01] + +v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi +// GFX1250: v_cvt_sr_pk_f16_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x70,0xd7,0x7e,0x82,0xad,0x01] + +v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -|exec_hi|, null, vcc_lo ; encoding: [0x05,0x01,0x70,0xd7,0x7f,0xf8,0xa8,0x21] + +v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x70,0xd7,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -1, -|exec_hi|, src_scc ; encoding: [0x05,0x02,0x70,0xd7,0xc1,0xfe,0xf4,0x43] + +v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x70,0xd7,0xf0,0xfa,0xc0,0x4b] + +v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX1250: v_cvt_sr_pk_f16_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x70,0xd7,0xfd,0xd4,0x04,0x33] + +v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX1250: v_cvt_sr_pk_f16_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x70,0xd7,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] + v_cvt_sr_bf8_f16 v1, v2.l, v3 // GFX1250: v_cvt_sr_bf8_f16 v1, v2.l, v3 ; encoding: [0x01,0x00,0x75,0xd7,0x02,0x07,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s index 4ffc9057acff4..64304693859dd 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s @@ -402,6 +402,54 @@ v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0 // GFX1250: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6f,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x70,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x70,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x70,0xd7,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x70,0xd7,0xfa,0x04,0x16,0x52,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_sr_pk_f16_f32_e64_dpp v255, -|... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/151482 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits