https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/157900
>From d7ea946ada63b8ff0a29cc33721ebbd7d5765dec Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Wed, 10 Sep 2025 12:56:57 -0400 Subject: [PATCH] [AMDGPU] Change `scale_sel` to be 4 bits The latest SP changes updated it to use `OP_SEL[0:3]` instead of `OP_SEL[0:2]`. Fixes SWDEV-554472. --- clang/lib/Sema/SemaAMDGPU.cpp | 2 +- .../builtins-amdgcn-error-gfx1250-param.cl | 30 +++++++++---------- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 5 ++-- .../AMDGPU/llvm.amdgcn.cvt.scale.pk.ll | 10 +++---- llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s | 6 ++++ llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s | 6 ++++ llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s | 4 +-- .../Disassembler/AMDGPU/gfx1250_dasm_vop3.txt | 6 ++++ 10 files changed, 45 insertions(+), 28 deletions(-) diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index baba503239e9f..bb98a39948fce 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -100,7 +100,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6: case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6: case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6: - return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7); + return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 15); case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_32x4B: case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_16x8B: case AMDGPU::BI__builtin_amdgcn_cooperative_atomic_load_8x16B: diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl index 273c65e6d106d..3cea47b66d6a6 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl @@ -75,21 +75,21 @@ void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2, *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f32_fp6' must be a constant integer}} *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f32_bf6' must be a constant integer}} - *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} - *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} } void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, global v4i* b128gaddr, int *b32faddr, v2i* b64faddr, v4i *b128faddr, diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 5bbc16f2dc743..d2a1acc034d02 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -651,7 +651,7 @@ def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>] >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; -// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..7] +// llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..15] class AMDGPUCvtScaleIntrinsic<LLVMType DstTy, LLVMType Src0Ty, string name> : DefaultAttrsIntrinsic< [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index aa5dae09ca185..7b877a4f74373 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1353,7 +1353,7 @@ def MatrixAReuse : NamedBitOperand<"matrix_a_reuse">; def MatrixBReuse : NamedBitOperand<"matrix_b_reuse">; def ScaleSel : NamedIntOperand<"scale_sel"> { - let Validator = "isUInt<3>"; + let Validator = "isUInt<4>"; } class KImmFPOperand<ValueType vt> : ImmOperand<vt> { diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 5550a0c08b918..b900510d7622a 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -414,10 +414,9 @@ class VOP3a_BITOP3_gfx12<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, p> { } class VOP3a_ScaleSel_gfx1250<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, p> { - bits<3> scale_sel; + bits<4> scale_sel; - let Inst{13-11} = scale_sel; - let Inst{14} = 0; + let Inst{14-11} = scale_sel; } class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll index c29c52cc58aa2..5c439f631a426 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll @@ -106,7 +106,7 @@ define amdgpu_ps void @test_cvt_scale_pk8_f32_fp8_vv(<2 x i32> %src, i32 %scale, ; GFX1250-SDAG-LABEL: test_cvt_scale_pk8_f32_fp8_vv: ; GFX1250-SDAG: ; %bb.0: ; GFX1250-SDAG-NEXT: v_dual_mov_b32 v13, v4 :: v_dual_mov_b32 v12, v3 -; GFX1250-SDAG-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:7 +; GFX1250-SDAG-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:8 ; GFX1250-SDAG-NEXT: s_clause 0x1 ; GFX1250-SDAG-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16 ; GFX1250-SDAG-NEXT: global_store_b128 v[12:13], v[4:7], off @@ -115,12 +115,12 @@ define amdgpu_ps void @test_cvt_scale_pk8_f32_fp8_vv(<2 x i32> %src, i32 %scale, ; GFX1250-GISEL-LABEL: test_cvt_scale_pk8_f32_fp8_vv: ; GFX1250-GISEL: ; %bb.0: ; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v13, v4 -; GFX1250-GISEL-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:7 +; GFX1250-GISEL-NEXT: v_cvt_scale_pk8_f32_fp8 v[4:11], v[0:1], v2 scale_sel:8 ; GFX1250-GISEL-NEXT: s_clause 0x1 ; GFX1250-GISEL-NEXT: global_store_b128 v[12:13], v[4:7], off ; GFX1250-GISEL-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16 ; GFX1250-GISEL-NEXT: s_endpgm - %cvt = tail call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %src, i32 %scale, i32 7) + %cvt = tail call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %src, i32 %scale, i32 8) store <8 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } @@ -313,12 +313,12 @@ define amdgpu_ps void @test_cvt_scale_pk16_bf16_bf6_sl(<3 x i32> inreg %src, ptr ; GFX1250-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1 ; GFX1250-NEXT: v_mov_b32_e32 v12, s2 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-NEXT: v_cvt_scale_pk16_bf16_bf6 v[2:9], v[10:12], 0x64 scale_sel:7 +; GFX1250-NEXT: v_cvt_scale_pk16_bf16_bf6 v[2:9], v[10:12], 0x64 scale_sel:8 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16 ; GFX1250-NEXT: global_store_b128 v[0:1], v[2:5], off ; GFX1250-NEXT: s_endpgm - %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 100, i32 7) + %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 100, i32 8) store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index 13f1bb036188d..d3b44eb788444 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -713,6 +713,9 @@ v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], 0xcf00 v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 // GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00] +v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 +// GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00] + v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8 // GFX1250: v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8 ; encoding: [0x0a,0x00,0xac,0xd6,0x14,0x11,0x02,0x00] @@ -758,6 +761,9 @@ v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], 0xcf00 v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 // GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00] +v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 +// GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00] + v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 // GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 ; encoding: [0x0a,0x00,0xa1,0xd6,0x14,0x11,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 1441f3806987c..b4d4e365d0453 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -713,6 +713,9 @@ v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], 0xcf00 v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 // GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00] +v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 +// GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00] + v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8 // GFX1250: v_cvt_scale_pk8_bf16_bf8 v[10:13], v[20:21], v8 ; encoding: [0x0a,0x00,0xac,0xd6,0x14,0x11,0x02,0x00] @@ -758,6 +761,9 @@ v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], 0xcf00 v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 // GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00] +v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 +// GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00] + v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 // GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 ; encoding: [0x0a,0x00,0xa1,0xd6,0x14,0x11,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s index e87943224e8f5..cce8e1ef24f5f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s @@ -277,9 +277,9 @@ v_cvt_sr_fp8_f16 v1, v2, v3 mul:2 // GFX125X-ERR-NEXT:{{^}}v_cvt_sr_fp8_f16 v1, v2, v3 mul:2 // GFX125X-ERR-NEXT:{{^}} ^ -v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:8 +v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:16 // GFX125X-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid scale_sel value. -// GFX125X-ERR-NEXT:{{^}}v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:8 +// GFX125X-ERR-NEXT:{{^}}v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], v8 scale_sel:16 // GFX125X-ERR-NEXT:{{^}} ^ v_cvt_sr_bf8_f16 v1, v2, v3 byte_sel:4 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index 4b44c27570af5..29bfa54f2c10d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -761,6 +761,9 @@ 0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00 # GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xab,0xd6,0x14,0x11,0x02,0x00] +0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scale_pk8_f16_bf8 v[10:13], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xab,0xd6,0x14,0x11,0x02,0x00] + 0x0a,0x00,0xa8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00 # GFX1250: v_cvt_scale_pk8_f16_fp8 v[10:13], v[20:21], 0xcf00 ; encoding: [0x0a,0x00,0xa8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00] @@ -800,6 +803,9 @@ 0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00 # GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:7 ; encoding: [0x0a,0x38,0xad,0xd6,0x14,0x11,0x02,0x00] +0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scale_pk8_f32_bf8 v[10:17], v[20:21], v8 scale_sel:8 ; encoding: [0x0a,0x40,0xad,0xd6,0x14,0x11,0x02,0x00] + 0x0a,0x00,0xaa,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00 # GFX1250: v_cvt_scale_pk8_f32_fp8 v[10:17], v[20:21], 0xcf00 ; encoding: [0x0a,0x00,0xaa,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00] _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits