https://github.com/rampitec updated 
https://github.com/llvm/llvm-project/pull/151804

>From 99e3d295b3ce5c9046a781b01b5fc6b07991d1db Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <stanislav.mekhanos...@amd.com>
Date: Sat, 2 Aug 2025 01:08:19 -0700
Subject: [PATCH] [AMDGPU] v_cvt_scale_pk16 gfx1250 instructions

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |   6 +
 clang/lib/Sema/SemaAMDGPU.cpp                 |   6 +
 .../CodeGenOpenCL/builtins-amdgcn-gfx1250.cl  |  36 +++
 .../builtins-amdgcn-error-gfx1250-param.cl    |  12 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |   6 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   6 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |   2 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  12 +
 .../AMDGPU/llvm.amdgcn.cvt.scale.pk.ll        | 210 ++++++++++++++++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s |  54 +++++
 llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s        |  54 +++++
 .../Disassembler/AMDGPU/gfx1250_dasm_vop3.txt |  54 +++++
 12 files changed, 458 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a2e109b416b9d..3773031e187c1 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -716,6 +716,12 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_fp4, 
"V8yUiUiIUi", "nc", "gfx
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp8, "V8fV2UiUiIUi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_bf8, "V8fV2UiUiIUi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp4, "V8fUiUiIUi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f16_fp6, "V16hV3UiUiIUi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_bf16_fp6, "V16yV3UiUiIUi", 
"nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f16_bf6, "V16hV3UiUiIUi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_bf16_bf6, "V16yV3UiUiIUi", 
"nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f32_fp6, "V16fV3UiUiIUi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk16_f32_bf6, "V16fV3UiUiIUi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_bf16, "V2UiV8yf", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_bf16, "V2UiV8yf", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_f16, "V2UiV8hf", "nc", 
"gfx1250-insts")
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 8580de2f0c03c..a5fbd70f97f64 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -93,6 +93,12 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
   case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
   case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_fp6:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_fp6:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f16_bf6:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_bf16_bf6:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_fp6:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk16_f32_bf6:
     return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
   }
   default:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 7ca6106432e50..c25aaf11bb0e1 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -655,6 +655,36 @@ void test_cvt_sr_fp8_f16(global int* out, half a, short 
sr, int old)
 // CHECK-NEXT:    [[TMP34:%.*]] = call <8 x float> 
@llvm.amdgcn.cvt.scale.pk8.f32.fp4(i32 [[TMP32]], i32 [[TMP33]], i32 7)
 // CHECK-NEXT:    [[TMP35:%.*]] = load ptr addrspace(1), ptr 
[[OUTF8_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store <8 x float> [[TMP34]], ptr addrspace(1) [[TMP35]], 
align 32
+// CHECK-NEXT:    [[TMP36:%.*]] = load <3 x i32>, ptr [[SRC3_ADDR_ASCAST]], 
align 16
+// CHECK-NEXT:    [[TMP37:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP38:%.*]] = call <16 x half> 
@llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> [[TMP36]], i32 [[TMP37]], i32 0)
+// CHECK-NEXT:    [[TMP39:%.*]] = load ptr addrspace(1), ptr 
[[OUTH16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <16 x half> [[TMP38]], ptr addrspace(1) [[TMP39]], 
align 32
+// CHECK-NEXT:    [[TMP40:%.*]] = load <3 x i32>, ptr [[SRC3_ADDR_ASCAST]], 
align 16
+// CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP42:%.*]] = call <16 x bfloat> 
@llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x i32> [[TMP40]], i32 [[TMP41]], i32 1)
+// CHECK-NEXT:    [[TMP43:%.*]] = load ptr addrspace(1), ptr 
[[OUTY16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <16 x bfloat> [[TMP42]], ptr addrspace(1) [[TMP43]], 
align 32
+// CHECK-NEXT:    [[TMP44:%.*]] = load <3 x i32>, ptr [[SRC3_ADDR_ASCAST]], 
align 16
+// CHECK-NEXT:    [[TMP45:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP46:%.*]] = call <16 x half> 
@llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> [[TMP44]], i32 [[TMP45]], i32 2)
+// CHECK-NEXT:    [[TMP47:%.*]] = load ptr addrspace(1), ptr 
[[OUTH16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <16 x half> [[TMP46]], ptr addrspace(1) [[TMP47]], 
align 32
+// CHECK-NEXT:    [[TMP48:%.*]] = load <3 x i32>, ptr [[SRC3_ADDR_ASCAST]], 
align 16
+// CHECK-NEXT:    [[TMP49:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP50:%.*]] = call <16 x bfloat> 
@llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> [[TMP48]], i32 [[TMP49]], i32 3)
+// CHECK-NEXT:    [[TMP51:%.*]] = load ptr addrspace(1), ptr 
[[OUTY16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <16 x bfloat> [[TMP50]], ptr addrspace(1) [[TMP51]], 
align 32
+// CHECK-NEXT:    [[TMP52:%.*]] = load <3 x i32>, ptr [[SRC3_ADDR_ASCAST]], 
align 16
+// CHECK-NEXT:    [[TMP53:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP54:%.*]] = call <16 x float> 
@llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> [[TMP52]], i32 [[TMP53]], i32 3)
+// CHECK-NEXT:    [[TMP55:%.*]] = load ptr addrspace(1), ptr 
[[OUTF16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <16 x float> [[TMP54]], ptr addrspace(1) [[TMP55]], 
align 64
+// CHECK-NEXT:    [[TMP56:%.*]] = load <3 x i32>, ptr [[SRC3_ADDR_ASCAST]], 
align 16
+// CHECK-NEXT:    [[TMP57:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP58:%.*]] = call <16 x float> 
@llvm.amdgcn.cvt.scale.pk16.f32.bf6(<3 x i32> [[TMP56]], i32 [[TMP57]], i32 4)
+// CHECK-NEXT:    [[TMP59:%.*]] = load ptr addrspace(1), ptr 
[[OUTF16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <16 x float> [[TMP58]], ptr addrspace(1) [[TMP59]], 
align 64
 // CHECK-NEXT:    ret void
 //
 void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2,
@@ -672,6 +702,12 @@ void test_cvt_scale_pk(global half8 *outh8, global bfloat8 
*outy8, uint2 src2,
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 5);
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 6);
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 7);
+  *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, 0);
+  *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, 1);
+  *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, 2);
+  *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, 3);
+  *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, 3);
+  *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, 4);
 }
 
 // CHECK-LABEL: @test_cvt_scalef32_pk(
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 83c63f1465a8b..8f34cccaecb7a 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -57,6 +57,12 @@ void test_cvt_scale_pk(global half8 *outh8, global bfloat8 
*outy8, uint2 src2,
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, scale_sel); // 
expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f32_fp8' must be a constant 
integer}}
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, scale_sel); // 
expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f32_bf8' must be a constant 
integer}}
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, scale_sel); // 
expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f32_fp4' must be a constant 
integer}}
+  *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, scale_sel); 
// expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f16_fp6' must be a 
constant integer}}
+  *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, scale_sel); 
// expected-error {{'__builtin_amdgcn_cvt_scale_pk16_bf16_fp6' must be a 
constant integer}}
+  *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, scale_sel); 
// expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f16_bf6' must be a 
constant integer}}
+  *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, scale_sel); 
// expected-error {{'__builtin_amdgcn_cvt_scale_pk16_bf16_bf6' must be a 
constant integer}}
+  *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, scale_sel); 
// expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f32_fp6' must be a 
constant integer}}
+  *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, scale_sel); 
// expected-error {{'__builtin_amdgcn_cvt_scale_pk16_f32_bf6' must be a 
constant integer}}
 
   *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
   *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
@@ -67,6 +73,12 @@ void test_cvt_scale_pk(global half8 *outh8, global bfloat8 
*outy8, uint2 src2,
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
   *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_fp6(src3, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_fp6(src3, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outh16 = __builtin_amdgcn_cvt_scale_pk16_f16_bf6(src3, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outy16 = __builtin_amdgcn_cvt_scale_pk16_bf16_bf6(src3, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_fp6(src3, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outf16 = __builtin_amdgcn_cvt_scale_pk16_f32_bf6(src3, scale, 8); // 
expected-error {{argument value 8 is outside the valid range [0, 7]}}
 }
 
 void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, 
global v4i* b128gaddr, int *b32faddr, v2i* b64faddr, v4i *b128faddr,
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 3e331d0635910..fcb4b02a594a0 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -655,6 +655,12 @@ def int_amdgcn_cvt_scale_pk8_bf16_fp4  : 
AMDGPUCvtScaleIntrinsic<llvm_v8bf16_ty,
 def int_amdgcn_cvt_scale_pk8_f32_fp8   : 
AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty,   llvm_v2i32_ty, 
"cvt_scale_pk8_f32_fp8">;
 def int_amdgcn_cvt_scale_pk8_f32_bf8   : 
AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty,   llvm_v2i32_ty, 
"cvt_scale_pk8_f32_bf8">;
 def int_amdgcn_cvt_scale_pk8_f32_fp4   : 
AMDGPUCvtScaleIntrinsic<llvm_v8f32_ty,   llvm_i32_ty,   
"cvt_scale_pk8_f32_fp4">;
+def int_amdgcn_cvt_scale_pk16_f16_bf6  : 
AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty,  llvm_v3i32_ty, 
"cvt_scale_pk16_f16_bf6">;
+def int_amdgcn_cvt_scale_pk16_bf16_bf6 : 
AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, 
"cvt_scale_pk16_bf16_bf6">;
+def int_amdgcn_cvt_scale_pk16_f16_fp6  : 
AMDGPUCvtScaleIntrinsic<llvm_v16f16_ty,  llvm_v3i32_ty, 
"cvt_scale_pk16_f16_fp6">;
+def int_amdgcn_cvt_scale_pk16_bf16_fp6 : 
AMDGPUCvtScaleIntrinsic<llvm_v16bf16_ty, llvm_v3i32_ty, 
"cvt_scale_pk16_bf16_fp6">;
+def int_amdgcn_cvt_scale_pk16_f32_fp6  : 
AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty,  llvm_v3i32_ty, 
"cvt_scale_pk16_f32_fp6">;
+def int_amdgcn_cvt_scale_pk16_f32_bf6  : 
AMDGPUCvtScaleIntrinsic<llvm_v16f32_ty,  llvm_v3i32_ty, 
"cvt_scale_pk16_f32_bf6">;
 
 class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, 
LLVMType Src1Ty, string name> : DefaultAttrsIntrinsic<
   [DstTy], [Src0Ty, Src1Ty, llvm_float_ty], [IntrNoMem, IntrSpeculatable]
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index df4244db6de8d..ff64613402776 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4603,6 +4603,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const 
MachineInstr &MI) const {
     case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8:
     case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8:
     case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4:
+    case Intrinsic::amdgcn_cvt_scale_pk16_f16_fp6:
+    case Intrinsic::amdgcn_cvt_scale_pk16_bf16_fp6:
+    case Intrinsic::amdgcn_cvt_scale_pk16_f16_bf6:
+    case Intrinsic::amdgcn_cvt_scale_pk16_bf16_bf6:
+    case Intrinsic::amdgcn_cvt_scale_pk16_f32_fp6:
+    case Intrinsic::amdgcn_cvt_scale_pk16_f32_bf6:
     case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16:
     case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16:
     case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 78f45447d1fc7..392bf42d3f377 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2950,6 +2950,8 @@ def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, 
untyped]>;
 def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>;
 def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>;
 def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>;
+def VOP_V16F16_V3I32_I32  : VOPProfile<[v16f16, v3i32, i32, untyped]>;
+def VOP_V16BF16_V3I32_I32 : VOPProfile<[v16bf16, v3i32, i32, untyped]>;
 def VOP_V8F16_V2I32_I32  : VOPProfile<[v8f16, v2i32, i32, untyped]>;
 def VOP_V8BF16_V2I32_I32 : VOPProfile<[v8bf16, v2i32, i32, untyped]>;
 def VOP_V8F16_I32_I32  : VOPProfile<[v8f16, i32, i32, untyped]>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td 
b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 63f83e0850849..9e0f4f131abf9 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1777,6 +1777,12 @@ let SubtargetPredicate = isGFX1250Plus in {
       defm V_CVT_SCALE_PK8_BF16_BF8  : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk8_bf16_bf8",  VOP_V8BF16_V2I32_I32,  
int_amdgcn_cvt_scale_pk8_bf16_bf8>;
       defm V_CVT_SCALE_PK8_F32_FP8   : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_fp8",   VOP_V8F32_V2I32_I32,   
int_amdgcn_cvt_scale_pk8_f32_fp8>;
       defm V_CVT_SCALE_PK8_F32_BF8   : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_bf8",   VOP_V8F32_V2I32_I32,   
int_amdgcn_cvt_scale_pk8_f32_bf8>;
+      defm V_CVT_SCALE_PK16_F16_FP6  : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f16_fp6",  VOP_V16F16_V3I32_I32,  
int_amdgcn_cvt_scale_pk16_f16_fp6>;
+      defm V_CVT_SCALE_PK16_BF16_FP6 : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk16_bf16_fp6", VOP_V16BF16_V3I32_I32, 
int_amdgcn_cvt_scale_pk16_bf16_fp6>;
+      defm V_CVT_SCALE_PK16_F16_BF6  : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f16_bf6",  VOP_V16F16_V3I32_I32,  
int_amdgcn_cvt_scale_pk16_f16_bf6>;
+      defm V_CVT_SCALE_PK16_BF16_BF6 : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk16_bf16_bf6", VOP_V16BF16_V3I32_I32, 
int_amdgcn_cvt_scale_pk16_bf16_bf6>;
+      defm V_CVT_SCALE_PK16_F32_FP6  : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f32_fp6",  VOP_V16F32_V3I32_I32,  
int_amdgcn_cvt_scale_pk16_f32_fp6>;
+      defm V_CVT_SCALE_PK16_F32_BF6  : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk16_f32_bf6",  VOP_V16F32_V3I32_I32,  
int_amdgcn_cvt_scale_pk16_f32_bf6>;
     } // End Constraints = "@earlyclobber $vdst"
 
     defm V_CVT_SCALE_PK8_F16_FP4   : 
VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f16_fp4",   VOP_V8F16_I32_I32,     
int_amdgcn_cvt_scale_pk8_f16_fp4>;
@@ -2248,6 +2254,12 @@ defm V_CVT_SCALEF32_PK8_FP8_F32      : 
VOP3Only_Real_Base_gfx1250<0x2c3>;
 defm V_CVT_SCALEF32_PK8_FP8_F16      : VOP3Only_Real_Base_gfx1250<0x2c4>;
 defm V_CVT_SCALEF32_PK8_BF8_F32      : VOP3Only_Real_Base_gfx1250<0x2c5>;
 defm V_CVT_SCALEF32_PK8_BF8_F16      : VOP3Only_Real_Base_gfx1250<0x2c6>;
+defm V_CVT_SCALE_PK16_F16_FP6        : VOP3Only_ScaleSel_Real_gfx1250<0x2c7>;
+defm V_CVT_SCALE_PK16_BF16_FP6       : VOP3Only_ScaleSel_Real_gfx1250<0x2c8>;
+defm V_CVT_SCALE_PK16_F32_FP6        : VOP3Only_ScaleSel_Real_gfx1250<0x2c9>;
+defm V_CVT_SCALE_PK16_F16_BF6        : VOP3Only_ScaleSel_Real_gfx1250<0x2ca>;
+defm V_CVT_SCALE_PK16_BF16_BF6       : VOP3Only_ScaleSel_Real_gfx1250<0x2cb>;
+defm V_CVT_SCALE_PK16_F32_BF6        : VOP3Only_ScaleSel_Real_gfx1250<0x2cc>;
 defm V_CVT_SCALEF32_SR_PK8_FP4_F32   : VOP3Only_Real_Base_gfx1250<0x297>;
 defm V_CVT_SCALEF32_SR_PK8_FP8_F32   : VOP3Only_Real_Base_gfx1250<0x298>;
 defm V_CVT_SCALEF32_SR_PK8_BF8_F32   : VOP3Only_Real_Base_gfx1250<0x299>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
index 4309cfbe1b124..c29c52cc58aa2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
@@ -11,6 +11,12 @@ declare <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.fp4(i32 
%src, i32 %scale, i
 declare <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %src, i32 
%scale, i32 %scale_sel)
 declare <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.bf8(<2 x i32> %src, i32 
%scale, i32 %scale_sel)
 declare <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp4(i32 %src, i32 %scale, 
i32 %scale_sel)
+declare <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> %src, i32 
%scale, i32 %scale_sel)
+declare <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x i32> %src, i32 
%scale, i32 %scale_sel)
+declare <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> %src, i32 
%scale, i32 %scale_sel)
+declare <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 
%scale, i32 %scale_sel)
+declare <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> %src, i32 
%scale, i32 %scale_sel)
+declare <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.bf6(<3 x i32> %src, i32 
%scale, i32 %scale_sel)
 
 define amdgpu_ps void @test_cvt_scale_pk8_f16_fp8_vv(<2 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
 ; GFX1250-SDAG-LABEL: test_cvt_scale_pk8_f16_fp8_vv:
@@ -162,3 +168,207 @@ define amdgpu_ps void @test_cvt_scale_pk8_f32_fp4_vv(i32 
%src, i32 %scale, ptr a
   store <8 x float> %cvt, ptr addrspace(1) %out, align 32
   ret void
 }
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_fp6_vv(<3 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_fp6_vv:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_cvt_scale_pk16_f16_fp6 v[6:13], v[0:2], v3
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_fp6_vv:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_cvt_scale_pk16_f16_fp6 v[6:13], v[0:2], v3
+; GFX1250-GISEL-NEXT:    s_clause 0x1
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> 
%src, i32 %scale, i32 0)
+  store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_fp6_sl(<3 x i32> inreg %src, 
ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_fp6_sl:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v12, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_cvt_scale_pk16_f16_fp6 v[2:9], v[10:12], 0x64 
scale_sel:1
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_fp6_sl:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v10, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT:    v_cvt_scale_pk16_f16_fp6 v[2:9], v[10:12], 0x64 
scale_sel:1
+; GFX1250-GISEL-NEXT:    s_clause 0x1
+; GFX1250-GISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-GISEL-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> 
%src, i32 100, i32 1)
+  store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_fp6_vv(<3 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_fp6_vv:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_cvt_scale_pk16_bf16_fp6 v[6:13], v[0:2], v3 scale_sel:2
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-NEXT:    s_endpgm
+  %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x 
i32> %src, i32 %scale, i32 2)
+  store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_fp6_sl(<3 x i32> inreg %src, 
ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_fp6_sl:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-NEXT:    v_mov_b32_e32 v12, s2
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cvt_scale_pk16_bf16_fp6 v[2:9], v[10:12], 0x64 scale_sel:3
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-NEXT:    s_endpgm
+  %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x 
i32> %src, i32 100, i32 3)
+  store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_bf6_vv(<3 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_bf6_vv:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_cvt_scale_pk16_f16_bf6 v[6:13], v[0:2], v3 
scale_sel:4
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_bf6_vv:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_cvt_scale_pk16_f16_bf6 v[6:13], v[0:2], v3 
scale_sel:4
+; GFX1250-GISEL-NEXT:    s_clause 0x1
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> 
%src, i32 %scale, i32 4)
+  store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_bf6_sl(<3 x i32> inreg %src, 
ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_bf6_sl:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-SDAG-NEXT:    v_mov_b32_e32 v12, s2
+; GFX1250-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT:    v_cvt_scale_pk16_f16_bf6 v[2:9], v[10:12], 0x64 
scale_sel:5
+; GFX1250-SDAG-NEXT:    s_clause 0x1
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_bf6_sl:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
+; GFX1250-GISEL-NEXT:    v_mov_b32_e32 v10, s0
+; GFX1250-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT:    v_cvt_scale_pk16_f16_bf6 v[2:9], v[10:12], 0x64 
scale_sel:5
+; GFX1250-GISEL-NEXT:    s_clause 0x1
+; GFX1250-GISEL-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-GISEL-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> 
%src, i32 100, i32 5)
+  store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_bf6_vv(<3 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_bf6_vv:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_cvt_scale_pk16_bf16_bf6 v[6:13], v[0:2], v3 scale_sel:6
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-NEXT:    s_endpgm
+  %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x 
i32> %src, i32 %scale, i32 6)
+  store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_bf6_sl(<3 x i32> inreg %src, 
ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_bf6_sl:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-NEXT:    v_mov_b32_e32 v12, s2
+; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT:    v_cvt_scale_pk16_bf16_bf6 v[2:9], v[10:12], 0x64 scale_sel:7
+; GFX1250-NEXT:    s_clause 0x1
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-NEXT:    global_store_b128 v[0:1], v[2:5], off
+; GFX1250-NEXT:    s_endpgm
+  %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x 
i32> %src, i32 100, i32 7)
+  store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f32_fp6_vv(<3 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f32_fp6_vv:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_cvt_scale_pk16_f32_fp6 v[6:21], v[0:2], v3 
scale_sel:5
+; GFX1250-SDAG-NEXT:    s_clause 0x3
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f32_fp6_vv:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_cvt_scale_pk16_f32_fp6 v[6:21], v[0:2], v3 
scale_sel:5
+; GFX1250-GISEL-NEXT:    s_clause 0x3
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> 
%src, i32 %scale, i32 5)
+  store <16 x float> %cvt, ptr addrspace(1) %out, align 16
+  ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f32_bf6_vv(<3 x i32> %src, i32 
%scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f32_bf6_vv:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_cvt_scale_pk16_f32_bf6 v[6:21], v[0:2], v3 
scale_sel:6
+; GFX1250-SDAG-NEXT:    s_clause 0x3
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f32_bf6_vv:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_cvt_scale_pk16_f32_bf6 v[6:21], v[0:2], v3 
scale_sel:6
+; GFX1250-GISEL-NEXT:    s_clause 0x3
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-GISEL-NEXT:    global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-GISEL-NEXT:    s_endpgm
+  %cvt = tail call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.bf6(<3 x i32> 
%src, i32 %scale, i32 6)
+  store <16 x float> %cvt, ptr addrspace(1) %out, align 16
+  ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 10c76733f0b95..9a6ddf5299be0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -1039,3 +1039,57 @@ v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 100
 
 v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4
 // GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4 ; encoding: 
[0x02,0x00,0x43,0xd6,0x04,0x09,0x12,0x02]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1 ; 
encoding: [0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2 ; 
encoding: [0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3 ; 
encoding: [0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4 ; 
encoding: [0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4 ; 
encoding: [0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5 ; 
encoding: [0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index 16f8425ca8d05..988408cf12534 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -1039,3 +1039,57 @@ v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 100
 
 v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4
 // GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4 ; encoding: 
[0x02,0x00,0x43,0xd6,0x04,0x09,0x12,0x02]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1 ; 
encoding: [0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2 ; 
encoding: [0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3 ; 
encoding: [0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4 ; 
encoding: [0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4 ; 
encoding: [0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5 ; 
encoding: [0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
index 6ac2506218130..cc485312ccbae 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -1090,3 +1090,57 @@
 
 0x02,0x00,0x43,0xd6,0x04,0x11,0x1a,0x04
 # GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[8:9], v[6:7] ; encoding: 
[0x02,0x00,0x43,0xd6,0x04,0x11,0x1a,0x04]
+
+0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1 ; 
encoding: [0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3 ; 
encoding: [0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2 ; 
encoding: [0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4 ; 
encoding: [0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5 ; 
encoding: [0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00 ; encoding: 
[0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 ; encoding: 
[0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4 ; 
encoding: [0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to