Despite most of these insns being about conversion to BF8/HF8, they are still somewhat similar to various existing convert insns.
Signed-off-by: Jan Beulich <[email protected]> --- SDE: -dmr / -future --- v4: Update to spec version 3. Switch to using fallthrough pseudo-keyword. Series re-ordering adjustments. Update to spec version 6. Add ChangeLog entry. v3: New. --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ The format is based on [Keep a Changelog mitigate (by rate-limiting) the system wide impact of an HVM guest misusing atomic instructions. - Support for CPIO microcode in discrete multiboot modules. - - Support for AVX10.1. (Experimental) + - Support for AVX10.2. (Experimental) - On Arm: - Support for guest suspend and resume to/from RAM via vPSCI. --- a/tools/tests/x86_emulator/evex-disp8.c +++ b/tools/tests/x86_emulator/evex-disp8.c @@ -727,8 +727,22 @@ static const struct test avx10_2_all[] = INSN(comisbf16, 66, map5, 2f, el, bf16, el), INSN_SFP(comx, 0f, 2f), INSN(comxsh, f3, map5, 2f, el, fp16, el), + INSN(cvt2ph2bf8, f2, 0f38, 74, vl, fp16, vl), + INSN(cvt2ph2bf8s, f2, map5, 74, vl, fp16, vl), + INSN(cvt2ph2hf8, f2, map5, 18, vl, fp16, vl), + INSN(cvt2ph2hf8s, f2, map5, 1b, vl, fp16, vl), + INSN(cvt2ps2phx, 66, 0f38, 67, vl, d, vl), + INSN(cvtbiasph2bf8, , 0f38, 74, vl, fp16, vl), + INSN(cvtbiasph2bf8s, , map5, 74, vl, fp16, vl), + INSN(cvtbiasph2hf8, , map5, 18, vl, fp16, vl), + INSN(cvtbiasph2hf8s, , map5, 1b, vl, fp16, vl), + INSN(cvthf82ph, f2, map5, 1e, vl_2, b, vl), INSN(cvtbf162ibs, f2, map5, 69, vl, bf16, vl), INSN(cvtbf162iubs, f2, map5, 6b, vl, bf16, vl), + INSN(cvtph2bf8, f3, 0f38, 74, vl, fp16, vl), + INSN(cvtph2bf8s, f3, map5, 74, vl, fp16, vl), + INSN(cvtph2hf8, f3, map5, 18, vl, fp16, vl), + INSN(cvtph2hf8s, f3, map5, 1b, vl, fp16, vl), INSN(cvtph2ibs, , map5, 69, vl, fp16, vl), INSN(cvtph2iubs, , map5, 6b, vl, fp16, vl), INSN(cvtps2ibs, 66, map5, 69, vl, d, vl), --- a/tools/tests/x86_emulator/predicates.c +++ b/tools/tests/x86_emulator/predicates.c @@ -1952,6 +1952,7 @@ static const struct evex { { { 0x64 }, 2, T, R, pfx_66, Wn, Ln }, /* vpblendm{d,q} */ { 
{ 0x65 }, 2, T, R, pfx_66, Wn, Ln }, /* vblendmp{s,d} */ { { 0x66 }, 2, T, R, pfx_66, Wn, Ln }, /* vpblendm{b,w} */ + { { 0x67 }, 2, T, R, pfx_66, W0, Ln }, /* vcvt2ps2phx */ { { 0x68 }, 2, T, R, pfx_f2, Wn, Ln }, /* vp2intersect{d,q} */ { { 0x70 }, 2, T, R, pfx_66, W1, Ln }, /* vpshldvw */ { { 0x71 }, 2, T, R, pfx_66, Wn, Ln }, /* vpshldv{d,q} */ @@ -1959,6 +1960,9 @@ static const struct evex { { { 0x72 }, 2, T, R, pfx_f3, W1, Ln }, /* vcvtneps2bf16 */ { { 0x72 }, 2, T, R, pfx_f2, W1, Ln }, /* vcvtne2ps2bf16 */ { { 0x73 }, 2, T, R, pfx_66, Wn, Ln }, /* vpshrdv{d,q} */ + { { 0x74 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtbiasph2bf8 */ + { { 0x74 }, 2, T, R, pfx_f3, W0, Ln }, /* vcvtph2bf8 */ + { { 0x74 }, 2, T, R, pfx_f2, W0, Ln }, /* vcvt2ph2bf8 */ { { 0x75 }, 2, T, R, pfx_66, Wn, Ln }, /* vpermi2{b,w} */ { { 0x76 }, 2, T, R, pfx_66, Wn, Ln }, /* vpermi2{d,q} */ { { 0x77 }, 2, T, R, pfx_66, Wn, Ln }, /* vpermi2p{s,d} */ @@ -2124,8 +2128,15 @@ static const struct evex { }, evex_map5[] = { { { 0x10 }, 2, T, R, pfx_f3, W0, LIG }, /* vmovsh */ { { 0x11 }, 2, T, W, pfx_f3, W0, LIG }, /* vmovsh */ + { { 0x18 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtbiasph2hf8 */ + { { 0x18 }, 2, T, R, pfx_f3, W0, Ln }, /* vcvtph2hf8 */ + { { 0x18 }, 2, T, R, pfx_f2, W0, Ln }, /* vcvt2ph2hf8 */ + { { 0x1b }, 2, T, R, pfx_no, W0, Ln }, /* vcvtbiasph2hf8s */ + { { 0x1b }, 2, T, R, pfx_f3, W0, Ln }, /* vcvtph2hf8s */ + { { 0x1b }, 2, T, R, pfx_f2, W0, Ln }, /* vcvt2ph2hf8s */ { { 0x1d }, 2, T, R, pfx_66, W0, Ln }, /* vcvtps2phx */ { { 0x1d }, 2, T, R, pfx_no, W0, LIG }, /* vcvtss2sh */ + { { 0x1e }, 2, T, R, pfx_f2, W0, Ln }, /* cvthf82ph */ { { 0x2a }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsi2sh */ { { 0x2c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2si */ { { 0x2d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2si */ @@ -2187,6 +2198,9 @@ static const struct evex { { { 0x6e }, 2, T, R, pfx_f3, W0, L0 }, /* vmovw */ { { 0x6f }, 2, T, R, pfx_f3, Wn, Ln }, /* vmovrs{d,q} */ { { 0x6f }, 2, T, R, pfx_f2, Wn, Ln }, 
/* vmovrs{b,w} */ + { { 0x74 }, 2, T, R, pfx_no, W0, Ln }, /* vcvtbiasph2bf8s */ + { { 0x74 }, 2, T, R, pfx_f3, W0, Ln }, /* vcvtph2bf8s */ + { { 0x74 }, 2, T, R, pfx_f2, W0, Ln }, /* vcvt2ph2bf8s */ { { 0x78 }, 2, T, R, pfx_no, W0, Ln }, /* vcvttph2udq */ { { 0x78 }, 2, T, R, pfx_66, W0, Ln }, /* vcvttph2uqq */ { { 0x78 }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2usi */ --- a/xen/arch/x86/x86_emulate/decode.c +++ b/xen/arch/x86/x86_emulate/decode.c @@ -378,8 +378,10 @@ static const struct ext0f38_table { [0x62] = { .simd_size = simd_packed_int, .two_op = 1, .d8s = d8s_bw }, [0x63] = { .simd_size = simd_packed_int, .to_mem = 1, .two_op = 1, .d8s = d8s_bw }, [0x64 ... 0x66] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, + [0x67] = { .simd_size = simd_other, .d8s = d8s_vl }, [0x68] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0x70 ... 0x73] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, + [0x74] = { .simd_size = simd_other, .d8s = d8s_vl }, [0x75 ... 0x76] = { .simd_size = simd_packed_int, .d8s = d8s_vl }, [0x77] = { .simd_size = simd_packed_fp, .d8s = d8s_vl }, [0x78] = { .simd_size = simd_other, .two_op = 1 }, @@ -1439,6 +1441,15 @@ int x86emul_decode(struct x86_emulate_st s->simd_size = ext0f38_table[b].simd_size; if ( evex_encoded() ) { + switch ( b ) + { + case 0x74: /* cvt{bias,ne,ne2}ph2bf8 */ + s->fp16 = true; + if ( s->evex.pfx != vex_f2 ) + d |= TwoOp; + break; + } + /* * VPMOVUS* are identical to VPMOVS* Disp8-scaling-wise, but * their attributes don't match those of the vex_66 encoded @@ -1586,6 +1597,23 @@ int x86emul_decode(struct x86_emulate_st switch ( b ) { + case 0x18: /* vcvt{bias,ne,ne2}ph2hf8 */ + case 0x1b: /* vcvt{bias,ne,ne2}ph2hf8s */ + case 0x74: /* vcvt{bias,ne,ne2}ph2bf8s */ + s->fp16 = true; + d = DstReg | SrcMem; + if ( s->evex.pfx != vex_f2 ) + d |= TwoOp; + s->simd_size = simd_other; + disp8scale = s->evex.brs ? 
1 : 4 + s->evex.lr; + break; + + case 0x1e: /* vcvthf82ph */ + d = DstReg | SrcMem | TwoOp; + s->simd_size = simd_other; + disp8scale = 3 + s->evex.lr; + break; + case 0x78: case 0x79: /* vcvt{,t}ph2u{d,q}q need special casing */ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -6243,6 +6243,30 @@ x86_emulate( } goto simd_zmm; + case X86EMUL_OPC_EVEX (0x0f38, 0x74): /* vcvtbiasph2bf8 [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F3(0x0f38, 0x74): /* vcvtph2bf8 [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F2(0x0f38, 0x74): /* vcvt2ph2bf8 [xyz]mm,[xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX ( 5, 0x18): /* vcvtbiasph2hf8 [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F3( 5, 0x18): /* vcvtph2hf8 [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F2( 5, 0x18): /* vcvt2ph2hf8 [xyz]mm,[xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX ( 5, 0x1b): /* vcvtbiasph2hf8s [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F3( 5, 0x1b): /* vcvtph2hf8s [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F2( 5, 0x1b): /* vcvt2ph2hf8s [xyz]mm,[xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX ( 5, 0x74): /* vcvtbiasph2bf8s [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F3( 5, 0x74): /* vcvtph2bf8s [xyz]mm,[xyz]mm/mem{k} */ + case X86EMUL_OPC_EVEX_F2( 5, 0x74): /* vcvt2ph2bf8s [xyz]mm,[xyz]mm,[xyz]mm/mem{k} */ + generate_exception_if(ea.type != OP_MEM && evex.brs, X86_EXC_UD); + fault_suppression = false; + fallthrough; + case X86EMUL_OPC_EVEX_66(0x0f38, 0x67): /* vcvt2ps2phx [xyz]mm,[xyz]mm,[xyz]mm/mem{k} */ + generate_exception_if(evex.w, X86_EXC_UD); + if ( !cp->avx10.avx10_v1_aux ) + vcpu_must_have(avx10, 2); + if ( ea.type != OP_REG || !evex.brs ) + avx512_vlen_check(false); + op_bytes = 16 << evex.lr; + goto simd_zmm; + case X86EMUL_OPC_EVEX_F2(0x0f38, 0x68): /* vp2intersect{d,q} [xyz]mm/mem,[xyz]mm,k+1 */ host_and_vcpu_must_have(avx512_vp2intersect); generate_exception_if(evex.opmsk || !evex.r || !evex.R, X86_EXC_UD); 
@@ -7946,6 +7970,15 @@ x86_emulate( generate_exception_if(evex.w, X86_EXC_UD); goto avx512f_all_fp; + case X86EMUL_OPC_EVEX_F2(5, 0x1e): /* vcvthf82ph [xyz]mm,[xyz]mm/mem{k} */ + generate_exception_if(evex.w || evex.brs, X86_EXC_UD); + if ( !cp->avx10.avx10_v1_aux ) + vcpu_must_have(avx10, 2); + if ( ea.type != OP_REG || !evex.brs ) + avx512_vlen_check(false); + op_bytes = 8 << evex.lr; + goto simd_zmm; + case X86EMUL_OPC_EVEX_66(5, 0x51): /* vsqrtbf16 [xyz]mm/mem,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(5, 0x58): /* vaddbf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ case X86EMUL_OPC_EVEX_66(5, 0x59): /* vmulbf16 [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */ --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -410,7 +410,7 @@ XEN_CPUFEATURE(TSA_SQ_NO, 18*32 XEN_CPUFEATURE(TSA_L1_NO, 18*32+ 2) /*A No L1D Transitive Scheduler Attacks */ /* Intel-defined CPU features, CPUID level 0x00000024:1.ecx, word 19 */ -XEN_CPUFEATURE(AVX10_V1_AUX, 19*32+ 2) /* AVX10 V1 Auxiliary Instructions */ +XEN_CPUFEATURE(AVX10_V1_AUX, 19*32+ 2) /*a AVX10 V1 Auxiliary Instructions */ #endif /* XEN_CPUFEATURE */
