Hi Michael, Minor comments inline.
On 14/5/25 09:39, Michael Clark wrote:
the x86-mini library is a lightweight x86 encoder, decoder, and disassembler that uses extensions to the Intel instruction set metadata to encode modern VEX/EVEX instructions and legacy instructions with a parameterized LEX (legacy extension) format. this patch adds the x86-tablegen.py script, the disassembler, a print_insn_x86 implementation plus host and target cpu stubs. Signed-off-by: Michael Clark <mich...@anarch128.org> --- disas/disas-host.c | 5 + disas/meson.build | 97 ++ disas/x86-core.c | 2716 +++++++++++++++++++++++++++++++++++++++ disas/x86-disas.c | 96 ++ disas/x86.h | 1860 +++++++++++++++++++++++++++ include/disas/dis-asm.h | 1 + target/i386/cpu.c | 7 + 7 files changed, 4782 insertions(+) create mode 100644 disas/x86-core.c create mode 100644 disas/x86-disas.c create mode 100644 disas/x86.h
diff --git a/disas/x86-core.c b/disas/x86-core.c new file mode 100644 index 000000000000..c4f7034e3420 --- /dev/null +++ b/disas/x86-core.c @@ -0,0 +1,2716 @@ +/* + * Copyright (c) 2024-2025 Michael Clark + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <limits.h>
No need for these 5 includes ...
+ +#include "qemu/osdep.h"
... as they are provided by "qemu/osdep.h".
+#include "qemu/host-utils.h" + +#include "disas/x86.h" +#include "disas/x86-tables.inc"
+x86_opr_formatter x86_format_intel_hex; +x86_opr_formatter x86_format_intel_dec; +x86_opr_formats x86_opr_formats_intel_hex; +x86_opr_formats x86_opr_formats_intel_dec; + +static uint debug; + +void x86_set_debug(uint d) { debug = d; } +
const
+x86_map_str x86_mode_names[] = +{ + { x86_modes_64, "64" }, + { x86_modes_32, "32" }, + { x86_modes_16, "16" }, +}; +
const
+x86_map_str x86_map_names[] = +{ + { x86_map_map6, "map6" }, + { x86_map_map5, "map5" }, + { x86_map_map4, "map4" }, + { x86_map_0f3a, "0f3a" }, + { x86_map_0f38, "0f38" }, + { x86_map_0f, "0f" }, +}; +
const
+x86_map_str x86_ord_names[] = +{ + { x86_ord_rflags, "rflags" }, + { x86_ord_mxcsr, "mxcsr" }, + { x86_ord_xmm0_7, "xmm0_7" }, + { x86_ord_xmm0, "xmm0" }, + { x86_ord_seg, "seg" }, + { x86_ord_stx, "stx" }, + { x86_ord_st0, "st0" }, + { x86_ord_rdi, "rdi" }, + { x86_ord_rsi, "rsi" }, + { x86_ord_rbp, "rbp" }, + { x86_ord_rsp, "rsp" }, + { x86_ord_rbx, "rbx" }, + { x86_ord_rdx, "rdx" }, + { x86_ord_rcx, "rcx" }, + { x86_ord_rax, "rax" }, + { x86_ord_one, "one" }, + { x86_ord_ime, "ime" }, + { x86_ord_is4, "is4" }, + { x86_ord_sib, "sib" }, + { x86_ord_vec, "vec" }, + { x86_ord_opr, "opr" }, + { x86_ord_mrm, "mrm" }, + { x86_ord_reg, "reg" }, + { x86_ord_imm, "imm" }, + { x86_ord_rwi, "rwi" }, + { x86_ord_wi, "wi" }, + { x86_ord_ri, "ri" }, + { x86_ord_rw, "rw" }, + { x86_ord_i, "i" }, + { x86_ord_r, "r" }, + { x86_ord_w, "w" }, + { 0, NULL }, +}; +
const
+x86_map_str x86_opr_names[] = +{ + { x86_opr_bnd_mem, "bnd/mem" }, + { x86_opr_k_m64, "k/m64" }, + { x86_opr_k_m32, "k/m32" }, + { x86_opr_k_m16, "k/m16" }, + { x86_opr_k_m8, "k/m8" }, + { x86_opr_vm64z, "vm64z" }, + { x86_opr_vm64y, "vm64y" }, + { x86_opr_vm64x, "vm64x" }, + { x86_opr_vm32z, "vm32z" }, + { x86_opr_vm32y, "vm32y" }, + { x86_opr_vm32x, "vm32x" }, + { x86_opr_rw_mw, "rw/mw" }, + { x86_opr_r64_m64, "r64/m64" }, + { x86_opr_r64_m32, "r64/m32" }, + { x86_opr_r64_m16, "r64/m16" }, + { x86_opr_r64_m8, "r64/m8" }, + { x86_opr_r32_m32, "r32/m32" }, + { x86_opr_r32_m16, "r32/m16" }, + { x86_opr_r32_m8, "r32/m8" }, + { x86_opr_r16_m16, "r16/m16" }, + { x86_opr_r16_m8, "r16/m8" }, + { x86_opr_r8_m8, "r8/m8" }, + { x86_opr_zmm_m512_m64bcst, "zmm/m512/m64bcst" }, + { x86_opr_zmm_m512_m32bcst, "zmm/m512/m32bcst" }, + { x86_opr_zmm_m512_m16bcst, "zmm/m512/m16bcst" }, + { x86_opr_ymm_m256_m64bcst, "ymm/m256/m64bcst" }, + { x86_opr_ymm_m256_m32bcst, "ymm/m256/m32bcst" }, + { x86_opr_ymm_m256_m16bcst, "ymm/m256/m16bcst" }, + { x86_opr_xmm_m128_m64bcst, "xmm/m128/m64bcst" }, + { x86_opr_xmm_m128_m32bcst, "xmm/m128/m32bcst" }, + { x86_opr_xmm_m128_m16bcst, "xmm/m128/m16bcst" }, + { x86_opr_xmm_m64_m32bcst, "xmm/m64/m32bcst" }, + { x86_opr_xmm_m64_m16bcst, "xmm/m64/m16bcst" }, + { x86_opr_xmm_m32_m16bcst, "xmm/m32/m16bcst" }, + { x86_opr_zmm_m512, "zmm/m512" }, + { x86_opr_ymm_m256, "ymm/m256" }, + { x86_opr_xmm_m128, "xmm/m128" }, + { x86_opr_xmm_m64, "xmm/m64" }, + { x86_opr_xmm_m32, "xmm/m32" }, + { x86_opr_xmm_m16, "xmm/m16" }, + { x86_opr_xmm_m8, "xmm/m8" }, + { x86_opr_mm_m64, "mm/m64" }, + { x86_opr_mm_m32, "mm/m32" }, + { x86_opr_mp, "mp" }, + { x86_opr_mw, "mw" }, + { x86_opr_vm64, "vm64" }, + { x86_opr_vm32, "vm32" }, + { x86_opr_r_m64, "r/m64" }, + { x86_opr_r_m32, "r/m32" }, + { x86_opr_r_m16, "r/m16" }, + { x86_opr_r_m8, "r/m8" }, + { x86_opr_m64bcst, "m64bcst" }, + { x86_opr_m32bcst, "m32bcst" }, + { x86_opr_m16bcst, "m16bcst" }, + { x86_opr_mib, "mib" }, + { x86_opr_m384, "m384" }, + { x86_opr_m80, "m80" }, + { x86_opr_m512, "m512" }, + { x86_opr_m256, "m256" }, + { x86_opr_m128, "m128" }, + { x86_opr_m64, "m64" }, + { x86_opr_m32, "m32" }, + { x86_opr_m16, "m16" }, + { x86_opr_m8, "m8" }, + { x86_opr_seg_gs, "gs" }, + { x86_opr_seg_fs, "fs" }, + { x86_opr_seg_ds, "ds" }, + { x86_opr_seg_ss, "ss" }, + { x86_opr_seg_cs, "cs" }, + { x86_opr_seg_es, "es" }, + { x86_opr_reg_xmm0_7, "xmm0_7" }, + { x86_opr_reg_xmm0, "xmm0" }, + { x86_opr_reg_pdi, "pdi" }, + { x86_opr_reg_psi, "psi" }, + { x86_opr_reg_pb, "pb" }, + { x86_opr_reg_pd, "pd" }, + { x86_opr_reg_pc, "pc" }, + { x86_opr_reg_pa, "pa" }, + { x86_opr_reg_bw, "bw" }, + { x86_opr_reg_dw, "dw" }, + { x86_opr_reg_cw, "cw" }, + { x86_opr_reg_aw, "aw" }, + { x86_opr_reg_rbx, "rbx" }, + { x86_opr_reg_rdx, "rdx" }, + { x86_opr_reg_rcx, "rcx" }, + { x86_opr_reg_rax, "rax" }, + { x86_opr_reg_ebx, "ebx" }, + { x86_opr_reg_edx, "edx" }, + { x86_opr_reg_ecx, "ecx" }, + { x86_opr_reg_eax, "eax" }, + { x86_opr_reg_bx, "bx" }, + { x86_opr_reg_dx, "dx" }, + { x86_opr_reg_cx, "cx" }, + { x86_opr_reg_ax, "ax" }, + { x86_opr_reg_cl, "cl" }, + { x86_opr_reg_al, "al" }, + { x86_opr_reg_st0, "st0" }, + { x86_opr_reg_v0, "v0" }, + { x86_opr_reg_ah, "ah" }, + { x86_opr_reg_di, "di" }, + { x86_opr_reg_si, "si" }, + { x86_opr_reg_d, "d" }, + { x86_opr_reg_c, "c" }, + { x86_opr_reg_a, "a" }, + { x86_opr_memfar16_64, "memfar16:64" }, + { x86_opr_memfar16_32, "memfar16:32" }, + { x86_opr_memfar16_16, "memfar16:16" }, + { x86_opr_far16_32, "far16:32" }, + { x86_opr_far16_16, "far16:16" }, + { x86_opr_relw, "relw" }, + { x86_opr_rel8, "rel8" }, + { x86_opr_moffs, "moffs" }, + { x86_opr_1, "1" }, + { x86_opr_zmm, "zmm" }, + { x86_opr_ymm, "ymm" }, + { x86_opr_xmm, "xmm" }, + { x86_opr_mm, "mm" }, + { x86_opr_ra, "ra" }, + { x86_opr_rw, "rw" }, + { x86_opr_r64, "r64" }, + { x86_opr_r32, "r32" }, + { x86_opr_r16, "r16" }, + { x86_opr_r8, "r8" }, + { x86_opr_iw, "iw" }, + { x86_opr_iwd, "iwd" }, + { x86_opr_i64, "i64" }, + { x86_opr_i32, "i32" }, + { x86_opr_i16, "i16" }, + { x86_opr_ib, "ib" }, + { x86_opr_bnd, "bnd" }, + { x86_opr_dreg, "dreg" }, + { x86_opr_creg, "creg" }, + { x86_opr_seg, "seg" }, + { x86_opr_k, "k" }, + { x86_opr_st, "st" }, + { x86_opr_mmx, "mmx" }, + { x86_opr_vec, "vec" }, + { x86_opr_reg, "reg" }, + { x86_opr_imm, "imm" }, + { x86_opr_bcst, "bcst" }, + { x86_opr_mem, "mem" }, + { x86_opr_flag_er, "{er}" }, + { x86_opr_flag_k, "{k}" }, + { x86_opr_flag_sae, "{sae}" }, + { x86_opr_flag_z, "{z}" }, + { x86_opr_flag_rs2, "{rs2}" }, + { x86_opr_flag_rs4, "{rs4}" }, + { x86_opr_f64x8, "/f64x8" }, + { x86_opr_f64x4, "/f64x4" }, + { x86_opr_f64x2, "/f64x2" }, + { x86_opr_f64x1, "/f64x1" }, + { x86_opr_f32x16, "/f32x16" }, + { x86_opr_f32x8, "/f32x8" }, + { x86_opr_f32x4, "/f32x4" }, + { x86_opr_f32x2, "/f32x2" }, + { x86_opr_f32x1, "/f32x1" }, + { x86_opr_f16x32, "/f16x32" }, + { x86_opr_f16x16, "/f16x16" }, + { x86_opr_f16x8, "/f16x8" }, + { x86_opr_f16x4, "/f16x4" }, + { x86_opr_f16x2, "/f16x2" }, + { x86_opr_f16x1, "/f16x1" }, + { x86_opr_f8x64, "/f8x64" }, + { x86_opr_f8x32, "/f8x32" }, + { x86_opr_f8x16, "/f8x16" }, + { x86_opr_f8x8, "/f8x8" }, + { x86_opr_f8x4, "/f8x4" }, + { x86_opr_f8x2, "/f8x2" }, + { x86_opr_f8x1, "/f8x1" }, + { x86_opr_i512x1, "/i512x1" }, + { x86_opr_i256x2, "/i256x2" }, + { x86_opr_i256x1, "/i256x1" }, + { x86_opr_i128x4, "/i128x4" }, + { x86_opr_i128x2, "/i128x2" }, + { x86_opr_i128x1, "/i128x1" }, + { x86_opr_i64x8, "/i64x8" }, + { x86_opr_i64x4, "/i64x4" }, + { x86_opr_i64x2, "/i64x2" }, + { x86_opr_i64x1, "/i64x1" }, + { x86_opr_i32x16, "/i32x16" }, + { x86_opr_i32x8, "/i32x8" }, + { x86_opr_i32x4, "/i32x4" }, + { x86_opr_i32x2, "/i32x2" }, + { x86_opr_i32x1, "/i32x1" }, + { x86_opr_i16x32, "/i16x32" }, + { x86_opr_i16x16, "/i16x16" }, + { x86_opr_i16x8, "/i16x8" }, + { x86_opr_i16x4, "/i16x4" }, + { x86_opr_i16x2, "/i16x2" }, + { x86_opr_i16x1, "/i16x1" }, + { x86_opr_i8x64, "/i8x64" }, + { x86_opr_i8x32, "/i8x32" }, + { x86_opr_i8x16, "/i8x16" }, + { x86_opr_i8x8, "/i8x8" }, + { x86_opr_i8x4, "/i8x4" }, + { x86_opr_i8x2, "/i8x2" }, + { x86_opr_i8x1, "/i8x1" }, + { 0, NULL }, +}; +
const
+x86_map_str x86_enc_names[] = +{ + { x86_enc_r_norexb, " .norexb" }, + { x86_enc_r_lock, " .lock" }, + { x86_enc_r_rep, " .rep" }, + { x86_enc_s_a64, " .a64" }, + { x86_enc_s_a32, " .a32" }, + { x86_enc_s_a16, " .a16" }, + { x86_enc_s_o64, " .o64" }, + { x86_enc_s_o32, " .o32" }, + { x86_enc_s_o16, " .o16" }, + { x86_enc_j_i16, " i16" }, + { x86_enc_j_ib, " ib" }, + { x86_enc_i_i64, " i64" }, + { x86_enc_i_i32, " i32" }, + { x86_enc_i_i16, " i16" }, + { x86_enc_i_iwd, " iwd" }, + { x86_enc_i_iw, " iw" }, + { x86_enc_i_ib, " ib" }, + { x86_enc_f_opcode_r, "" }, + { x86_enc_f_opcode, "" }, + { x86_enc_f_modrm_n, "" }, + { x86_enc_f_modrm_r, "" }, + { x86_enc_o_opcode_r, "" }, + { x86_enc_t_evex, ".evex" }, + { x86_enc_t_vex, ".vex" }, + { x86_enc_t_lex, ".lex" }, + { x86_enc_l_lig, ".lig" }, + { x86_enc_l_512, ".512" }, + { x86_enc_l_256, ".256" }, + { x86_enc_l_128, ".128" }, + { x86_enc_l_l1, ".l1" }, + { x86_enc_l_l0, ".l0" }, + { x86_enc_l_lz, ".lz" }, + { x86_enc_p_rexw, ".w" }, + { x86_enc_p_9b, ".9b" }, + { x86_enc_p_f2, ".f2" }, + { x86_enc_p_f3, ".f3" }, + { x86_enc_p_66, ".66" }, + { x86_enc_m_map6, ".map6" }, + { x86_enc_m_map5, ".map5" }, + { x86_enc_m_map4, ".map4" }, + { x86_enc_m_0f3a, ".0f3a" }, + { x86_enc_m_0f38, ".0f38" }, + { x86_enc_m_0f, ".0f" }, + { x86_enc_w_wig, ".wig" }, + { x86_enc_w_ww, ".ww" }, + { x86_enc_w_wx, ".wx" }, + { x86_enc_w_wn, ".wn" }, + { x86_enc_w_wb, ".wb" }, + { x86_enc_w_w1, ".w1" }, + { x86_enc_w_w0, ".w0" }, + { 0, NULL }, +};
+ +static void x86_print_row(size_t count, x86_table_col *cols) +{ + printf("|"); + for (size_t i = 0; i < count; i++) { + printf(" %-*s |", cols[i].width, cols[i].data); + g_free(cols[i].data); + } + printf("\n");
Shouldn't we use info->fprintf_func() in disas/ ?
+}
const
+x86_opr_formats x86_opr_formats_intel_hex = +{ + .ptr_rip = "%s[rip]", + .ptr_rip_disp = "%s[rip %s 0x%x]", + .ptr_reg = "%s[%s]", + .ptr_reg_disp = "%s[%s %s 0x%x]", + .ptr_reg_sreg = "%s[%s + %d*%s]", + .ptr_reg_sreg_disp = "%s[%s + %d*%s %s 0x%x]", + .ptr_reg_reg = "%s[%s + %s]", + .ptr_reg_reg_disp = "%s[%s + %s %s 0x%x]", + .ptr_sreg = "%s[%d*%s]", + .ptr_disp = "%s[%s0x%x]", + .ptr_imm64 = "%s[%s0x%llx]", + .ptr_imm32 = "%s[%s0x%x]", + .imm64 = "%s0x%llx", + .imm32 = "%s0x%x", + .reg = "%s", +}; +
const
+x86_opr_formats x86_opr_formats_intel_dec = +{ + .ptr_rip = "%s[rip]", + .ptr_rip_disp = "%s[rip %s %u]", + .ptr_reg = "%s[%s]", + .ptr_reg_disp = "%s[%s %s %u]", + .ptr_reg_sreg = "%s[%s + %d*%s]", + .ptr_reg_sreg_disp = "%s[%s + %d*%s %s %u]", + .ptr_reg_reg = "%s[%s + %s]", + .ptr_reg_reg_disp = "%s[%s + %s %s %u]", + .ptr_sreg = "%s[%d*%s]", + .ptr_disp = "%s[%s%u]", + .ptr_imm64 = "%s[%s%llu]", + .ptr_imm32 = "%s[%s%u]", + .imm64 = "%s%llu", + .imm32 = "%s%u", + .reg = "%s", +}; +
const
+x86_opr_formatter x86_format_intel_hex = +{ + .fmt_const = &x86_opr_intel_const_str, + .fmt_imm = &x86_opr_intel_imm_hex_str, + .fmt_reg = &x86_opr_intel_reg_str, + .fmt_mrm = &x86_opr_intel_mrm_hex_str, + .fmt_vec = &x86_opr_intel_vec_str, + .fmt_opb = &x86_opr_intel_opb_str, + .fmt_is4 = &x86_opr_intel_is4_str, + .fmt_ime = &x86_opr_intel_ime_hex_str, + .fmt_rel = &x86_opr_intel_rel_hex_str +}; +
const
+x86_opr_formatter x86_format_intel_dec = +{ + .fmt_const = &x86_opr_intel_const_str, + .fmt_imm = &x86_opr_intel_imm_dec_str, + .fmt_reg = &x86_opr_intel_reg_str, + .fmt_mrm = &x86_opr_intel_mrm_dec_str, + .fmt_vec = &x86_opr_intel_vec_str, + .fmt_opb = &x86_opr_intel_opb_str, + .fmt_is4 = &x86_opr_intel_is4_str, + .fmt_ime = &x86_opr_intel_ime_dec_str, + .fmt_rel = &x86_opr_intel_rel_dec_str +};