Module Name:	src
Committed By:	maxv
Date:		Thu Feb  7 10:58:45 UTC 2019

Modified Files:
	src/lib/libnvmm: libnvmm_x86.c
	src/tests/lib/libnvmm: h_mem_assist.c h_mem_assist_asm.S

Log Message:
Improvements:

 - Emulate the instructions by executing them directly on the host CPU.
   This is easier, and probably faster, than doing it manually in software.

 - Decode SUB from Primary, CMP from Group1, TEST from Group3, and add the
   associated tests.

 - Correctly handle the cases where an instruction that always implicitly
   reads its register operand is executed with the memory operand as source
   (e.g. "orq (%rbx),%rax").

 - Fix the MMU handling of 32bit-PAE. Under PAE, CR3 is not page-aligned, so
   there are extra low bits that are valid.

With these changes in place, I can boot Windows XP on Qemu+NVMM.

To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/lib/libnvmm/libnvmm_x86.c
cvs rdiff -u -r1.4 -r1.5 src/tests/lib/libnvmm/h_mem_assist.c \
    src/tests/lib/libnvmm/h_mem_assist_asm.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/lib/libnvmm/libnvmm_x86.c diff -u src/lib/libnvmm/libnvmm_x86.c:1.18 src/lib/libnvmm/libnvmm_x86.c:1.19 --- src/lib/libnvmm/libnvmm_x86.c:1.18 Fri Feb 1 06:49:58 2019 +++ src/lib/libnvmm/libnvmm_x86.c Thu Feb 7 10:58:45 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: libnvmm_x86.c,v 1.18 2019/02/01 06:49:58 maxv Exp $ */ +/* $NetBSD: libnvmm_x86.c,v 1.19 2019/02/07 10:58:45 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -111,6 +111,8 @@ nvmm_vcpu_dump(struct nvmm_machine *mach #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT) #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT) +#define CR3_FRAME_32BIT PG_FRAME + typedef uint32_t pte_32bit_t; static int @@ -125,7 +127,7 @@ x86_gva_to_gpa_32bit(struct nvmm_machine *prot = NVMM_PROT_ALL; /* Parse L2. */ - L2gpa = (cr3 & PG_FRAME); + L2gpa = (cr3 & CR3_FRAME_32BIT); if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1) return -1; pdir = (pte_32bit_t *)L2hva; @@ -181,6 +183,8 @@ x86_gva_to_gpa_32bit(struct nvmm_machine #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT) #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT) +#define CR3_FRAME_32BIT_PAE __BITS(31, 5) + typedef uint64_t pte_32bit_pae_t; static int @@ -195,7 +199,7 @@ x86_gva_to_gpa_32bit_pae(struct nvmm_mac *prot = NVMM_PROT_ALL; /* Parse L3. */ - L3gpa = (cr3 & PG_FRAME); + L3gpa = (cr3 & CR3_FRAME_32BIT_PAE); if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1) return -1; pdir = (pte_32bit_pae_t *)L3hva; @@ -272,6 +276,8 @@ x86_gva_to_gpa_32bit_pae(struct nvmm_mac #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT) #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT) +#define CR3_FRAME_64BIT PG_FRAME + typedef uint64_t pte_64bit_t; static inline bool @@ -297,7 +303,7 @@ x86_gva_to_gpa_64bit(struct nvmm_machine return -1; /* Parse L4. 
*/ - L4gpa = (cr3 & PG_FRAME); + L4gpa = (cr3 & CR3_FRAME_64BIT); if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1) return -1; pdir = (pte_64bit_t *)L4hva; @@ -820,13 +826,68 @@ out: /* -------------------------------------------------------------------------- */ -static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); -static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); -static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); -static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); -static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); -static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); -static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); +struct x86_emul { + bool read; + bool notouch; + void (*func)(struct nvmm_mem *, uint64_t *); +}; + +static void x86_func_or(struct nvmm_mem *, uint64_t *); +static void x86_func_and(struct nvmm_mem *, uint64_t *); +static void x86_func_sub(struct nvmm_mem *, uint64_t *); +static void x86_func_xor(struct nvmm_mem *, uint64_t *); +static void x86_func_cmp(struct nvmm_mem *, uint64_t *); +static void x86_func_test(struct nvmm_mem *, uint64_t *); +static void x86_func_mov(struct nvmm_mem *, uint64_t *); +static void x86_func_stos(struct nvmm_mem *, uint64_t *); +static void x86_func_lods(struct nvmm_mem *, uint64_t *); +static void x86_func_movs(struct nvmm_mem *, uint64_t *); + +static const struct x86_emul x86_emul_or = { + .read = true, + .func = x86_func_or +}; + +static const struct x86_emul x86_emul_and = { + .read = true, + .func = x86_func_and +}; + +static const struct x86_emul x86_emul_sub = { + .read = true, + .func = x86_func_sub +}; + +static const struct x86_emul x86_emul_xor = { + .read = true, + .func = x86_func_xor +}; + +static const struct x86_emul x86_emul_cmp = { + .notouch = true, + .func = x86_func_cmp +}; + +static const struct x86_emul x86_emul_test = { + .notouch = true, + .func = x86_func_test +}; + +static const struct x86_emul x86_emul_mov = { + .func = x86_func_mov +}; + +static const struct x86_emul x86_emul_stos = { + .func = x86_func_stos +}; + +static const struct x86_emul x86_emul_lods = { + .func = x86_func_lods +}; + +static const struct x86_emul x86_emul_movs = { + .func = x86_func_movs +}; /* Legacy prefixes. 
*/ #define LEG_LOCK 0xF0 @@ -954,10 +1015,9 @@ struct x86_instr { struct x86_store src; struct x86_store dst; - struct x86_store *strm; - void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); + const struct x86_emul *emul; }; struct x86_decode_fsm { @@ -985,14 +1045,15 @@ struct x86_opcode { int defsize; int allsize; bool group1; + bool group3; bool group11; bool immediate; int flags; - void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); + const struct x86_emul *emul; }; struct x86_group_entry { - void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *); + const struct x86_emul *emul; }; #define OPSIZE_BYTE 0x01 @@ -1005,13 +1066,19 @@ struct x86_group_entry { #define FLAG_ze 0x04 static const struct x86_group_entry group1[8] = { - [1] = { .emul = x86_emul_or }, - [4] = { .emul = x86_emul_and }, - [6] = { .emul = x86_emul_xor } + [1] = { .emul = &x86_emul_or }, + [4] = { .emul = &x86_emul_and }, + [6] = { .emul = &x86_emul_xor }, + [7] = { .emul = &x86_emul_cmp } +}; + +static const struct x86_group_entry group3[8] = { + [0] = { .emul = &x86_emul_test }, + [1] = { .emul = &x86_emul_test } }; static const struct x86_group_entry group11[8] = { - [0] = { .emul = x86_emul_mov } + [0] = { .emul = &x86_emul_mov } }; static const struct x86_opcode primary_opcode_table[] = { @@ -1019,6 +1086,18 @@ static const struct x86_opcode primary_o * Group1 */ { + /* Eb, Ib */ + .byte = 0x80, + .regmodrm = true, + .regtorm = true, + .szoverride = false, + .defsize = OPSIZE_BYTE, + .allsize = -1, + .group1 = true, + .immediate = true, + .emul = NULL /* group1 */ + }, + { /* Ev, Iz */ .byte = 0x81, .regmodrm = true, @@ -1046,6 +1125,35 @@ static const struct x86_opcode primary_o }, /* + * Group3 + */ + { + /* Eb, Ib */ + .byte = 0xF6, + .regmodrm = true, + .regtorm = true, + .szoverride = false, + .defsize = OPSIZE_BYTE, + .allsize = -1, + .group3 = true, + .immediate = true, + .emul = NULL /* group3 */ + }, + { + /* Ev, Iz */ + .byte = 0xF7, + .regmodrm = true, + .regtorm = true, + .szoverride = true, + .defsize = -1, + .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, + .group3 = true, + .immediate = true, + .flags = FLAG_immz, + .emul = NULL /* group3 */ + }, + + /* * Group11 */ { @@ -1085,7 +1193,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_or + .emul = &x86_emul_or }, { /* Ev, Gv */ @@ -1095,7 +1203,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_or + .emul = &x86_emul_or }, { /* Gb, Eb */ @@ -1105,7 +1213,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_or + .emul = &x86_emul_or }, { /* Gv, Ev */ @@ -1115,7 +1223,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_or + .emul = &x86_emul_or }, /* @@ -1129,7 +1237,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_and + .emul = &x86_emul_and }, { /* Ev, Gv */ @@ -1139,7 +1247,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_and + .emul = &x86_emul_and }, { /* Gb, Eb */ @@ -1149,7 +1257,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = 
-1, - .emul = x86_emul_and + .emul = &x86_emul_and }, { /* Gv, Ev */ @@ -1159,7 +1267,51 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_and + .emul = &x86_emul_and + }, + + /* + * SUB + */ + { + /* Eb, Gb */ + .byte = 0x28, + .regmodrm = true, + .regtorm = true, + .szoverride = false, + .defsize = OPSIZE_BYTE, + .allsize = -1, + .emul = &x86_emul_sub + }, + { + /* Ev, Gv */ + .byte = 0x29, + .regmodrm = true, + .regtorm = true, + .szoverride = true, + .defsize = -1, + .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, + .emul = &x86_emul_sub + }, + { + /* Gb, Eb */ + .byte = 0x2A, + .regmodrm = true, + .regtorm = false, + .szoverride = false, + .defsize = OPSIZE_BYTE, + .allsize = -1, + .emul = &x86_emul_sub + }, + { + /* Gv, Ev */ + .byte = 0x2B, + .regmodrm = true, + .regtorm = false, + .szoverride = true, + .defsize = -1, + .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, + .emul = &x86_emul_sub }, /* @@ -1173,7 +1325,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_xor + .emul = &x86_emul_xor }, { /* Ev, Gv */ @@ -1183,7 +1335,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_xor + .emul = &x86_emul_xor }, { /* Gb, Eb */ @@ -1193,7 +1345,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_xor + .emul = &x86_emul_xor }, { /* Gv, Ev */ @@ -1203,7 +1355,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_xor + .emul = &x86_emul_xor }, /* @@ -1217,7 +1369,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* Ev, Gv */ @@ -1227,7 +1379,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* Gb, Eb */ @@ -1237,7 +1389,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* Gv, Ev */ @@ -1247,7 +1399,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* AL, Ob */ @@ -1257,7 +1409,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* rAX, Ov */ @@ -1267,7 +1419,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* Ob, AL */ @@ -1277,7 +1429,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* Ov, rAX */ @@ -1287,7 +1439,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, /* @@ -1300,7 +1452,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_movs + .emul = 
&x86_emul_movs }, { /* Yv, Xv */ .byte = 0xA5, .regmodrm = false, .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_movs + .emul = &x86_emul_movs }, /* @@ -1322,7 +1474,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_stos + .emul = &x86_emul_stos }, { /* Yv, rAX */ @@ -1331,7 +1483,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_stos + .emul = &x86_emul_stos }, /* @@ -1344,7 +1496,7 @@ static const struct x86_opcode primary_o .szoverride = false, .defsize = OPSIZE_BYTE, .allsize = -1, - .emul = x86_emul_lods + .emul = &x86_emul_lods }, { /* rAX, Xv */ @@ -1353,7 +1505,7 @@ static const struct x86_opcode primary_o .szoverride = true, .defsize = -1, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, - .emul = x86_emul_lods + .emul = &x86_emul_lods }, }; @@ -1370,7 +1522,7 @@ static const struct x86_opcode secondary .defsize = OPSIZE_BYTE, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, .flags = FLAG_ze, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, { /* Gv, Ew */ @@ -1381,7 +1533,7 @@ static const struct x86_opcode secondary .szoverride = true, .defsize = OPSIZE_WORD, .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD, .flags = FLAG_ze, - .emul = x86_emul_mov + .emul = &x86_emul_mov }, }; @@ -2064,6 +2216,11 @@ node_regmodrm(struct x86_decode_fsm *fsm return -1; } instr->emul = group1[instr->regmodrm.reg].emul; + } else if (opcode->group3) { + if (group3[instr->regmodrm.reg].emul == NULL) { + return -1; + } + instr->emul = group3[instr->regmodrm.reg].emul; } else if (opcode->group11) { if (group11[instr->regmodrm.reg].emul == NULL) { return -1; @@ -2425,150 +2582,270 @@ x86_decode(uint8_t *inst_bytes, size_t i /* -------------------------------------------------------------------------- */ -static inline uint8_t -compute_parity(uint8_t *data) -{ - uint64_t *ptr = (uint64_t *)data; - uint64_t val = *ptr; +#define EXEC_INSTR(sz, instr) \ +static uint##sz##_t \ +exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags)\ +{ \ + uint##sz##_t res; \ + __asm __volatile ( \ + #instr " %2, %3;" \ + "mov %3, %1;" \ + "pushfq;" \ + "popq %0" \ + : "=r" (*rflags), "=r" (res) \ + : "r" (op1), "r" (op2)); \ + return res; \ +} + +#define EXEC_DISPATCHER(instr) \ +static uint64_t \ +exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \ +{ \ + switch (opsize) { \ + case 1: \ + return exec_##instr##8(op1, op2, rflags); \ + case 2: \ + return exec_##instr##16(op1, op2, rflags); \ + case 4: \ + return exec_##instr##32(op1, op2, rflags); \ + default: \ + return exec_##instr##64(op1, op2, rflags); \ + } \ +} + +/* SUB: ret = op1 - op2 */ +#define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF) +EXEC_INSTR(8, sub) +EXEC_INSTR(16, sub) +EXEC_INSTR(32, sub) +EXEC_INSTR(64, sub) +EXEC_DISPATCHER(sub) + +/* OR: ret = op1 | op2 */ +#define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF) +EXEC_INSTR(8, or) +EXEC_INSTR(16, or) +EXEC_INSTR(32, or) +EXEC_INSTR(64, or) +EXEC_DISPATCHER(or) + +/* AND: ret = op1 & op2 */ +#define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF) +EXEC_INSTR(8, and) +EXEC_INSTR(16, and) +EXEC_INSTR(32, and) +EXEC_INSTR(64, and) +EXEC_DISPATCHER(and) + +/* XOR: ret = op1 ^ op2 */ +#define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF) +EXEC_INSTR(8, xor) +EXEC_INSTR(16, xor) +EXEC_INSTR(32, xor) +EXEC_INSTR(64, xor)
+EXEC_DISPATCHER(xor) - val ^= val >> 32; - val ^= val >> 16; - val ^= val >> 8; - val ^= val >> 4; - val ^= val >> 2; - val ^= val >> 1; - return (~val) & 1; -} +/* -------------------------------------------------------------------------- */ + +/* + * Emulation functions. We don't care about the order of the operands, except + * for SUB, CMP and TEST. For these ones we look at mem->write to determine who + * is op1 and who is op2. + */ static void -x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_or(struct nvmm_mem *mem, uint64_t *gprs) { + uint64_t *retval = (uint64_t *)mem->data; const bool write = mem->write; - uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS]; - uint8_t data[8]; - size_t i; - - fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF); + uint64_t *op1, op2, fl, ret; - memcpy(data, mem->data, sizeof(data)); + op1 = (uint64_t *)mem->data; + op2 = 0; - /* Fetch the value to be OR'ed. */ + /* Fetch the value to be OR'ed (op2). */ + mem->data = (uint8_t *)&op2; mem->write = false; - (*cb)(mem); + (*__callbacks.mem)(mem); /* Perform the OR. */ - for (i = 0; i < mem->size; i++) { - mem->data[i] |= data[i]; - if (mem->data[i] != 0) - fl |= PSL_Z; - } - if (mem->data[mem->size-1] & __BIT(7)) - fl |= PSL_N; - if (compute_parity(mem->data)) - fl |= PSL_PF; + ret = exec_or(*op1, op2, &fl, mem->size); if (write) { /* Write back the result. */ + mem->data = (uint8_t *)&ret; mem->write = true; - (*cb)(mem); + (*__callbacks.mem)(mem); + } else { + /* Return data to the caller. */ + *retval = ret; } - gprs[NVMM_X64_GPR_RFLAGS] = fl; + gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK; + gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK); } static void -x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_and(struct nvmm_mem *mem, uint64_t *gprs) { + uint64_t *retval = (uint64_t *)mem->data; const bool write = mem->write; - uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS]; - uint8_t data[8]; - size_t i; - - fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF); + uint64_t *op1, op2, fl, ret; - memcpy(data, mem->data, sizeof(data)); + op1 = (uint64_t *)mem->data; + op2 = 0; - /* Fetch the value to be AND'ed. */ + /* Fetch the value to be AND'ed (op2). */ + mem->data = (uint8_t *)&op2; mem->write = false; - (*cb)(mem); + (*__callbacks.mem)(mem); /* Perform the AND. */ - for (i = 0; i < mem->size; i++) { - mem->data[i] &= data[i]; - if (mem->data[i] != 0) - fl |= PSL_Z; - } - if (mem->data[mem->size-1] & __BIT(7)) - fl |= PSL_N; - if (compute_parity(mem->data)) - fl |= PSL_PF; + ret = exec_and(*op1, op2, &fl, mem->size); if (write) { /* Write back the result. */ + mem->data = (uint8_t *)&ret; mem->write = true; - (*cb)(mem); + (*__callbacks.mem)(mem); + } else { + /* Return data to the caller. */ + *retval = ret; } - gprs[NVMM_X64_GPR_RFLAGS] = fl; + gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK; + gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK); } static void -x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs) { + uint64_t *retval = (uint64_t *)mem->data; const bool write = mem->write; - uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS]; - uint8_t data[8]; - size_t i; + uint64_t *op1, *op2, fl, ret; + uint64_t tmp; + bool memop1; + + memop1 = !mem->write; + op1 = memop1 ? &tmp : (uint64_t *)mem->data; + op2 = memop1 ? (uint64_t *)mem->data : &tmp; + + /* Fetch the value to be SUB'ed (op1 or op2). 
*/ + mem->data = (uint8_t *)&tmp; + mem->write = false; + (*__callbacks.mem)(mem); - fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF); + /* Perform the SUB. */ + ret = exec_sub(*op1, *op2, &fl, mem->size); + + if (write) { + /* Write back the result. */ + mem->data = (uint8_t *)&ret; + mem->write = true; + (*__callbacks.mem)(mem); + } else { + /* Return data to the caller. */ + *retval = ret; + } - memcpy(data, mem->data, sizeof(data)); + gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK; + gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK); +} - /* Fetch the value to be XOR'ed. */ +static void +x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs) +{ + uint64_t *retval = (uint64_t *)mem->data; + const bool write = mem->write; + uint64_t *op1, op2, fl, ret; + + op1 = (uint64_t *)mem->data; + op2 = 0; + + /* Fetch the value to be XOR'ed (op2). */ + mem->data = (uint8_t *)&op2; mem->write = false; - (*cb)(mem); + (*__callbacks.mem)(mem); /* Perform the XOR. */ - for (i = 0; i < mem->size; i++) { - mem->data[i] ^= data[i]; - if (mem->data[i] != 0) - fl |= PSL_Z; - } - if (mem->data[mem->size-1] & __BIT(7)) - fl |= PSL_N; - if (compute_parity(mem->data)) - fl |= PSL_PF; + ret = exec_xor(*op1, op2, &fl, mem->size); if (write) { /* Write back the result. */ + mem->data = (uint8_t *)&ret; mem->write = true; - (*cb)(mem); + (*__callbacks.mem)(mem); + } else { + /* Return data to the caller. */ + *retval = ret; } - gprs[NVMM_X64_GPR_RFLAGS] = fl; + gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK; + gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK); } static void -x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs) +{ + uint64_t *op1, *op2, fl; + uint64_t tmp; + bool memop1; + + memop1 = !mem->write; + op1 = memop1 ? &tmp : (uint64_t *)mem->data; + op2 = memop1 ? (uint64_t *)mem->data : &tmp; + + /* Fetch the value to be CMP'ed (op1 or op2). */ + mem->data = (uint8_t *)&tmp; + mem->write = false; + (*__callbacks.mem)(mem); + + /* Perform the CMP. */ + exec_sub(*op1, *op2, &fl, mem->size); + + gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK; + gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK); +} + +static void +x86_func_test(struct nvmm_mem *mem, uint64_t *gprs) +{ + uint64_t *op1, *op2, fl; + uint64_t tmp; + bool memop1; + + memop1 = !mem->write; + op1 = memop1 ? &tmp : (uint64_t *)mem->data; + op2 = memop1 ? (uint64_t *)mem->data : &tmp; + + /* Fetch the value to be TEST'ed (op1 or op2). */ + mem->data = (uint8_t *)&tmp; + mem->write = false; + (*__callbacks.mem)(mem); + + /* Perform the TEST. */ + exec_and(*op1, *op2, &fl, mem->size); + + gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK; + gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK); +} + +static void +x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs) { /* * Nothing special, just move without emulation. */ - (*cb)(mem); + (*__callbacks.mem)(mem); } static void -x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs) { /* * Just move, and update RDI. */ - (*cb)(mem); + (*__callbacks.mem)(mem); if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { gprs[NVMM_X64_GPR_RDI] -= mem->size; @@ -2578,13 +2855,12 @@ x86_emul_stos(struct nvmm_mem *mem, void } static void -x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs) { /* * Just move, and update RSI. 
*/ - (*cb)(mem); + (*__callbacks.mem)(mem); if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) { gprs[NVMM_X64_GPR_RSI] -= mem->size; @@ -2594,8 +2870,7 @@ x86_emul_lods(struct nvmm_mem *mem, void } static void -x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *), - uint64_t *gprs) +x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs) { /* * Special instruction: double memory operand. Don't call the cb, @@ -2795,7 +3070,7 @@ assist_mem_double(struct nvmm_machine *m return -1; mem.size = size; - (*instr->emul)(&mem, NULL, state->gprs); + (*instr->emul->func)(&mem, state->gprs); return 0; } @@ -2860,15 +3135,25 @@ assist_mem_single(struct nvmm_machine *m default: DISASSEMBLER_BUG(); } + } else if (instr->emul->read) { + if (instr->dst.type != STORE_REG) { + DISASSEMBLER_BUG(); + } + if (instr->dst.disp.type != DISP_NONE) { + DISASSEMBLER_BUG(); + } + val = state->gprs[instr->dst.u.reg->num]; + val = __SHIFTOUT(val, instr->dst.u.reg->mask); + memcpy(mem.data, &val, mem.size); } - (*instr->emul)(&mem, __callbacks.mem, state->gprs); + (*instr->emul->func)(&mem, state->gprs); - if (!mem.write) { + if (!instr->emul->notouch && !mem.write) { if (instr->dst.type != STORE_REG) { DISASSEMBLER_BUG(); } - memcpy(&val, mem.data, sizeof(uint64_t)); + memcpy(&val, membuf, sizeof(uint64_t)); val = __SHIFTIN(val, instr->dst.u.reg->mask); state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask; state->gprs[instr->dst.u.reg->num] |= val; Index: src/tests/lib/libnvmm/h_mem_assist.c diff -u src/tests/lib/libnvmm/h_mem_assist.c:1.4 src/tests/lib/libnvmm/h_mem_assist.c:1.5 --- src/tests/lib/libnvmm/h_mem_assist.c:1.4 Fri Feb 1 06:49:58 2019 +++ src/tests/lib/libnvmm/h_mem_assist.c Thu Feb 7 10:58:45 2019 @@ -292,10 +292,13 @@ extern uint8_t test8_begin, test8_end; extern uint8_t test9_begin, test9_end; extern uint8_t test10_begin, test10_end; extern uint8_t test11_begin, test11_end; +extern uint8_t test12_begin, test12_end; +extern uint8_t test13_begin, test13_end; +extern uint8_t test14_begin, test14_end; static const struct test tests[] = { { "test1 - MOV", &test1_begin, &test1_end, 0x3004 }, - { "test2 - OR", &test2_begin, &test2_end, 0x14FF }, + { "test2 - OR", &test2_begin, &test2_end, 0x16FF }, { "test3 - AND", &test3_begin, &test3_end, 0x1FC0 }, { "test4 - XOR", &test4_begin, &test4_end, 0x10CF }, { "test5 - Address Sizes", &test5_begin, &test5_end, 0x1F00 }, @@ -305,6 +308,9 @@ static const struct test tests[] = { { "test9 - MOVS", &test9_begin, &test9_end, 0x12345678 }, { "test10 - MOVZXB", &test10_begin, &test10_end, 0x00000078 }, { "test11 - MOVZXW", &test11_begin, &test11_end, 0x00005678 }, + { "test12 - CMP", &test12_begin, &test12_end, 0x00000001 }, + { "test13 - SUB", &test13_begin, &test13_end, 0x0000000F0000A0FF }, + { "test14 - TEST", &test14_begin, &test14_end, 0x00000001 }, { NULL, NULL, NULL, -1 } }; Index: src/tests/lib/libnvmm/h_mem_assist_asm.S diff -u src/tests/lib/libnvmm/h_mem_assist_asm.S:1.4 src/tests/lib/libnvmm/h_mem_assist_asm.S:1.5 --- src/tests/lib/libnvmm/h_mem_assist_asm.S:1.4 Wed Feb 6 15:42:31 2019 +++ src/tests/lib/libnvmm/h_mem_assist_asm.S Thu Feb 7 10:58:45 2019 @@ -38,6 +38,9 @@ .globl test9_begin, test9_end .globl test10_begin, test10_end .globl test11_begin, test11_end + .globl test12_begin, test12_end + .globl test13_begin, test13_end + .globl test14_begin, test14_end .text .code64 @@ -74,6 +77,10 @@ test2_begin: movq $0x0400,%rcx orw %cx,(%rax) + movq $0x0200,%rcx + orq (%rax),%rcx + movq %rcx,(%rax) + TEST_END test2_end: @@ -202,3 +209,85 @@ test11_begin: 
TEST_END test11_end: + + .align 64 +test12_begin: + movq $0x1000,%rax + movq $0xFFFFFFFFF2345678,(%rax) + + cmpb $0x78,(%rax) + jne .L12_failure + cmpb $0x77,(%rax) + jl .L12_failure + cmpb $0x79,(%rax) + jg .L12_failure + + cmpw $0x5678,(%rax) + jne .L12_failure + cmpw $0x5677,(%rax) + jl .L12_failure + cmpw $0x5679,(%rax) + jg .L12_failure + + cmpl $0xF2345678,(%rax) + jne .L12_failure + cmpl $0xF2345677,(%rax) + jl .L12_failure + cmpl $0xF2345679,(%rax) + jg .L12_failure + + cmpq $0xFFFFFFFFF2345678,(%rax) + jne .L12_failure + cmpq $0xFFFFFFFFF2345677,(%rax) + jl .L12_failure + cmpq $0xFFFFFFFFF2345679,(%rax) + jg .L12_failure + +.L12_success: + movq $1,(%rax) + TEST_END +.L12_failure: + movq $0,(%rax) + TEST_END +test12_end: + + .align 64 +test13_begin: + movq $0x1000,%rax + movq $0x000000001000A0FF,(%rax) + + movq $0xFFFF,%rcx + subb %cl,(%rax) + + movq $0xA000,%rcx + subw %cx,(%rax) + + movq $0x0000000F1000A0FF,%rcx + subq (%rax),%rcx + + movq %rcx,(%rax) + + TEST_END +test13_end: + + .align 64 +test14_begin: + movq $0x1000,%rax + movq $0xA0FF,(%rax) + + testb $0x0F,(%rax) + jz .L14_failure + + testw $0x0F00,(%rax) + jnz .L14_failure + + testl $0xA000,(%rax) + jz .L14_failure + +.L14_success: + movq $1,(%rax) + TEST_END +.L14_failure: + movq $0,(%rax) + TEST_END +test14_end:
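
A note on the 32bit-PAE fix, for clarity: under PAE, CR3 points to the
32-byte-aligned page-directory-pointer table, so bits 31:5 of CR3 are all
significant; that is exactly what the new CR3_FRAME_32BIT_PAE = __BITS(31, 5)
mask preserves, whereas masking with the page-aligned PG_FRAME silently
dropped bits 11:5. Below is a small standalone illustration, not part of the
commit: the CR3 value is made up, and the masks are written out numerically
instead of with __BITS().

#include <stdint.h>
#include <stdio.h>

#define CR3_FRAME_PAGE	0xFFFFF000ULL	/* bits 31:12, page-aligned */
#define CR3_FRAME_PAE	0xFFFFFFE0ULL	/* bits 31:5, __BITS(31, 5) */

int
main(void)
{
	uint64_t cr3 = 0x00AC3020;	/* hypothetical PAE guest CR3 */

	/* The page-aligned mask loses the low bits of the PDPT base. */
	printf("page mask: %#llx\n",
	    (unsigned long long)(cr3 & CR3_FRAME_PAGE));	/* 0xac3000, wrong */
	printf("PAE mask:  %#llx\n",
	    (unsigned long long)(cr3 & CR3_FRAME_PAE));	/* 0xac3020, right */
	return 0;
}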