Module Name:    src
Committed By:   maxv
Date:           Thu Feb  7 10:58:45 UTC 2019

Modified Files:
        src/lib/libnvmm: libnvmm_x86.c
        src/tests/lib/libnvmm: h_mem_assist.c h_mem_assist_asm.S

Log Message:
Improvements:

 - Emulate the instructions by executing them directly on the host CPU.
   This is simpler, and probably faster, than emulating them manually
   in software (see the sketch after this list).

 - Decode SUB from the Primary table, CMP from Group1 and TEST from
   Group3, and add the associated tests.

 - Correctly handle the cases where an instruction that always
   implicitly reads its register operand is executed with the memory
   operand as the source (e.g. "orq (%rbx),%rax"); a condensed sketch
   follows below.

 - Fix the MMU handling of 32bit-PAE. Under PAE, CR3 is not
   page-aligned: the PDPT is only 32-byte aligned, so bits 11:5 of CR3
   are valid address bits (worked example below).
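
As an illustration of the first item, here is a minimal standalone
sketch of the technique (x86-64 host, GCC-style inline asm; the
function name and constraints here are mine, the patch itself
generates one such function per operand size with its EXEC_INSTR
macro): run the ALU operation natively, then snapshot RFLAGS with
PUSHFQ/POPQ.

	#include <stdint.h>

	static uint64_t
	exec_or64(uint64_t op1, uint64_t op2, uint64_t *rflags)
	{
		uint64_t res = op2;

		/*
		 * res |= op1. The OR sets the host flags; no
		 * flag-modifying instruction runs before PUSHFQ.
		 */
		__asm __volatile (
			"orq	%2, %0;"
			"pushfq;"
			"popq	%1"
		    : "+r" (res), "=r" (*rflags)
		    : "r" (op1));
		return res;
	}

The caller then merges *rflags into the guest RFLAGS under a
per-instruction mask (PSL_OR_MASK in the diff), so only the flags
that the instruction actually defines are taken from the host.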

With these changes in place I can boot Windows XP on Qemu+NVMM.
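
To make the third item concrete: in "orq (%rbx),%rax" the memory
operand is the source, yet OR still reads %rax as its other operand.
Condensed from the new assist_mem_single() logic in the diff below
(error checks elided), the emulator now seeds the nvmm_mem buffer
with the register operand before calling the emulation function:

	} else if (instr->emul->read) {
		/* e.g. "orq (%rbx),%rax": preload %rax into the
		 * buffer, so the emul function receives it as an
		 * operand. */
		val = state->gprs[instr->dst.u.reg->num];
		val = __SHIFTOUT(val, instr->dst.u.reg->mask);
		memcpy(mem.data, &val, mem.size);
	}

The emulation function then fetches the memory operand through the
registered callback, performs the OR on the host, and, since
mem.write is false in this case, returns the result for the caller
to write back into %rax.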


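Regarding the PAE fix: the page-directory pointer table is only
32-byte aligned, so CR3 bits 31:5 all belong to its base address,
and masking CR3 with the page-aligned PG_FRAME wrongly discarded
bits 11:5. A small worked example (the CR3 value is made up):

	#define CR3_FRAME_32BIT_PAE	__BITS(31, 5)	/* from the patch */

	uint64_t cr3 = 0x7ffe0;	/* hypothetical guest PDPT at GPA 0x7ffe0 */

	uint64_t bad  = cr3 & PG_FRAME;			/* 0x7f000: wrong base */
	uint64_t good = cr3 & CR3_FRAME_32BIT_PAE;	/* 0x7ffe0: correct */
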
To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 src/lib/libnvmm/libnvmm_x86.c
cvs rdiff -u -r1.4 -r1.5 src/tests/lib/libnvmm/h_mem_assist.c \
    src/tests/lib/libnvmm/h_mem_assist_asm.S

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libnvmm/libnvmm_x86.c
diff -u src/lib/libnvmm/libnvmm_x86.c:1.18 src/lib/libnvmm/libnvmm_x86.c:1.19
--- src/lib/libnvmm/libnvmm_x86.c:1.18	Fri Feb  1 06:49:58 2019
+++ src/lib/libnvmm/libnvmm_x86.c	Thu Feb  7 10:58:45 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: libnvmm_x86.c,v 1.18 2019/02/01 06:49:58 maxv Exp $	*/
+/*	$NetBSD: libnvmm_x86.c,v 1.19 2019/02/07 10:58:45 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -111,6 +111,8 @@ nvmm_vcpu_dump(struct nvmm_machine *mach
 #define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
 #define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
 
+#define CR3_FRAME_32BIT	PG_FRAME
+
 typedef uint32_t pte_32bit_t;
 
 static int
@@ -125,7 +127,7 @@ x86_gva_to_gpa_32bit(struct nvmm_machine
 	*prot = NVMM_PROT_ALL;
 
 	/* Parse L2. */
-	L2gpa = (cr3 & PG_FRAME);
+	L2gpa = (cr3 & CR3_FRAME_32BIT);
 	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
 		return -1;
 	pdir = (pte_32bit_t *)L2hva;
@@ -181,6 +183,8 @@ x86_gva_to_gpa_32bit(struct nvmm_machine
 #define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
 #define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
 
+#define CR3_FRAME_32BIT_PAE	__BITS(31, 5)
+
 typedef uint64_t pte_32bit_pae_t;
 
 static int
@@ -195,7 +199,7 @@ x86_gva_to_gpa_32bit_pae(struct nvmm_mac
 	*prot = NVMM_PROT_ALL;
 
 	/* Parse L3. */
-	L3gpa = (cr3 & PG_FRAME);
+	L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
 	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
 		return -1;
 	pdir = (pte_32bit_pae_t *)L3hva;
@@ -272,6 +276,8 @@ x86_gva_to_gpa_32bit_pae(struct nvmm_mac
 #define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
 #define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
 
+#define CR3_FRAME_64BIT	PG_FRAME
+
 typedef uint64_t pte_64bit_t;
 
 static inline bool
@@ -297,7 +303,7 @@ x86_gva_to_gpa_64bit(struct nvmm_machine
 		return -1;
 
 	/* Parse L4. */
-	L4gpa = (cr3 & PG_FRAME);
+	L4gpa = (cr3 & CR3_FRAME_64BIT);
 	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
 		return -1;
 	pdir = (pte_64bit_t *)L4hva;
@@ -820,13 +826,68 @@ out:
 
 /* -------------------------------------------------------------------------- */
 
-static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
-static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
-static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
-static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
-static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
-static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
-static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
+struct x86_emul {
+	bool read;
+	bool notouch;
+	void (*func)(struct nvmm_mem *, uint64_t *);
+};
+
+static void x86_func_or(struct nvmm_mem *, uint64_t *);
+static void x86_func_and(struct nvmm_mem *, uint64_t *);
+static void x86_func_sub(struct nvmm_mem *, uint64_t *);
+static void x86_func_xor(struct nvmm_mem *, uint64_t *);
+static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
+static void x86_func_test(struct nvmm_mem *, uint64_t *);
+static void x86_func_mov(struct nvmm_mem *, uint64_t *);
+static void x86_func_stos(struct nvmm_mem *, uint64_t *);
+static void x86_func_lods(struct nvmm_mem *, uint64_t *);
+static void x86_func_movs(struct nvmm_mem *, uint64_t *);
+
+static const struct x86_emul x86_emul_or = {
+	.read = true,
+	.func = x86_func_or
+};
+
+static const struct x86_emul x86_emul_and = {
+	.read = true,
+	.func = x86_func_and
+};
+
+static const struct x86_emul x86_emul_sub = {
+	.read = true,
+	.func = x86_func_sub
+};
+
+static const struct x86_emul x86_emul_xor = {
+	.read = true,
+	.func = x86_func_xor
+};
+
+static const struct x86_emul x86_emul_cmp = {
+	.notouch = true,
+	.func = x86_func_cmp
+};
+
+static const struct x86_emul x86_emul_test = {
+	.notouch = true,
+	.func = x86_func_test
+};
+
+static const struct x86_emul x86_emul_mov = {
+	.func = x86_func_mov
+};
+
+static const struct x86_emul x86_emul_stos = {
+	.func = x86_func_stos
+};
+
+static const struct x86_emul x86_emul_lods = {
+	.func = x86_func_lods
+};
+
+static const struct x86_emul x86_emul_movs = {
+	.func = x86_func_movs
+};
 
 /* Legacy prefixes. */
 #define LEG_LOCK	0xF0
@@ -954,10 +1015,9 @@ struct x86_instr {
 
 	struct x86_store src;
 	struct x86_store dst;
-
 	struct x86_store *strm;
 
-	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
+	const struct x86_emul *emul;
 };
 
 struct x86_decode_fsm {
@@ -985,14 +1045,15 @@ struct x86_opcode {
 	int defsize;
 	int allsize;
 	bool group1;
+	bool group3;
 	bool group11;
 	bool immediate;
 	int flags;
-	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
+	const struct x86_emul *emul;
 };
 
 struct x86_group_entry {
-	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
+	const struct x86_emul *emul;
 };
 
 #define OPSIZE_BYTE 0x01
@@ -1005,13 +1066,19 @@ struct x86_group_entry {
 #define FLAG_ze		0x04
 
 static const struct x86_group_entry group1[8] = {
-	[1] = { .emul = x86_emul_or },
-	[4] = { .emul = x86_emul_and },
-	[6] = { .emul = x86_emul_xor }
+	[1] = { .emul = &x86_emul_or },
+	[4] = { .emul = &x86_emul_and },
+	[6] = { .emul = &x86_emul_xor },
+	[7] = { .emul = &x86_emul_cmp }
+};
+
+static const struct x86_group_entry group3[8] = {
+	[0] = { .emul = &x86_emul_test },
+	[1] = { .emul = &x86_emul_test }
 };
 
 static const struct x86_group_entry group11[8] = {
-	[0] = { .emul = x86_emul_mov }
+	[0] = { .emul = &x86_emul_mov }
 };
 
 static const struct x86_opcode primary_opcode_table[] = {
@@ -1019,6 +1086,18 @@ static const struct x86_opcode primary_o
 	 * Group1
 	 */
 	{
+		/* Eb, Ib */
+		.byte = 0x80,
+		.regmodrm = true,
+		.regtorm = true,
+		.szoverride = false,
+		.defsize = OPSIZE_BYTE,
+		.allsize = -1,
+		.group1 = true,
+		.immediate = true,
+		.emul = NULL /* group1 */
+	},
+	{
 		/* Ev, Iz */
 		.byte = 0x81,
 		.regmodrm = true,
@@ -1046,6 +1125,35 @@ static const struct x86_opcode primary_o
 	},
 
 	/*
+	 * Group3
+	 */
+	{
+		/* Eb, Ib */
+		.byte = 0xF6,
+		.regmodrm = true,
+		.regtorm = true,
+		.szoverride = false,
+		.defsize = OPSIZE_BYTE,
+		.allsize = -1,
+		.group3 = true,
+		.immediate = true,
+		.emul = NULL /* group3 */
+	},
+	{
+		/* Ev, Iz */
+		.byte = 0xF7,
+		.regmodrm = true,
+		.regtorm = true,
+		.szoverride = true,
+		.defsize = -1,
+		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
+		.group3 = true,
+		.immediate = true,
+		.flags = FLAG_immz,
+		.emul = NULL /* group3 */
+	},
+
+	/*
 	 * Group11
 	 */
 	{
@@ -1085,7 +1193,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_or
+		.emul = &x86_emul_or
 	},
 	{
 		/* Ev, Gv */
@@ -1095,7 +1203,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_or
+		.emul = &x86_emul_or
 	},
 	{
 		/* Gb, Eb */
@@ -1105,7 +1213,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_or
+		.emul = &x86_emul_or
 	},
 	{
 		/* Gv, Ev */
@@ -1115,7 +1223,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_or
+		.emul = &x86_emul_or
 	},
 
 	/*
@@ -1129,7 +1237,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_and
+		.emul = &x86_emul_and
 	},
 	{
 		/* Ev, Gv */
@@ -1139,7 +1247,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_and
+		.emul = &x86_emul_and
 	},
 	{
 		/* Gb, Eb */
@@ -1149,7 +1257,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_and
+		.emul = &x86_emul_and
 	},
 	{
 		/* Gv, Ev */
@@ -1159,7 +1267,51 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_and
+		.emul = &x86_emul_and
+	},
+
+	/*
+	 * SUB
+	 */
+	{
+		/* Eb, Gb */
+		.byte = 0x28,
+		.regmodrm = true,
+		.regtorm = true,
+		.szoverride = false,
+		.defsize = OPSIZE_BYTE,
+		.allsize = -1,
+		.emul = &x86_emul_sub
+	},
+	{
+		/* Ev, Gv */
+		.byte = 0x29,
+		.regmodrm = true,
+		.regtorm = true,
+		.szoverride = true,
+		.defsize = -1,
+		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
+		.emul = &x86_emul_sub
+	},
+	{
+		/* Gb, Eb */
+		.byte = 0x2A,
+		.regmodrm = true,
+		.regtorm = false,
+		.szoverride = false,
+		.defsize = OPSIZE_BYTE,
+		.allsize = -1,
+		.emul = &x86_emul_sub
+	},
+	{
+		/* Gv, Ev */
+		.byte = 0x2B,
+		.regmodrm = true,
+		.regtorm = false,
+		.szoverride = true,
+		.defsize = -1,
+		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
+		.emul = &x86_emul_sub
 	},
 
 	/*
@@ -1173,7 +1325,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_xor
+		.emul = &x86_emul_xor
 	},
 	{
 		/* Ev, Gv */
@@ -1183,7 +1335,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_xor
+		.emul = &x86_emul_xor
 	},
 	{
 		/* Gb, Eb */
@@ -1193,7 +1345,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_xor
+		.emul = &x86_emul_xor
 	},
 	{
 		/* Gv, Ev */
@@ -1203,7 +1355,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_xor
+		.emul = &x86_emul_xor
 	},
 
 	/*
@@ -1217,7 +1369,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* Ev, Gv */
@@ -1227,7 +1379,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* Gb, Eb */
@@ -1237,7 +1389,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* Gv, Ev */
@@ -1247,7 +1399,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* AL, Ob */
@@ -1257,7 +1409,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* rAX, Ov */
@@ -1267,7 +1419,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* Ob, AL */
@@ -1277,7 +1429,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* Ov, rAX */
@@ -1287,7 +1439,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 
 	/*
@@ -1300,7 +1452,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_movs
+		.emul = &x86_emul_movs
 	},
 	{
 		/* Yv, Xv */
@@ -1309,7 +1461,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_movs
+		.emul = &x86_emul_movs
 	},
 
 	/*
@@ -1322,7 +1474,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_stos
+		.emul = &x86_emul_stos
 	},
 	{
 		/* Yv, rAX */
@@ -1331,7 +1483,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_stos
+		.emul = &x86_emul_stos
 	},
 
 	/*
@@ -1344,7 +1496,7 @@ static const struct x86_opcode primary_o
 		.szoverride = false,
 		.defsize = OPSIZE_BYTE,
 		.allsize = -1,
-		.emul = x86_emul_lods
+		.emul = &x86_emul_lods
 	},
 	{
 		/* rAX, Xv */
@@ -1353,7 +1505,7 @@ static const struct x86_opcode primary_o
 		.szoverride = true,
 		.defsize = -1,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
-		.emul = x86_emul_lods
+		.emul = &x86_emul_lods
 	},
 };
 
@@ -1370,7 +1522,7 @@ static const struct x86_opcode secondary
 		.defsize = OPSIZE_BYTE,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.flags = FLAG_ze,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 	{
 		/* Gv, Ew */
@@ -1381,7 +1533,7 @@ static const struct x86_opcode secondary
 		.defsize = OPSIZE_WORD,
 		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
 		.flags = FLAG_ze,
-		.emul = x86_emul_mov
+		.emul = &x86_emul_mov
 	},
 };
 
@@ -2064,6 +2216,11 @@ node_regmodrm(struct x86_decode_fsm *fsm
 			return -1;
 		}
 		instr->emul = group1[instr->regmodrm.reg].emul;
+	} else if (opcode->group3) {
+		if (group3[instr->regmodrm.reg].emul == NULL) {
+			return -1;
+		}
+		instr->emul = group3[instr->regmodrm.reg].emul;
 	} else if (opcode->group11) {
 		if (group11[instr->regmodrm.reg].emul == NULL) {
 			return -1;
@@ -2425,150 +2582,270 @@ x86_decode(uint8_t *inst_bytes, size_t i
 
 /* -------------------------------------------------------------------------- */
 
-static inline uint8_t
-compute_parity(uint8_t *data)
-{
-	uint64_t *ptr = (uint64_t *)data;
-	uint64_t val = *ptr;
+#define EXEC_INSTR(sz, instr)						\
+static uint##sz##_t							\
+exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags)\
+{									\
+	uint##sz##_t res;						\
+	__asm __volatile (						\
+		#instr " %2, %3;"					\
+		"mov %3, %1;"						\
+		"pushfq;"						\
+		"popq %0"						\
+	    : "=r" (*rflags), "=r" (res)				\
+	    : "r" (op1), "r" (op2));					\
+	return res;							\
+}
+
+#define EXEC_DISPATCHER(instr)						\
+static uint64_t								\
+exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
+{									\
+	switch (opsize) {						\
+	case 1:								\
+		return exec_##instr##8(op1, op2, rflags);		\
+	case 2:								\
+		return exec_##instr##16(op1, op2, rflags);		\
+	case 4:								\
+		return exec_##instr##32(op1, op2, rflags);		\
+	default:							\
+		return exec_##instr##64(op1, op2, rflags);		\
+	}								\
+}
+
+/* SUB: ret = op2 - op1 */
+#define PSL_SUB_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
+EXEC_INSTR(8, sub)
+EXEC_INSTR(16, sub)
+EXEC_INSTR(32, sub)
+EXEC_INSTR(64, sub)
+EXEC_DISPATCHER(sub)
+
+/* OR:  ret = op1 | op2 */
+#define PSL_OR_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
+EXEC_INSTR(8, or)
+EXEC_INSTR(16, or)
+EXEC_INSTR(32, or)
+EXEC_INSTR(64, or)
+EXEC_DISPATCHER(or)
+
+/* AND: ret = op1 & op2 */
+#define PSL_AND_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
+EXEC_INSTR(8, and)
+EXEC_INSTR(16, and)
+EXEC_INSTR(32, and)
+EXEC_INSTR(64, and)
+EXEC_DISPATCHER(and)
+
+/* XOR: ret = op1 ^ op2 */
+#define PSL_XOR_MASK	(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
+EXEC_INSTR(8, xor)
+EXEC_INSTR(16, xor)
+EXEC_INSTR(32, xor)
+EXEC_INSTR(64, xor)
+EXEC_DISPATCHER(xor)
 
-	val ^= val >> 32;
-	val ^= val >> 16;
-	val ^= val >> 8;
-	val ^= val >> 4;
-	val ^= val >> 2;
-	val ^= val >> 1;
-	return (~val) & 1;
-}
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Emulation functions. We don't care about the order of the operands, except
+ * for SUB, CMP and TEST. For these we look at mem->write to determine who
+ * is op1 and who is op2.
+ */
 
 static void
-x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
 {
+	uint64_t *retval = (uint64_t *)mem->data;
 	const bool write = mem->write;
-	uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
-	uint8_t data[8];
-	size_t i;
-
-	fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
+	uint64_t *op1, op2, fl, ret;
 
-	memcpy(data, mem->data, sizeof(data));
+	op1 = (uint64_t *)mem->data;
+	op2 = 0;
 
-	/* Fetch the value to be OR'ed. */
+	/* Fetch the value to be OR'ed (op2). */
+	mem->data = (uint8_t *)&op2;
 	mem->write = false;
-	(*cb)(mem);
+	(*__callbacks.mem)(mem);
 
 	/* Perform the OR. */
-	for (i = 0; i < mem->size; i++) {
-		mem->data[i] |= data[i];
-		if (mem->data[i] != 0)
-			fl |= PSL_Z;
-	}
-	if (mem->data[mem->size-1] & __BIT(7))
-		fl |= PSL_N;
-	if (compute_parity(mem->data))
-		fl |= PSL_PF;
+	ret = exec_or(*op1, op2, &fl, mem->size);
 
 	if (write) {
 		/* Write back the result. */
+		mem->data = (uint8_t *)&ret;
 		mem->write = true;
-		(*cb)(mem);
+		(*__callbacks.mem)(mem);
+	} else {
+		/* Return data to the caller. */
+		*retval = ret;
 	}
 
-	gprs[NVMM_X64_GPR_RFLAGS] = fl;
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
 }
 
 static void
-x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
 {
+	uint64_t *retval = (uint64_t *)mem->data;
 	const bool write = mem->write;
-	uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
-	uint8_t data[8];
-	size_t i;
-
-	fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
+	uint64_t *op1, op2, fl, ret;
 
-	memcpy(data, mem->data, sizeof(data));
+	op1 = (uint64_t *)mem->data;
+	op2 = 0;
 
-	/* Fetch the value to be AND'ed. */
+	/* Fetch the value to be AND'ed (op2). */
+	mem->data = (uint8_t *)&op2;
 	mem->write = false;
-	(*cb)(mem);
+	(*__callbacks.mem)(mem);
 
 	/* Perform the AND. */
-	for (i = 0; i < mem->size; i++) {
-		mem->data[i] &= data[i];
-		if (mem->data[i] != 0)
-			fl |= PSL_Z;
-	}
-	if (mem->data[mem->size-1] & __BIT(7))
-		fl |= PSL_N;
-	if (compute_parity(mem->data))
-		fl |= PSL_PF;
+	ret = exec_and(*op1, op2, &fl, mem->size);
 
 	if (write) {
 		/* Write back the result. */
+		mem->data = (uint8_t *)&ret;
 		mem->write = true;
-		(*cb)(mem);
+		(*__callbacks.mem)(mem);
+	} else {
+		/* Return data to the caller. */
+		*retval = ret;
 	}
 
-	gprs[NVMM_X64_GPR_RFLAGS] = fl;
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
 }
 
 static void
-x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
 {
+	uint64_t *retval = (uint64_t *)mem->data;
 	const bool write = mem->write;
-	uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
-	uint8_t data[8];
-	size_t i;
+	uint64_t *op1, *op2, fl, ret;
+	uint64_t tmp;
+	bool memop1;
+
+	memop1 = !mem->write;
+	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
+	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
+
+	/* Fetch the value to be SUB'ed (op1 or op2). */
+	mem->data = (uint8_t *)&tmp;
+	mem->write = false;
+	(*__callbacks.mem)(mem);
 
-	fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
+	/* Perform the SUB. */
+	ret = exec_sub(*op1, *op2, &fl, mem->size);
+
+	if (write) {
+		/* Write back the result. */
+		mem->data = (uint8_t *)&ret;
+		mem->write = true;
+		(*__callbacks.mem)(mem);
+	} else {
+		/* Return data to the caller. */
+		*retval = ret;
+	}
 
-	memcpy(data, mem->data, sizeof(data));
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
+}
 
-	/* Fetch the value to be XOR'ed. */
+static void
+x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
+{
+	uint64_t *retval = (uint64_t *)mem->data;
+	const bool write = mem->write;
+	uint64_t *op1, op2, fl, ret;
+
+	op1 = (uint64_t *)mem->data;
+	op2 = 0;
+
+	/* Fetch the value to be XOR'ed (op2). */
+	mem->data = (uint8_t *)&op2;
 	mem->write = false;
-	(*cb)(mem);
+	(*__callbacks.mem)(mem);
 
 	/* Perform the XOR. */
-	for (i = 0; i < mem->size; i++) {
-		mem->data[i] ^= data[i];
-		if (mem->data[i] != 0)
-			fl |= PSL_Z;
-	}
-	if (mem->data[mem->size-1] & __BIT(7))
-		fl |= PSL_N;
-	if (compute_parity(mem->data))
-		fl |= PSL_PF;
+	ret = exec_xor(*op1, op2, &fl, mem->size);
 
 	if (write) {
 		/* Write back the result. */
+		mem->data = (uint8_t *)&ret;
 		mem->write = true;
-		(*cb)(mem);
+		(*__callbacks.mem)(mem);
+	} else {
+		/* Return data to the caller. */
+		*retval = ret;
 	}
 
-	gprs[NVMM_X64_GPR_RFLAGS] = fl;
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
 }
 
 static void
-x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
+{
+	uint64_t *op1, *op2, fl;
+	uint64_t tmp;
+	bool memop1;
+
+	memop1 = !mem->write;
+	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
+	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
+
+	/* Fetch the value to be CMP'ed (op1 or op2). */
+	mem->data = (uint8_t *)&tmp;
+	mem->write = false;
+	(*__callbacks.mem)(mem);
+
+	/* Perform the CMP. */
+	exec_sub(*op1, *op2, &fl, mem->size);
+
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
+}
+
+static void
+x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
+{
+	uint64_t *op1, *op2, fl;
+	uint64_t tmp;
+	bool memop1;
+
+	memop1 = !mem->write;
+	op1 = memop1 ? &tmp : (uint64_t *)mem->data;
+	op2 = memop1 ? (uint64_t *)mem->data : &tmp;
+
+	/* Fetch the value to be TEST'ed (op1 or op2). */
+	mem->data = (uint8_t *)&tmp;
+	mem->write = false;
+	(*__callbacks.mem)(mem);
+
+	/* Perform the TEST. */
+	exec_and(*op1, *op2, &fl, mem->size);
+
+	gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
+	gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
+}
+
+static void
+x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
 {
 	/*
 	 * Nothing special, just move without emulation.
 	 */
-	(*cb)(mem);
+	(*__callbacks.mem)(mem);
 }
 
 static void
-x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
 {
 	/*
 	 * Just move, and update RDI.
 	 */
-	(*cb)(mem);
+	(*__callbacks.mem)(mem);
 
 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
 		gprs[NVMM_X64_GPR_RDI] -= mem->size;
@@ -2578,13 +2855,12 @@ x86_emul_stos(struct nvmm_mem *mem, void
 }
 
 static void
-x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
 {
 	/*
 	 * Just move, and update RSI.
 	 */
-	(*cb)(mem);
+	(*__callbacks.mem)(mem);
 
 	if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
 		gprs[NVMM_X64_GPR_RSI] -= mem->size;
@@ -2594,8 +2870,7 @@ x86_emul_lods(struct nvmm_mem *mem, void
 }
 
 static void
-x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
-    uint64_t *gprs)
+x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
 {
 	/*
 	 * Special instruction: double memory operand. Don't call the cb,
@@ -2795,7 +3070,7 @@ assist_mem_double(struct nvmm_machine *m
 		return -1;
 
 	mem.size = size;
-	(*instr->emul)(&mem, NULL, state->gprs);
+	(*instr->emul->func)(&mem, state->gprs);
 
 	return 0;
 }
@@ -2860,15 +3135,25 @@ assist_mem_single(struct nvmm_machine *m
 		default:
 			DISASSEMBLER_BUG();
 		}
+	} else if (instr->emul->read) {
+		if (instr->dst.type != STORE_REG) {
+			DISASSEMBLER_BUG();
+		}
+		if (instr->dst.disp.type != DISP_NONE) {
+			DISASSEMBLER_BUG();
+		}
+		val = state->gprs[instr->dst.u.reg->num];
+		val = __SHIFTOUT(val, instr->dst.u.reg->mask);
+		memcpy(mem.data, &val, mem.size);
 	}
 
-	(*instr->emul)(&mem, __callbacks.mem, state->gprs);
+	(*instr->emul->func)(&mem, state->gprs);
 
-	if (!mem.write) {
+	if (!instr->emul->notouch && !mem.write) {
 		if (instr->dst.type != STORE_REG) {
 			DISASSEMBLER_BUG();
 		}
-		memcpy(&val, mem.data, sizeof(uint64_t));
+		memcpy(&val, membuf, sizeof(uint64_t));
 		val = __SHIFTIN(val, instr->dst.u.reg->mask);
 		state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
 		state->gprs[instr->dst.u.reg->num] |= val;

Index: src/tests/lib/libnvmm/h_mem_assist.c
diff -u src/tests/lib/libnvmm/h_mem_assist.c:1.4 src/tests/lib/libnvmm/h_mem_assist.c:1.5
--- src/tests/lib/libnvmm/h_mem_assist.c:1.4	Fri Feb  1 06:49:58 2019
+++ src/tests/lib/libnvmm/h_mem_assist.c	Thu Feb  7 10:58:45 2019
@@ -292,10 +292,13 @@ extern uint8_t test8_begin, test8_end;
 extern uint8_t test9_begin, test9_end;
 extern uint8_t test10_begin, test10_end;
 extern uint8_t test11_begin, test11_end;
+extern uint8_t test12_begin, test12_end;
+extern uint8_t test13_begin, test13_end;
+extern uint8_t test14_begin, test14_end;
 
 static const struct test tests[] = {
 	{ "test1 - MOV", &test1_begin, &test1_end, 0x3004 },
-	{ "test2 - OR",  &test2_begin, &test2_end, 0x14FF },
+	{ "test2 - OR",  &test2_begin, &test2_end, 0x16FF },
 	{ "test3 - AND", &test3_begin, &test3_end, 0x1FC0 },
 	{ "test4 - XOR", &test4_begin, &test4_end, 0x10CF },
 	{ "test5 - Address Sizes", &test5_begin, &test5_end, 0x1F00 },
@@ -305,6 +308,9 @@ static const struct test tests[] = {
 	{ "test9 - MOVS", &test9_begin, &test9_end, 0x12345678 },
 	{ "test10 - MOVZXB", &test10_begin, &test10_end, 0x00000078 },
 	{ "test11 - MOVZXW", &test11_begin, &test11_end, 0x00005678 },
+	{ "test12 - CMP", &test12_begin, &test12_end, 0x00000001 },
+	{ "test13 - SUB", &test13_begin, &test13_end, 0x0000000F0000A0FF },
+	{ "test14 - TEST", &test14_begin, &test14_end, 0x00000001 },
 	{ NULL, NULL, NULL, -1 }
 };
 
Index: src/tests/lib/libnvmm/h_mem_assist_asm.S
diff -u src/tests/lib/libnvmm/h_mem_assist_asm.S:1.4 src/tests/lib/libnvmm/h_mem_assist_asm.S:1.5
--- src/tests/lib/libnvmm/h_mem_assist_asm.S:1.4	Wed Feb  6 15:42:31 2019
+++ src/tests/lib/libnvmm/h_mem_assist_asm.S	Thu Feb  7 10:58:45 2019
@@ -38,6 +38,9 @@
 	.globl	test9_begin, test9_end
 	.globl	test10_begin, test10_end
 	.globl	test11_begin, test11_end
+	.globl	test12_begin, test12_end
+	.globl	test13_begin, test13_end
+	.globl	test14_begin, test14_end
 	.text
 	.code64
 
@@ -74,6 +77,10 @@ test2_begin:
 	movq	$0x0400,%rcx
 	orw	%cx,(%rax)
 
+	movq	$0x0200,%rcx
+	orq	(%rax),%rcx
+	movq	%rcx,(%rax)
+
 	TEST_END
 test2_end:
 
@@ -202,3 +209,85 @@ test11_begin:
 
 	TEST_END
 test11_end:
+
+	.align	64
+test12_begin:
+	movq	$0x1000,%rax
+	movq	$0xFFFFFFFFF2345678,(%rax)
+
+	cmpb	$0x78,(%rax)
+	jne	.L12_failure
+	cmpb	$0x77,(%rax)
+	jl	.L12_failure
+	cmpb	$0x79,(%rax)
+	jg	.L12_failure
+
+	cmpw	$0x5678,(%rax)
+	jne	.L12_failure
+	cmpw	$0x5677,(%rax)
+	jl	.L12_failure
+	cmpw	$0x5679,(%rax)
+	jg	.L12_failure
+
+	cmpl	$0xF2345678,(%rax)
+	jne	.L12_failure
+	cmpl	$0xF2345677,(%rax)
+	jl	.L12_failure
+	cmpl	$0xF2345679,(%rax)
+	jg	.L12_failure
+
+	cmpq	$0xFFFFFFFFF2345678,(%rax)
+	jne	.L12_failure
+	cmpq	$0xFFFFFFFFF2345677,(%rax)
+	jl	.L12_failure
+	cmpq	$0xFFFFFFFFF2345679,(%rax)
+	jg	.L12_failure
+
+.L12_success:
+	movq	$1,(%rax)
+	TEST_END
+.L12_failure:
+	movq	$0,(%rax)
+	TEST_END
+test12_end:
+
+	.align	64
+test13_begin:
+	movq	$0x1000,%rax
+	movq	$0x000000001000A0FF,(%rax)
+
+	movq	$0xFFFF,%rcx
+	subb	%cl,(%rax)
+
+	movq	$0xA000,%rcx
+	subw	%cx,(%rax)
+
+	movq	$0x0000000F1000A0FF,%rcx
+	subq	(%rax),%rcx
+
+	movq	%rcx,(%rax)
+
+	TEST_END
+test13_end:
+
+	.align	64
+test14_begin:
+	movq	$0x1000,%rax
+	movq	$0xA0FF,(%rax)
+
+	testb	$0x0F,(%rax)
+	jz	.L14_failure
+
+	testw	$0x0F00,(%rax)
+	jnz	.L14_failure
+
+	testl	$0xA000,(%rax)
+	jz	.L14_failure
+
+.L14_success:
+	movq	$1,(%rax)
+	TEST_END
+.L14_failure:
+	movq	$0,(%rax)
+	TEST_END
+test14_end:
