Re: [PATCH] target/i386: fix ADOX followed by ADCX

2023-01-31 Thread Richard Henderson

On 1/30/23 22:54, Paolo Bonzini wrote:

When ADCX is followed by ADOX or vice versa, the second instruction's
carry comes from EFLAGS.  This is handled by this bit of gen_ADCOX:

 tcg_gen_extract_tl(carry_in, cpu_cc_src,
 ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);

Unfortunately, in this case cc_op has been overwritten by the previous
"if" statement to CC_OP_ADCOX.  This works by chance when the first
instruction is ADCX; however, if the first instruction is ADOX,
ADCX will incorrectly take its carry from OF instead of CF.

Fix by moving the computation of the new cc_op at the end of the function.
The included exhaustive test case fails without this patch and passes
afterwards.

Because ADCX/ADOX need not be invoked through the VEX prefix, this
regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement
0x0f 0x38, add AVX", 2022-10-18).  However, the mistake happened a
little earlier, when BMI instructions were rewritten using the new
decoder framework.

Resolves:https://gitlab.com/qemu-project/qemu/-/issues/1471
Reported-by: Paul Jolly
Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new 
decoder", 2022-10-18)
Cc:qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini
---
  target/i386/tcg/emit.c.inc   | 20 +
  tests/tcg/i386/Makefile.target   |  6 ++-
  tests/tcg/i386/test-i386-adcox.c | 75 
  3 files changed, 91 insertions(+), 10 deletions(-)
  create mode 100644 tests/tcg/i386/test-i386-adcox.c


Reviewed-by: Richard Henderson 

r~



[PATCH] target/i386: fix ADOX followed by ADCX

2023-01-31 Thread Paolo Bonzini
When ADCX is followed by ADOX or vice versa, the second instruction's
carry comes from EFLAGS.  This is handled by this bit of gen_ADCOX:

tcg_gen_extract_tl(carry_in, cpu_cc_src,
ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);

Unfortunately, in this case cc_op has been overwritten by the previous
"if" statement to CC_OP_ADCOX.  This works by chance when the first
instruction is ADCX; however, if the first instruction is ADOX,
ADCX will incorrectly take its carry from OF instead of CF.

Fix by moving the computation of the new cc_op at the end of the function.
The included exhaustive test case fails without this patch and passes
afterwards.

Because ADCX/ADOX need not be invoked through the VEX prefix, this
regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement
0x0f 0x38, add AVX", 2022-10-18).  However, the mistake happened a
little earlier, when BMI instructions were rewritten using the new
decoder framework.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471
Reported-by: Paul Jolly 
Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to 
new decoder", 2022-10-18)
Cc: qemu-sta...@nongnu.org
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/emit.c.inc   | 20 +
 tests/tcg/i386/Makefile.target   |  6 ++-
 tests/tcg/i386/test-i386-adcox.c | 75 
 3 files changed, 91 insertions(+), 10 deletions(-)
 create mode 100644 tests/tcg/i386/test-i386-adcox.c

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e33688f672a2..5a1d3803f901 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -866,6 +866,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq)
 
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
+int opposite_cc_op;
 TCGv carry_in = NULL;
 TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
 TCGv zero;
@@ -873,14 +874,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, 
MemOp ot, int cc_op)
 if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
 /* Re-use the carry-out from a previous round.  */
 carry_in = carry_out;
-cc_op = s->cc_op;
-} else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
-/* Merge with the carry-out from the opposite instruction.  */
-cc_op = CC_OP_ADCOX;
-}
-
-/* If we don't have a carry-in, get it out of EFLAGS.  */
-if (!carry_in) {
+} else {
+/* We don't have a carry-in, get it out of EFLAGS.  */
 if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
 gen_compute_eflags(s);
 }
@@ -904,7 +899,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, 
MemOp ot, int cc_op)
 tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
 break;
 }
-set_cc_op(s, cc_op);
+
+opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
+if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
+/* Merge with the carry-out from the opposite instruction.  */
+set_cc_op(s, CC_OP_ADCOX);
+} else {
+set_cc_op(s, cc_op);
+}
 }
 
 static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index 3273aa8061f8..ac443995447f 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -14,7 +14,7 @@ config-cc.mak: Makefile
 I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
 ALL_X86_TESTS=$(I386_SRCS:.c=)
 SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), 
$(ALL_X86_TESTS))
 
 test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
 run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2
 run-test-i386-bmi2: QEMU_OPTS += -cpu max
 run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
 
+test-i386-adcox: CFLAGS=-O2
+run-test-i386-adcox: QEMU_OPTS += -cpu max
+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
+
 #
 # hello-i386 is a barebones app
 #
diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
new file mode 100644
index ..16169efff823
--- /dev/null
+++ b/tests/tcg/i386/test-i386-adcox.c
@@ -0,0 +1,75 @@
+/* See if various BMI2 instructions give expected results */
+#include 
+#include 
+#include 
+
+#define CC_C 1
+#define CC_O (1 << 11)
+
+#ifdef __x86_64__
+#define REG uint64_t
+#else
+#define REG uint32_t
+#endif
+
+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG 
adox_operand)
+{
+REG flags;
+REG out_adcx, out_adox;
+
+asm("pushf; pop %0" : "=r"(flags));
+flags &= ~(CC_C | CC_O);
+flags |= (in_c ? CC_C : 0);
+flags |= (in_o ? CC_O : 0);
+
+out_adcx = adcx_operand;
+