[Qemu-devel] [PATCH] target-i386: Preserve the Z bit for bt/bts/btr/btc
Older Intel manuals (pre-2010) and current AMD manuals describe Z as undefined, but newer Intel manuals describe Z as unchanged. Cc: qemu-sta...@nongnu.org Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 40 +++- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 02625e3..032b0fd 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } bt_op: tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); +tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); switch(op) { case 0: -tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]); -tcg_gen_movi_tl(cpu_cc_dst, 0); break; case 1: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; case 2: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); -tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); -tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0); +tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; default: case 3: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; } -set_cc_op(s, CC_OP_SARB + ot); if (op != 0) { if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); } else { gen_op_mov_reg_v(ot, rm, cpu_T[0]); } +} + +/* Delay all CC updates until after the store above. Note that + C is the result of the test, Z is unchanged, and the others + are all undefined. */ +switch (s->cc_op) { +case CC_OP_MULB ... CC_OP_MULQ: +case CC_OP_ADDB ... CC_OP_ADDQ: +case CC_OP_ADCB ... CC_OP_ADCQ: +case CC_OP_SUBB ... CC_OP_SUBQ: +case CC_OP_SBBB ... CC_OP_SBBQ: +case CC_OP_LOGICB ... CC_OP_LOGICQ: +case CC_OP_INCB ... CC_OP_INCQ: +case CC_OP_DECB ... CC_OP_DECQ: +case CC_OP_SHLB ... CC_OP_SHLQ: +case CC_OP_SARB ... CC_OP_SARQ: +case CC_OP_BMILGB ... CC_OP_BMILGQ: +/* Z was going to be computed from the non-zero status of CC_DST. + We can get that same Z value (and the new C value) by leaving + CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the + same width. */ tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); -tcg_gen_movi_tl(cpu_cc_dst, 0); +set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB); +break; +default: +/* Otherwise, generate EFLAGS and replace the C bit. */ +gen_compute_eflags(s); +tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4, + ctz32(CC_C), 1); +break; } break; case 0x1bc: /* bsf / tzcnt */ -- 1.9.0
Re: [Qemu-devel] [PATCH] target-i386: Preserve the Z bit for bt/bts/btr/btc
Il 09/04/2014 16:56, Richard Henderson ha scritto: Older Intel manuals (pre-2010) describe Z as undefined, but AMD and newer Intel manuals describe Z as unchanged. Signed-off-by: Richard Henderson --- target-i386/translate.c | 40 +++- 1 file changed, 31 insertions(+), 9 deletions(-) --- Clemens, your patch fails to fix flags computation for bts/btr/btc, which should be done similarly to bt. And to answer your question, no, QEMU does not make any assumptions about undefined flags. We often set them to zero, just because that is easier than any other setting. r~ diff --git a/target-i386/translate.c b/target-i386/translate.c index 02625e3..032b0fd 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } bt_op: tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); +tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); switch(op) { case 0: -tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]); -tcg_gen_movi_tl(cpu_cc_dst, 0); break; case 1: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; case 2: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); -tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); -tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0); +tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; default: case 3: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; } -set_cc_op(s, CC_OP_SARB + ot); if (op != 0) { if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); } else { gen_op_mov_reg_v(ot, rm, cpu_T[0]); } +} + +/* Delay all CC updates until after the store above. Note that + C is the result of the test, Z is unchanged, and the others + are all undefined. */ +switch (s->cc_op) { +case CC_OP_MULB ... CC_OP_MULQ: +case CC_OP_ADDB ... CC_OP_ADDQ: +case CC_OP_ADCB ... CC_OP_ADCQ: +case CC_OP_SUBB ... CC_OP_SUBQ: +case CC_OP_SBBB ... CC_OP_SBBQ: +case CC_OP_LOGICB ... CC_OP_LOGICQ: +case CC_OP_INCB ... CC_OP_INCQ: +case CC_OP_DECB ... CC_OP_DECQ: +case CC_OP_SHLB ... CC_OP_SHLQ: +case CC_OP_SARB ... CC_OP_SARQ: +case CC_OP_BMILGB ... CC_OP_BMILGQ: +/* Z was going to be computed from the non-zero status of CC_DST. + We can get that same Z value (and the new C value) by leaving + CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the + same width. */ tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); -tcg_gen_movi_tl(cpu_cc_dst, 0); +set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB); +break; +default: +/* Otherwise, generate EFLAGS and replace the C bit. */ +gen_compute_eflags(s); +tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4, + ctz32(CC_C), 1); +break; } break; case 0x1bc: /* bsf / tzcnt */ Cc: qemu-sta...@nongnu.org Reviewed-by: Paolo Bonzini Hmm, actually the comments aren't following the common style of asterisks-on-every-line. Just fix it up without posting v2. Paolo
Re: [Qemu-devel] [PATCH] target-i386: Preserve the Z bit for bt/bts/btr/btc
Great, thanks for the feedback and the fix Richard! On Wed, Apr 9, 2014 at 1:56 PM, Richard Henderson wrote: > Older Intel manuals (pre-2010) describe Z as undefined, but AMD and > newer Intel manuals describe Z as unchanged. > > Signed-off-by: Richard Henderson > --- > target-i386/translate.c | 40 +++- > 1 file changed, 31 insertions(+), 9 deletions(-) > > --- > Clemens, your patch fails to fix flags computation for bts/btr/btc, > which should be done similarly to bt. > > And to answer your question, no, QEMU does not make any assumptions > about undefined flags. We often set them to zero, just because that > is easier than any other setting. > > > r~ > > > > diff --git a/target-i386/translate.c b/target-i386/translate.c > index 02625e3..032b0fd 100644 > --- a/target-i386/translate.c > +++ b/target-i386/translate.c > @@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > } > bt_op: > tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); > +tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); > switch(op) { > case 0: > -tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]); > -tcg_gen_movi_tl(cpu_cc_dst, 0); > break; > case 1: > -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); > tcg_gen_movi_tl(cpu_tmp0, 1); > tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); > tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); > break; > case 2: > -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); > tcg_gen_movi_tl(cpu_tmp0, 1); > tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); > -tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); > -tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0); > +tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0); > break; > default: > case 3: > -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); > tcg_gen_movi_tl(cpu_tmp0, 1); > tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); > tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); > break; > } > -set_cc_op(s, CC_OP_SARB + ot); > if (op != 0) { > if (mod != 3) { > gen_op_st_v(s, ot, cpu_T[0], cpu_A0); > } else { > gen_op_mov_reg_v(ot, rm, cpu_T[0]); > } > +} > + > +/* Delay all CC updates until after the store above. Note that > + C is the result of the test, Z is unchanged, and the others > + are all undefined. */ > +switch (s->cc_op) { > +case CC_OP_MULB ... CC_OP_MULQ: > +case CC_OP_ADDB ... CC_OP_ADDQ: > +case CC_OP_ADCB ... CC_OP_ADCQ: > +case CC_OP_SUBB ... CC_OP_SUBQ: > +case CC_OP_SBBB ... CC_OP_SBBQ: > +case CC_OP_LOGICB ... CC_OP_LOGICQ: > +case CC_OP_INCB ... CC_OP_INCQ: > +case CC_OP_DECB ... CC_OP_DECQ: > +case CC_OP_SHLB ... CC_OP_SHLQ: > +case CC_OP_SARB ... CC_OP_SARQ: > +case CC_OP_BMILGB ... CC_OP_BMILGQ: > +/* Z was going to be computed from the non-zero status of > CC_DST. > + We can get that same Z value (and the new C value) by > leaving > + CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the > + same width. */ > tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); > -tcg_gen_movi_tl(cpu_cc_dst, 0); > +set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB); > +break; > +default: > +/* Otherwise, generate EFLAGS and replace the C bit. */ > +gen_compute_eflags(s); > +tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4, > + ctz32(CC_C), 1); > +break; > } > break; > case 0x1bc: /* bsf / tzcnt */ > -- > 1.9.0 > > -- Clemens Kolbitsch Security Researcher kolbit...@lastline.com Mobile +1 (206) 356-7745 Land +1 (805) 456-7076 Lastline, Inc. 6950 Hollister Avenue, Suite 101 Goleta, CA 93117 www.lastline.com
[Qemu-devel] [PATCH] target-i386: Preserve the Z bit for bt/bts/btr/btc
Older Intel manuals (pre-2010) describe Z as undefined, but AMD and newer Intel manuals describe Z as unchanged. Signed-off-by: Richard Henderson --- target-i386/translate.c | 40 +++- 1 file changed, 31 insertions(+), 9 deletions(-) --- Clemens, your patch fails to fix flags computation for bts/btr/btc, which should be done similarly to bt. And to answer your question, no, QEMU does not make any assumptions about undefined flags. We often set them to zero, just because that is easier than any other setting. r~ diff --git a/target-i386/translate.c b/target-i386/translate.c index 02625e3..032b0fd 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } bt_op: tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); +tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); switch(op) { case 0: -tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]); -tcg_gen_movi_tl(cpu_cc_dst, 0); break; case 1: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; case 2: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); -tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); -tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0); +tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; default: case 3: -tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); tcg_gen_movi_tl(cpu_tmp0, 1); tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]); tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; } -set_cc_op(s, CC_OP_SARB + ot); if (op != 0) { if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); } else { gen_op_mov_reg_v(ot, rm, cpu_T[0]); } +} + +/* Delay all CC updates until after the store above. Note that + C is the result of the test, Z is unchanged, and the others + are all undefined. */ +switch (s->cc_op) { +case CC_OP_MULB ... CC_OP_MULQ: +case CC_OP_ADDB ... CC_OP_ADDQ: +case CC_OP_ADCB ... CC_OP_ADCQ: +case CC_OP_SUBB ... CC_OP_SUBQ: +case CC_OP_SBBB ... CC_OP_SBBQ: +case CC_OP_LOGICB ... CC_OP_LOGICQ: +case CC_OP_INCB ... CC_OP_INCQ: +case CC_OP_DECB ... CC_OP_DECQ: +case CC_OP_SHLB ... CC_OP_SHLQ: +case CC_OP_SARB ... CC_OP_SARQ: +case CC_OP_BMILGB ... CC_OP_BMILGQ: +/* Z was going to be computed from the non-zero status of CC_DST. + We can get that same Z value (and the new C value) by leaving + CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the + same width. */ tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); -tcg_gen_movi_tl(cpu_cc_dst, 0); +set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB); +break; +default: +/* Otherwise, generate EFLAGS and replace the C bit. */ +gen_compute_eflags(s); +tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4, + ctz32(CC_C), 1); +break; } break; case 0x1bc: /* bsf / tzcnt */ -- 1.9.0