On 10/20/24 08:53, Paolo Bonzini wrote:
Most uses of CC_OP_DYNAMIC are for CMP/JB/JE or similar sequences.
We can optimize many of them to avoid computation of the flags.
This eliminates both the TCG ops that set up the new cc_op and helper
instructions, because evaluating just ZF is much cheaper.
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
target/i386/helper.h | 1 +
target/i386/tcg/cc_helper.c | 20 ++++++++++++++++++++
target/i386/tcg/translate.c | 10 +++++++---
3 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/target/i386/helper.h b/target/i386/helper.h
index eeb8df56eaa..3f67098f11f 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -1,5 +1,6 @@
DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_3(cc_compute_nz, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
DEF_HELPER_3(write_eflags, void, env, tl, i32)
DEF_HELPER_1(read_eflags, tl, env)
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index 40583c04cf9..c24e6a14c07 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -95,6 +95,26 @@ static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O);
}
+target_ulong helper_cc_compute_nz(target_ulong dst, target_ulong src1,
+ int op)
+{
+ target_ulong mask;
+
+ if (CC_OP_HAS_EFLAGS(op)) {
+ return ~src1 & CC_Z;
+ } else {
+ MemOp size = cc_op_size(op);
+
+ if (size == MO_TL) {
+ /* Avoid shift count overflow when computing the mask below. */
+ return dst;
+ }
+
+ mask = (1ull << (8 << size)) - 1;
FWIW, MAKE_64BIT_MASK(0, 8 << size) does not have the overflow problem.
r~