This patch is my proposed fix for (the regression aspects of) PR123238,
a code quality regression on x86_64 triggered by the generation of
VCOND_MASK. The regression is actually just bad luck. From gimple,
VCOND_MASK(a==b,c,d) is equivalent to VCOND_MASK(a!=b,d,c), and which
form gets generated was previously arbitrary. This is reasonable for
many (most?) targets, but on x86_64 there's an asymmetry: equality
can be performed in one instruction, but inequality requires three.
Teaching the middle-end's expand pass which form is preferred could
in theory be done with a new (very specific) target hook, which would
require documentation, but a more generic solution is for expand's
expand_vec_cond_mask_optab_fn to make use of rtx_costs, and reverse
the sense of VCOND_MASK if that would be an improvement. This has
the convenient property that the default rtx_costs of all comparison
operators are the same, resulting in no change unless explicitly
specified by the backend.
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures. Ok for mainline, for both the i386 bits and the
middle-end bits?
2026-02-11 Roger Sayle <[email protected]>
gcc/ChangeLog
PR target/123238
* expr.cc (convert_tree_comp_to_rtx): Make global.
* expr.h (convert_tree_comp_to_rtx): Prototype here.
* internal-fn.cc (expand_vec_cond_mask_optab_fn): Use rtx_costs
to determine whether swapping operands would result in better
code.
* config/i386/i386-expand.cc (ix86_expand_int_vec_cmp): On
AVX512 targets use a ternlog instead of a comparison to negate
the mask (requires one instruction instead of two).
* config/i386/i386.cc (ix86_rtx_costs): Refactor code for UNSPEC.
Provide costs for UNSPEC_BLENDV and UNSPEC_MOVMSK. Provide
costs for comparison operators of integer vector modes.
gcc/testsuite/ChangeLog
PR target/123238
* gcc.target/i386/pr123238.c: New test case.
Roger
--
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a82bb4399c9..366ad513da9 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -5282,11 +5282,17 @@ ix86_expand_int_vec_cmp (rtx operands[])
return false;
if (negate)
- cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
- CONST0_RTX (GET_MODE (cmp)),
- NULL, NULL, &negate);
-
- gcc_assert (!negate);
+ {
+ if (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (cmp)) >= 16)
+ cmp = gen_rtx_XOR (GET_MODE (cmp), cmp, CONSTM1_RTX (GET_MODE (cmp)));
+ else
+ {
+ cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
+ CONST0_RTX (GET_MODE (cmp)),
+ NULL, NULL, &negate);
+ gcc_assert (!negate);
+ }
+ }
if (operands[0] != cmp)
emit_move_insn (operands[0], cmp);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 42ae9ccb051..e434ebb835e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23024,36 +23024,71 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case UNSPEC:
- if (XINT (x, 1) == UNSPEC_TP)
- *total = 0;
- else if (XINT (x, 1) == UNSPEC_VTERNLOG)
+ switch (XINT (x, 1))
{
+ case UNSPEC_TP:
+ *total = 0;
+ break;
+
+ case UNSPEC_VTERNLOG:
*total = cost->sse_op;
- *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
- *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
- *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
+ if (!REG_P (XVECEXP (x, 0, 0)))
+ *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+ if (!REG_P (XVECEXP (x, 0, 1)))
+ *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+ if (!REG_P (XVECEXP (x, 0, 2)))
+ *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
return true;
- }
- else if (XINT (x, 1) == UNSPEC_PTEST)
- {
+
+ case UNSPEC_PTEST:
+ {
+ *total = cost->sse_op;
+ rtx test_op0 = XVECEXP (x, 0, 0);
+ if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+ return false;
+ if (GET_CODE (test_op0) == AND)
+ {
+ rtx and_op0 = XEXP (test_op0, 0);
+ if (GET_CODE (and_op0) == NOT)
+ and_op0 = XEXP (and_op0, 0);
+ *total += rtx_cost (and_op0, GET_MODE (and_op0),
+ AND, 0, speed)
+ + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+ AND, 1, speed);
+ }
+ else
+ *total = rtx_cost (test_op0, GET_MODE (test_op0),
+ UNSPEC, 0, speed);
+ }
+ return true;
+
+ case UNSPEC_BLENDV:
*total = cost->sse_op;
- rtx test_op0 = XVECEXP (x, 0, 0);
- if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
- return false;
- if (GET_CODE (test_op0) == AND)
+ if (!REG_P (XVECEXP (x, 0, 0)))
+ *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+ if (!REG_P (XVECEXP (x, 0, 1)))
+ *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+ if (!REG_P (XVECEXP (x, 0, 2)))
{
- rtx and_op0 = XEXP (test_op0, 0);
- if (GET_CODE (and_op0) == NOT)
- and_op0 = XEXP (and_op0, 0);
- *total += rtx_cost (and_op0, GET_MODE (and_op0),
- AND, 0, speed)
- + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
- AND, 1, speed);
+ rtx cond = XVECEXP (x, 0, 2);
+ if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
+ && CONST_VECTOR_P (XEXP (cond, 1)))
+ {
+ /* avx2_blendvpd256_gt and friends. */
+ if (!REG_P (XEXP (cond, 0)))
+ *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
+ }
+ else
+ *total += rtx_cost (cond, mode, code, 2, speed);
}
- else
- *total = rtx_cost (test_op0, GET_MODE (test_op0),
- UNSPEC, 0, speed);
return true;
+
+ case UNSPEC_MOVMSK:
+ *total = cost->sse_op;
+ return true;
+
+ default:
+ break;
}
return false;
@@ -23270,6 +23305,70 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
return false;
+ case EQ:
+ case GT:
+ case GTU:
+ case LT:
+ case LTU:
+ if (TARGET_SSE2
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) >= 8)
+ {
+ /* vpcmpeq */
+ *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ if (TARGET_XOP
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) <= 16)
+ {
+ /* vpcomeq */
+ *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ return false;
+
+ case NE:
+ case GE:
+ case GEU:
+ if (TARGET_XOP
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) <= 16)
+ {
+ /* vpcomneq */
+ *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ if (TARGET_SSE2
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (mode) >= 8)
+ {
+ if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
+ /* vpcmpeq + vpternlog */
+ *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
+ else
+ /* vpcmpeq + pxor + vpcmpeq */
+ *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ return true;
+ }
+ return false;
+
default:
return false;
}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 4d1a6f3dd1c..203c116f2fa 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9121,7 +9121,7 @@ highest_pow2_factor_for_target (const_tree target, const_tree exp)
/* Convert the tree comparison code TCODE to the rtl one where the
signedness is UNSIGNEDP. */
-static enum rtx_code
+enum rtx_code
convert_tree_comp_to_rtx (enum tree_code tcode, int unsignedp)
{
enum rtx_code code;
diff --git a/gcc/expr.h b/gcc/expr.h
index ddd47cb4ecc..1e89a142d8c 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -338,6 +338,7 @@ extern tree string_constant (tree, tree *, tree *, tree *);
a constant. */
extern tree byte_representation (tree, tree *, tree *, tree *);
+extern enum rtx_code convert_tree_comp_to_rtx (enum tree_code, int);
extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *, tree *);
extern void maybe_optimize_sub_cmp_0 (enum tree_code, tree *, tree *);
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index d879568c6e3..1d20da0e185 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3229,6 +3229,58 @@ expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
gcc_assert (icode != CODE_FOR_nothing);
+ /* Find the comparison generating the mask OP0. */
+ tree cmp_op0 = NULL_TREE;
+ tree cmp_op1 = NULL_TREE;
+ enum tree_code cmp_code = TREE_CODE (op0);
+ if (TREE_CODE_CLASS (cmp_code) == tcc_comparison)
+ {
+ cmp_op0 = TREE_OPERAND (op0, 0);
+ cmp_op1 = TREE_OPERAND (op0, 1);
+ }
+ else if (cmp_code == SSA_NAME)
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
+ if (is_gimple_assign (def_stmt))
+ {
+ cmp_code = gimple_assign_rhs_code (def_stmt);
+ if (TREE_CODE_CLASS (cmp_code) == tcc_comparison)
+ {
+ cmp_op0 = gimple_assign_rhs1 (def_stmt);
+ cmp_op1 = gimple_assign_rhs2 (def_stmt);
+ }
+ }
+ }
+
+ /* Decide whether to invert comparison based on rtx_cost. */
+ if (cmp_op0)
+ {
+ enum tree_code rev_code;
+ tree op_type = TREE_TYPE (cmp_op0);
+ int unsignedp = TYPE_UNSIGNED (op_type);
+ rev_code = invert_tree_comparison (cmp_code, HONOR_NANS (op_type));
+
+ if (rev_code != ERROR_MARK)
+ {
+ tree cmp_type = TREE_TYPE (op0);
+ machine_mode cmp_mode = TYPE_MODE (cmp_type);
+ machine_mode op_mode = TYPE_MODE (op_type);
+ bool speed_p = optimize_insn_for_speed_p ();
+ rtx reg = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
+ rtx veccmp = gen_rtx_fmt_ee (convert_tree_comp_to_rtx (cmp_code,
unsignedp),
+ cmp_mode, reg, reg);
+ int old_cost = rtx_cost (veccmp, cmp_mode, SET, 0, speed_p);
+ PUT_CODE (veccmp, convert_tree_comp_to_rtx (rev_code, unsignedp));
+ int new_cost = rtx_cost (veccmp, cmp_mode, SET, 0, speed_p);
+ if (new_cost < old_cost)
+ {
+ op0 = fold_build2_loc (EXPR_LOCATION (op0), rev_code,
+ cmp_type, cmp_op0, cmp_op1);
+ std::swap (op1, op2);
+ }
+ }
+ }
+
mask = expand_normal (op0);
rtx_op1 = expand_normal (op1);
rtx_op2 = expand_normal (op2);
diff --git a/gcc/testsuite/gcc.target/i386/pr123238.c b/gcc/testsuite/gcc.target/i386/pr123238.c
new file mode 100644
index 00000000000..63906ae0fb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr123238.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+void f(char c[])
+{
+ for (int i = 0; i < 8; i++)
+ c[i] = c[i] ? 'a' : 'c';
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 1 } } */