This patch implements Alexander Monakov's suggestion from PR 123238.
Traditionally, the x86_64 backend implements VCOND_MASK using a three
instruction sequence of pand, pandn and por (requiring three registers),
however when op_true and op_false are both constant vectors, this can
be done using just two instructions, pand and pxor (requiring only two
registers).  This requires deferring, for as long as possible, the forcing
of const_vector operands to memory (the constant pool), which includes
changing the predicates on the define_expand patterns that call
ix86_expand_sse_movcc to (consistently) accept vector_or_const_vector_operand.

void f(char c[])
{
    for (int i = 0; i < 8; i++)
        c[i] = c[i] ? 'a' : 'c';
}

Before with -O2 (11 instructions):
f:      movq    (%rdi), %xmm0
        pxor    %xmm1, %xmm1
        movq    .LC1(%rip), %xmm2       // {'c','c','c'...}
        pcmpeqb %xmm1, %xmm0
        pcmpeqb %xmm1, %xmm0
        movq    .LC0(%rip), %xmm1       // {'a','a','a'...}
        pand    %xmm0, %xmm1
        pandn   %xmm2, %xmm0
        por     %xmm1, %xmm0
        movq    %xmm0, (%rdi)
        ret

After with -O2 (10 instructions):
f:      movq    (%rdi), %xmm0
        pxor    %xmm1, %xmm1
        pcmpeqb %xmm1, %xmm0
        pcmpeqb %xmm1, %xmm0
        movq    .LC2(%rip), %xmm1       // {2,2,2...}
        pand    %xmm1, %xmm0
        movq    .LC1(%rip), %xmm1       // {'c','c','c'...}
        pxor    %xmm1, %xmm0
        movq    %xmm0, (%rdi)
        ret


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for stage1?  I'm posting this now so the
suggestion doesn't get lost, if/when PR 123238 is closed after the
regression is fixed.


2026-02-11  Roger Sayle  <[email protected]>

gcc/ChangeLog
        PR target/123238
        * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Delay
        calling force_reg on op_true and op_false.  Generate an AND
        then XOR sequence if op_true and op_false are both
        CONST_VECTOR_P.
        * config/i386/mmx.md (vcond_mask_<mode>v4hi): Allow operands
        1 and 2 to be vector_or_const_vector_operand.
        (vcond_mask_<mode>v2hi): Likewise.
        (vcond_mask_<mode><mmxintvecmodelower>): Likewise.
        (vcond_mask_<mode><mode>): Likewise.
        * config/i386/sse.md (vcond_mask_<mode><sseintvecmodelower>):
        Likewise.
        (vcond_mask_<mode><sseintvecmodelower>): Likewise.
        (vcond_mask_v1tiv1ti): Likewise.
        (vcond_mask_<mode><sseintvecmodelower>): Likewise.
        (vcond_mask_<mode><sseintvecmodelower>): Likewise.

gcc/testsuite/ChangeLog
        PR target/123238
        * gcc.target/i386/pr123238-2.c: New test case.


Roger
--

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a82bb4399c9..c11115df01a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4462,12 +4462,6 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
   rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
   machine_mode blend_mode = mode;
 
-  if (GET_MODE_SIZE (mode) < 16
-      || !vector_operand (op_true, mode))
-    op_true = force_reg (mode, op_true);
-
-  op_false = force_reg (mode, op_false);
-
   switch (mode)
     {
     case E_V2SFmode:
@@ -4580,6 +4574,11 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 
   if (gen != NULL)
     {
+      if (GET_MODE_SIZE (mode) < 16
+          || !vector_operand (op_true, mode))
+       op_true = force_reg (mode, op_true);
+      op_false = force_reg (mode, op_false);
+
       if (blend_mode == mode)
        x = dest;
       else
@@ -4595,15 +4594,25 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
       if (x != dest)
        emit_move_insn (dest, gen_lowpart (mode, x));
     }
+  else if (CONST_VECTOR_P (op_true) && CONST_VECTOR_P (op_false))
+    {
+      rtx tmp = expand_simple_binop (mode, XOR, op_true, op_false,
+                                    NULL, 1, OPTAB_DIRECT);
+      tmp = expand_simple_binop (mode, AND, cmp, tmp,
+                                NULL, 1, OPTAB_DIRECT);
+      tmp = expand_simple_binop (mode, XOR, tmp, op_false,
+                                dest, 1, OPTAB_DIRECT);
+      if (tmp != dest)
+       emit_move_insn (dest, tmp);
+    }
   else
     {
-      rtx t2, t3;
+      rtx t2 = expand_simple_binop (mode, AND, cmp, op_true,
+                                   NULL, 1, OPTAB_DIRECT);
 
-      t2 = expand_simple_binop (mode, AND, op_true, cmp,
-                               NULL, 1, OPTAB_DIRECT);
-
-      t3 = gen_reg_rtx (mode);
+      rtx t3 = gen_reg_rtx (mode);
       x = gen_rtx_NOT (mode, cmp);
+      op_false = force_reg (mode, op_false);
       ix86_emit_vec_binop (AND, mode, t3, x, op_false);
 
       x = expand_simple_binop (mode, IOR, t3, t2,
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a23474716a6..afa8e4c5a01 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2310,8 +2310,8 @@
 (define_expand "vcond_mask_<mode>v4hi"
   [(set (match_operand:V4F_64 0 "register_operand")
        (vec_merge:V4F_64
-         (match_operand:V4F_64 1 "register_operand")
-         (match_operand:V4F_64 2 "register_operand")
+         (match_operand:V4F_64 1 "vector_or_const_vector_operand")
+         (match_operand:V4F_64 2 "vector_or_const_vector_operand")
          (match_operand:V4HI 3  "register_operand")))]
   "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
 {
@@ -2359,8 +2359,8 @@
 (define_expand "vcond_mask_<mode>v2hi"
   [(set (match_operand:V2F_32 0 "register_operand")
        (vec_merge:V2F_32
-         (match_operand:V2F_32 1 "register_operand")
-         (match_operand:V2F_32 2 "register_operand")
+         (match_operand:V2F_32 1 "vector_or_const_vector_operand")
+         (match_operand:V2F_32 2 "vector_or_const_vector_operand")
          (match_operand:V2HI 3 "register_operand")))]
   "TARGET_SSE4_1"
 {
@@ -4309,8 +4309,8 @@
 (define_expand "vcond_mask_<mode><mmxintvecmodelower>"
   [(set (match_operand:MMXMODE124 0 "register_operand")
        (vec_merge:MMXMODE124
-         (match_operand:MMXMODE124 1 "register_operand")
-         (match_operand:MMXMODE124 2 "register_operand")
+         (match_operand:MMXMODE124 1 "vector_or_const_vector_operand")
+         (match_operand:MMXMODE124 2 "vector_or_const_vector_operand")
          (match_operand:<mmxintvecmode> 3 "register_operand")))]
   "TARGET_MMX_WITH_SSE"
 {
@@ -4322,8 +4322,8 @@
 (define_expand "vcond_mask_<mode><mode>"
   [(set (match_operand:VI_16_32 0 "register_operand")
        (vec_merge:VI_16_32
-         (match_operand:VI_16_32 1 "register_operand")
-         (match_operand:VI_16_32 2 "register_operand")
+         (match_operand:VI_16_32 1 "vector_or_const_vector_operand")
+         (match_operand:VI_16_32 2 "vector_or_const_vector_operand")
          (match_operand:VI_16_32 3 "register_operand")))]
   "TARGET_SSE2"
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cfe7a046f42..2edc86d18ba 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5493,8 +5493,8 @@
 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
   [(set (match_operand:VI_256_AVX2 0 "register_operand")
        (vec_merge:VI_256_AVX2
-         (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
-         (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
+         (match_operand:VI_256_AVX2 1 "vector_or_const_vector_operand")
+         (match_operand:VI_256_AVX2 2 "vector_or_const_vector_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
   "TARGET_AVX"
 {
@@ -5506,8 +5506,8 @@
 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
   [(set (match_operand:VI_128 0 "register_operand")
        (vec_merge:VI_128
-         (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
-         (match_operand:VI_128 2 "nonimm_or_0_operand")
+         (match_operand:VI_128 1 "vector_or_const_vector_operand")
+         (match_operand:VI_128 2 "vector_or_const_vector_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
   "TARGET_SSE2"
 {
@@ -5519,8 +5519,8 @@
 (define_expand "vcond_mask_v1tiv1ti"
   [(set (match_operand:V1TI 0 "register_operand")
        (vec_merge:V1TI
-         (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
-         (match_operand:V1TI 2 "nonimm_or_0_operand")
+         (match_operand:V1TI 1 "vector_or_const_vector_operand")
+         (match_operand:V1TI 2 "vector_or_const_vector_operand")
          (match_operand:V1TI 3 "register_operand")))]
   "TARGET_SSE2"
 {
@@ -5532,8 +5532,8 @@
 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
   [(set (match_operand:VF_256 0 "register_operand")
        (vec_merge:VF_256
-         (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
-         (match_operand:VF_256 2 "nonimm_or_0_operand")
+         (match_operand:VF_256 1 "vector_or_const_vector_operand")
+         (match_operand:VF_256 2 "vector_or_const_vector_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
   "TARGET_AVX"
 {
@@ -5545,8 +5545,8 @@
 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
   [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
-         (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
-         (match_operand:VF_128 2 "nonimm_or_0_operand")
+         (match_operand:VF_128 1 "vector_or_const_vector_operand")
+         (match_operand:VF_128 2 "vector_or_const_vector_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
   "TARGET_SSE"
 {
diff --git a/gcc/testsuite/gcc.target/i386/pr123238-2.c b/gcc/testsuite/gcc.target/i386/pr123238-2.c
new file mode 100644
index 00000000000..6cf5b3e35c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr123238-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+void f(char c[])
+{
+    for (int i = 0; i < 8; i++)
+        c[i] = c[i] ? 'a' : 'c';
+}
+
+/* { dg-final { scan-assembler-not "pandn" } } */

Reply via email to