This patch implements Alexander Monakov's suggestion from PR 123238.
Traditionally, the x86_64 backend implements VCOND_MASK using a three
instruction sequence of pand, pandn and por (requiring three registers),
however when op_true and op_false are both constant vectors, this can
be done using just two instructions, pand and pxor (requiring only two
registers), by computing ((op_true ^ op_false) & cmp) ^ op_false, where
op_true ^ op_false is folded at compile time.  This requires delaying
the forcing of const_vector operands to memory (the constant pool) for
as long as possible, including changing the predicates on the
define_expand patterns that call ix86_expand_sse_movcc to (consistently)
accept vector_or_const_vector_operand.  For example:
void f(char c[])
{
for (int i = 0; i < 8; i++)
c[i] = c[i] ? 'a' : 'c';
}
Before with -O2 (11 instructions):
f: movq (%rdi), %xmm0
pxor %xmm1, %xmm1
movq .LC1(%rip), %xmm2 // {'c','c','c'...}
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm0
movq .LC0(%rip), %xmm1 // {'a','a','a'...}
pand %xmm0, %xmm1
pandn %xmm2, %xmm0
por %xmm1, %xmm0
movq %xmm0, (%rdi)
ret
After with -O2 (10 instructions):
f: movq (%rdi), %xmm0
pxor %xmm1, %xmm1
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm1, %xmm0
movq .LC2(%rip), %xmm1 // {2,2,2...}
pand %xmm1, %xmm0
movq .LC1(%rip), %xmm1 // {'c','c','c'...}
pxor %xmm1, %xmm0
movq %xmm0, (%rdi)
ret
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for stage1? I'm posting this now so that the
suggestion doesn't get lost if/when PR 123238 is closed once the
regression is fixed.
2026-02-11 Roger Sayle <[email protected]>
gcc/ChangeLog
PR target/123238
* config/i386/i386-expand.cc (ix86_expand_sse_movcc): Delay calling
force_reg on op_true and op_false.  Generate an AND then XOR sequence
if op_true and op_false are both CONST_VECTOR_P.
* config/i386/mmx.md (vcond_mask_<mode>v4hi): Allow operands
1 and 2 to be vector_or_const_vector_operand.
(vcond_mask_<mode>v2hi): Likewise.
(vcond_mask_<mode><mmxintvecmodelower>): Likewise.
(vcond_mask_<mode><mode>): Likewise.
* config/i386/sse.md (vcond_mask_<mode><sseintvecmodelower>):
Likewise.
(vcond_mask_<mode><sseintvecmodelower>): Likewise.
(vcond_mask_v1tiv1ti): Likewise.
(vcond_mask_<mode><sseintvecmodelower>): Likewise.
(vcond_mask_<mode><sseintvecmodelower>): Likewise.
gcc/testsuite/ChangeLog
PR target/123238
* gcc.target/i386/pr123238-2.c: New test case.
Roger
--
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a82bb4399c9..c11115df01a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -4462,12 +4462,6 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
machine_mode blend_mode = mode;
- if (GET_MODE_SIZE (mode) < 16
- || !vector_operand (op_true, mode))
- op_true = force_reg (mode, op_true);
-
- op_false = force_reg (mode, op_false);
-
switch (mode)
{
case E_V2SFmode:
@@ -4580,6 +4574,11 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (gen != NULL)
{
+ if (GET_MODE_SIZE (mode) < 16
+ || !vector_operand (op_true, mode))
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
if (blend_mode == mode)
x = dest;
else
@@ -4595,15 +4594,25 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
if (x != dest)
emit_move_insn (dest, gen_lowpart (mode, x));
}
+ else if (CONST_VECTOR_P (op_true) && CONST_VECTOR_P (op_false))
+ {
+ rtx tmp = expand_simple_binop (mode, XOR, op_true, op_false,
+ NULL, 1, OPTAB_DIRECT);
+ tmp = expand_simple_binop (mode, AND, cmp, tmp,
+ NULL, 1, OPTAB_DIRECT);
+ tmp = expand_simple_binop (mode, XOR, tmp, op_false,
+ dest, 1, OPTAB_DIRECT);
+ if (tmp != dest)
+ emit_move_insn (dest, tmp);
+ }
else
{
- rtx t2, t3;
+ rtx t2 = expand_simple_binop (mode, AND, cmp, op_true,
+ NULL, 1, OPTAB_DIRECT);
- t2 = expand_simple_binop (mode, AND, op_true, cmp,
- NULL, 1, OPTAB_DIRECT);
-
- t3 = gen_reg_rtx (mode);
+ rtx t3 = gen_reg_rtx (mode);
x = gen_rtx_NOT (mode, cmp);
+ op_false = force_reg (mode, op_false);
ix86_emit_vec_binop (AND, mode, t3, x, op_false);
x = expand_simple_binop (mode, IOR, t3, t2,
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a23474716a6..afa8e4c5a01 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2310,8 +2310,8 @@
(define_expand "vcond_mask_<mode>v4hi"
[(set (match_operand:V4F_64 0 "register_operand")
(vec_merge:V4F_64
- (match_operand:V4F_64 1 "register_operand")
- (match_operand:V4F_64 2 "register_operand")
+ (match_operand:V4F_64 1 "vector_or_const_vector_operand")
+ (match_operand:V4F_64 2 "vector_or_const_vector_operand")
(match_operand:V4HI 3 "register_operand")))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
{
@@ -2359,8 +2359,8 @@
(define_expand "vcond_mask_<mode>v2hi"
[(set (match_operand:V2F_32 0 "register_operand")
(vec_merge:V2F_32
- (match_operand:V2F_32 1 "register_operand")
- (match_operand:V2F_32 2 "register_operand")
+ (match_operand:V2F_32 1 "vector_or_const_vector_operand")
+ (match_operand:V2F_32 2 "vector_or_const_vector_operand")
(match_operand:V2HI 3 "register_operand")))]
"TARGET_SSE4_1"
{
@@ -4309,8 +4309,8 @@
(define_expand "vcond_mask_<mode><mmxintvecmodelower>"
[(set (match_operand:MMXMODE124 0 "register_operand")
(vec_merge:MMXMODE124
- (match_operand:MMXMODE124 1 "register_operand")
- (match_operand:MMXMODE124 2 "register_operand")
+ (match_operand:MMXMODE124 1 "vector_or_const_vector_operand")
+ (match_operand:MMXMODE124 2 "vector_or_const_vector_operand")
(match_operand:<mmxintvecmode> 3 "register_operand")))]
"TARGET_MMX_WITH_SSE"
{
@@ -4322,8 +4322,8 @@
(define_expand "vcond_mask_<mode><mode>"
[(set (match_operand:VI_16_32 0 "register_operand")
(vec_merge:VI_16_32
- (match_operand:VI_16_32 1 "register_operand")
- (match_operand:VI_16_32 2 "register_operand")
+ (match_operand:VI_16_32 1 "vector_or_const_vector_operand")
+ (match_operand:VI_16_32 2 "vector_or_const_vector_operand")
(match_operand:VI_16_32 3 "register_operand")))]
"TARGET_SSE2"
{
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cfe7a046f42..2edc86d18ba 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5493,8 +5493,8 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_256_AVX2 0 "register_operand")
(vec_merge:VI_256_AVX2
- (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
- (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
+ (match_operand:VI_256_AVX2 1 "vector_or_const_vector_operand")
+ (match_operand:VI_256_AVX2 2 "vector_or_const_vector_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
{
@@ -5506,8 +5506,8 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_128 0 "register_operand")
(vec_merge:VI_128
- (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
- (match_operand:VI_128 2 "nonimm_or_0_operand")
+ (match_operand:VI_128 1 "vector_or_const_vector_operand")
+ (match_operand:VI_128 2 "vector_or_const_vector_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE2"
{
@@ -5519,8 +5519,8 @@
(define_expand "vcond_mask_v1tiv1ti"
[(set (match_operand:V1TI 0 "register_operand")
(vec_merge:V1TI
- (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
- (match_operand:V1TI 2 "nonimm_or_0_operand")
+ (match_operand:V1TI 1 "vector_or_const_vector_operand")
+ (match_operand:V1TI 2 "vector_or_const_vector_operand")
(match_operand:V1TI 3 "register_operand")))]
"TARGET_SSE2"
{
@@ -5532,8 +5532,8 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_256 0 "register_operand")
(vec_merge:VF_256
- (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
- (match_operand:VF_256 2 "nonimm_or_0_operand")
+ (match_operand:VF_256 1 "vector_or_const_vector_operand")
+ (match_operand:VF_256 2 "vector_or_const_vector_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
{
@@ -5545,8 +5545,8 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
- (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
- (match_operand:VF_128 2 "nonimm_or_0_operand")
+ (match_operand:VF_128 1 "vector_or_const_vector_operand")
+ (match_operand:VF_128 2 "vector_or_const_vector_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE"
{
diff --git a/gcc/testsuite/gcc.target/i386/pr123238-2.c b/gcc/testsuite/gcc.target/i386/pr123238-2.c
new file mode 100644
index 00000000000..6cf5b3e35c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr123238-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+void f(char c[])
+{
+ for (int i = 0; i < 8; i++)
+ c[i] = c[i] ? 'a' : 'c';
+}
+
+/* { dg-final { scan-assembler-not "pandn" } } */