https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113345

            Bug ID: 113345
           Summary: miss optimization for psign{b,w,d}.
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: liuhongt at gcc dot gnu.org
  Target Milestone: ---

/* Reproducer: for each i in [0, 1000), store into a[i] the negation of
   b[i] when c[i] is negative, and b[i] unchanged otherwise.

   a is __restrict-qualified; b and c are only read.

   NOTE(review): the assembly listing that follows in this report is
   labelled foo(char*, char*, char*) and uses byte instructions
   (vpsubb/vpcmpgtb), whereas this source uses short -- confirm which
   element type the listing was generated from.  */
void
foo (short* __restrict a, short* b, short* c)
{
    for (int i = 0; i != 1000; i++)
      {
        /* Conditional negate: the sign of c[i] selects -b[i] or b[i].  */
        a[i] = c[i] < 0 ? -b[i] : b[i];
      }
}

gcc -O2 -mavx2

foo(char*, char*, char*):
  xorl %eax, %eax
  vpxor %xmm2, %xmm2, %xmm2
.L2:
  vmovq (%rsi,%rax), %xmm0
  vmovq (%rdx,%rax), %xmm1
  vpsubb %xmm0, %xmm2, %xmm3
  vpcmpgtb %xmm1, %xmm2, %xmm1
  vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
  vmovq %xmm0, (%rdi,%rax)
  addq $8, %rax
  cmpq $1000, %rax
  jne .L2
  ret

It can be optimized with psignw.


22115(define_insn "<ssse3_avx2>_psign<mode>3"
22116  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
22117        (unspec:VI124_AVX2
22118          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
22119           (match_operand:VI124_AVX2 2 "vector_operand" "xja,xjm")]
22120          UNSPEC_PSIGN))]


Maybe we can just refactor the pattern as below; then combine can generate the
pattern for us.

22115(define_insn "<ssse3_avx2>_psign<mode>3"
22116  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
22117        (unspec:VI124_AVX2
22118          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
                (neg:VI124:(match_dup 1)
22119           (match_operand:VI124_AVX2 2 "vector_operand" "xja,xjm")]
22120          UNSPEC_PBLENDV))]

Reply via email to