https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81366

            Bug ID: 81366
           Summary: pragma omp simd reduce(max:m) not vectorizing
           Product: gcc
           Version: 8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ryan.burn at gmail dot com
  Target Milestone: ---

Compiling this code:

###########################################################
// Reproducer: scans x[0..n-1] and returns the largest value seen, starting
// from m = 0 (so the result is never below 0).
// NOTE(review): std::max needs <algorithm>; the include is not shown in the
// snippet as posted.
double max(double* x, int n) {
  double m = 0;
  int i;
  // The clause under test: a max reduction over m on a simd loop.
  // linear(i) marks the loop index as advancing linearly per iteration.
#pragma omp simd linear (i) reduction(max:m)
  for (i=0; i<n; ++i) 
    // Keep the larger of the current element and the running maximum.
    m = std::max(x[i], m);
  return m;
}
###########################################################

with g++ -fopenmp-simd -march=haswell -O3 -S main.cpp

g++ -v
Using built-in specs.
COLLECT_GCC=/usr/local/bin/g++
COLLECT_LTO_WRAPPER=/usr/local/libexec/gcc/x86_64-apple-darwin15.0.0/8.0.0/lto-wrapper
Target: x86_64-apple-darwin15.0.0
Configured with: ../gcc/configure --disable-multilib --enable-languages=c++
--with-gmp=/opt/local --with-libiconv-prefix=/opt/local
Thread model: posix
gcc version 8.0.0 20170610 (experimental) (GCC)

This produces the unvectorized assembly below. Similar code using a "+" reduction instead of max does vectorize.

############################################################
# NOTE(review): LFE1116 looks like the end marker of the preceding function,
# whose body is not part of this listing -- confirm against the full output.
LFE1116:
        .align 4,0x90
        .globl __Z3maxPdi
#-----------------------------------------------------------------------
# __Z3maxPdi -- mangled name of max(double*, int), the function in the report.
# Syntax: AT&T (source operand first, destination last).
# In:     %rdi = x (pointer to the doubles), %esi = n (element count)
# Out:    %xmm0 = result
# Uses:   %rax (end pointer), %r10, %xmm0-%xmm2, flags, and an 8-byte
#         stack slot at -48(%rbp) holding the running maximum.
# The loads, the compare and the max are all scalar (...sd) forms that
# handle one double per iteration; the only packed mnemonic, vmovapd, is
# a register-to-register copy. This is the "unvectorized" code the report
# is about.
# NOTE(review): the LCFI* labels are presumably anchors for call-frame
# information emitted elsewhere in the file -- not visible here.
#-----------------------------------------------------------------------
__Z3maxPdi:
LFB1117:
        # Prologue: realign the stack to 32 bytes while keeping a way back.
        # r10 = entry %rsp + 8.
        leaq    8(%rsp), %r10
LCFI6:
        # Round %rsp down to a multiple of 32.
        andq    $-32, %rsp
        # Re-push the quadword that was at the entry %rsp (the return
        # address) onto the realigned stack, then set up the frame pointer.
        pushq   -8(%r10)
        pushq   %rbp
LCFI7:
        movq    %rsp, %rbp
        # Save r10 so the epilogue can recompute the entry %rsp from it.
        pushq   %r10
LCFI8:
        # xmm1 = running maximum, seeded from the constant at lC1 and also
        # written to the stack slot. After the push above %rsp = %rbp - 8,
        # so -48(%rbp) lies 40 bytes below %rsp.
        # NOTE(review): presumably relies on the ABI red zone (leaf
        # function); lC1's value is not shown -- likely the identity value
        # for the max reduction. Confirm against the constant pool.
        vmovsd  lC1(%rip), %xmm1
        vmovsd  %xmm1, -48(%rbp)
        # Skip the loop entirely when n <= 0 (signed test on the int count).
        testl   %esi, %esi
        jle     L13
        # rax = x + 8*n: one-past-the-end pointer. The 32-bit leal leaves
        # n-1 zero-extended in %rax before it is scaled by 8.
        leal    -1(%rsi), %eax
        leaq    8(%rdi,%rax,8), %rax
        .align 4,0x90
L14:
        # Loop body, common path: load *x and compare it with the running
        # maximum in xmm1. jbe is taken when xmm1 <= element (or the compare
        # is unordered), i.e. when the element must replace the maximum.
        vmovsd  (%rdi), %xmm0
        vucomisd        %xmm0, %xmm1
        jbe     L20
        # Maximum unchanged: advance to the next double and loop until the
        # end pointer is reached.
        addq    $8, %rdi
        cmpq    %rax, %rdi
        jne     L14
L13:
        # Exit path (also reached directly when n <= 0): reload the running
        # maximum from the stack slot and combine it with the constant at
        # lC0 using a scalar max; the result is returned in xmm0.
        # NOTE(review): lC0 is presumably the source's initial m = 0 --
        # its value is not shown in the report.
        vmovsd  -48(%rbp), %xmm2
        vmaxsd  lC0(%rip), %xmm2, %xmm0
        # Epilogue: restore r10 and rbp, then rebuild the entry %rsp
        # (r10 - 8) so ret pops the original return address.
        popq    %r10
LCFI9:
        popq    %rbp
        leaq    -8(%r10), %rsp
LCFI10:
        ret
        .align 4,0x90
L20:
LCFI11:
        # New-maximum path: advance the pointer, store the element (xmm0)
        # into the stack slot, and leave via L13 if that was the last
        # element. Otherwise make it the running maximum in xmm1 and
        # continue the loop.
        addq    $8, %rdi
        vmovsd  %xmm0, -48(%rbp)
        cmpq    %rax, %rdi
        je      L13
        vmovapd %xmm0, %xmm1
        jmp     L14
############################################################

Reply via email to