I'm curious why this issue comes up for you. The code was unrelated to --with-avx512-kernels=0 option.
Its relying on __AVX512F__and PETSC_HAVE_IMMINTRIN_H flags. And assumes immintrin.h has a definition for _mm512_reduce_add_pd() Is the flag __AVX512F__ always set on your machine by gcc? And does this change based on the hardware? I just tried this build [same os/compiler] on "Intel(R) Xeon(R) Gold 6130 CPU @ 2.10GHz" - and can't reproduce the issue. I do see _mm512_reduce_add_pd is missing from immintrin.h - but the flag __AVX512F__ is not set for me. Satish ---------- balay@compute004^~/junk $ cat xyz.c #if defined __AVX512F__ #error "avx512f flag set" #endif #include <immintrin.h> balay@compute004^~/junk $ gcc xyz.c -c balay@compute004^~/junk $ gcc xyz.c -E |grep _mm512_reduce_add_pd balay@compute004^~/junk $ gcc --version gcc (Ubuntu 5.4.0-6ubuntu1~16.04.10) 5.4.0 20160609 On Fri, 25 Oct 2019, Lisandro Dalcin via petsc-dev wrote: > Yes, that should work, I coded it myself to workaround the issue, but you > were faster than me. Thanks! > > On Thu, 24 Oct 2019 at 22:00, Zhang, Hong <[email protected]> wrote: > > > Hi Lisandro, > > > > Can you please check if the following patch fixes the problem? I will > > create a MR. > > > > diff --git a/src/mat/impls/aij/seq/aijperm/aijperm.c > > b/src/mat/impls/aij/seq/aijperm/aijperm.c > > index 577dfc6713..568535117a 100644 > > --- a/src/mat/impls/aij/seq/aijperm/aijperm.c > > +++ b/src/mat/impls/aij/seq/aijperm/aijperm.c > > @@ -12,7 +12,7 @@ > > > > #include <../src/mat/impls/aij/seq/aij.h> > > > > -#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && > > defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && > > !defined(PETSC_USE_64BIT_INDICES) > > +#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) > > && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && > > !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && > > !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND) > > #include <immintrin.h> > > > > #if !defined(_MM_SCALE_8) > > @@ -301,7 +301,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy) > > #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && > > defined(notworking)) > > PetscInt i,j; > > #endif > > -#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && > > defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && > > !defined(PETSC_USE_64BIT_INDICES) > > +#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) > > && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && > > !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && > > !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND) > > __m512d vec_x,vec_y,vec_vals; > > __m256i vec_idx,vec_ipos,vec_j; > > __mmask8 mask; > > @@ -401,7 +401,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy) > > #pragma _CRI prefervector > > #endif > > > > -#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && > > defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && > > !defined(PETSC_USE_64BIT_INDICES) > > +#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) > > && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && > > !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && > > !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND) > > vec_y = _mm512_setzero_pd(); > > ipos = ip[i]; > > for (j=0; j<(nz>>3); j++) { > > @@ -436,7 +436,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy) > > * worthwhile to vectorize across the rows, that is, to do the > > * matvec by operating with "columns" of the chunk. */ > > for (j=0; j<nz; j++) { > > -#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) && > > defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && > > !defined(PETSC_USE_64BIT_INDICES) > > +#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) > > && defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) && > > !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) && > > !defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND) > > vec_j = _mm256_set1_epi32(j); > > for (i=0; i<((isize>>3)<<3); i+=8) { > > vec_y = _mm512_loadu_pd(&yp[i]); > > > > > > Thanks, > > Hong > > > > On Oct 24, 2019, at 2:47 PM, Lisandro Dalcin via petsc-dev < > > [email protected]> wrote: > > > > This is with master, but I bet the issue is also in maint. > > > > * Running on Ubuntu 16 > > > > $ uname -a > > Linux flamingo 4.4.0-104-generic #127-Ubuntu SMP Mon Dec 11 12:16:42 UTC > > 2017 x86_64 x86_64 x86_64 GNU/Linux > > > > * With system gcc 5.4 > > > > $ mpicc -show > > /usr/bin/gcc-5 > > -I/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/include > > -L/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib > > -Wl,-rpath > > -Wl,/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib > > -lmpi > > > > $ mpicc --version > > gcc-5 (Ubuntu 5.4.0-6ubuntu1~16.04.11) 5.4.0 20160609 > > Copyright (C) 2015 Free Software Foundation, Inc. > > This is free software; see the source for copying conditions. There is NO > > warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. > > > > * PETSc configured to NOT USE AVX512 kernels > > > > $ grep avx arch-gnu-opt/lib/petsc/conf/reconfigure-arch-gnu-opt.py > > '--with-avx512-kernels=0', > > > > * Bang! > > > > $ touch src/mat/impls/aij/seq/aijperm/aijperm.c > > $ make -f gmakefile > > Use "/usr/bin/make V=1" to see verbose compile lines, "/usr/bin/make V=0" > > to suppress. > > CC arch-gnu-opt/obj/mat/impls/aij/seq/aijperm/aijperm.o > > /home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c: In > > function ‘MatMult_SeqAIJPERM’: > > /home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c:426:22: > > warning: implicit declaration of function ‘_mm512_reduce_add_pd’ > > [-Wimplicit-function-declaration] > > yp[i] += _mm512_reduce_add_pd(vec_y); > > > > > > -- > > Lisandro Dalcin > > ============ > > Research Scientist > > Extreme Computing Research Center (ECRC) > > King Abdullah University of Science and Technology (KAUST) > > http://ecrc.kaust.edu.sa/ > > > > > > > >
