Hi Lisandro,
Can you please check if the following patch fixes the problem? I will create a
MR.
diff --git a/src/mat/impls/aij/seq/aijperm/aijperm.c
b/src/mat/impls/aij/seq/aijperm/aijperm.c
index 577dfc6713..568535117a 100644
--- a/src/mat/impls/aij/seq/aijperm/aijperm.c
+++ b/src/mat/impls/aij/seq/aijperm/aijperm.c
@@ -12,7 +12,7 @@
#include <../src/mat/impls/aij/seq/aij.h>
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) &&
defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) &&
!defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) &&
defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) &&
!defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) &&
!defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
#include <immintrin.h>
#if !defined(_MM_SCALE_8)
@@ -301,7 +301,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
#if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJPERM) && defined(notworking))
PetscInt i,j;
#endif
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) &&
defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) &&
!defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) &&
defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) &&
!defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) &&
!defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
__m512d vec_x,vec_y,vec_vals;
__m256i vec_idx,vec_ipos,vec_j;
__mmask8 mask;
@@ -401,7 +401,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
#pragma _CRI prefervector
#endif
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) &&
defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) &&
!defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) &&
defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) &&
!defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) &&
!defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
vec_y = _mm512_setzero_pd();
ipos = ip[i];
for (j=0; j<(nz>>3); j++) {
@@ -436,7 +436,7 @@ PetscErrorCode MatMult_SeqAIJPERM(Mat A,Vec xx,Vec yy)
* worthwhile to vectorize across the rows, that is, to do the
* matvec by operating with "columns" of the chunk. */
for (j=0; j<nz; j++) {
-#if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX512F__) &&
defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) &&
!defined(PETSC_USE_64BIT_INDICES)
+#if defined(PETSC_USE_AVX512_KERNELS) && defined(PETSC_HAVE_IMMINTRIN_H) &&
defined(__AVX512F__) && defined(PETSC_USE_REAL_DOUBLE) &&
!defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) &&
!defined(PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND)
vec_j = _mm256_set1_epi32(j);
for (i=0; i<((isize>>3)<<3); i+=8) {
vec_y = _mm512_loadu_pd(&yp[i]);
Thanks,
Hong
On Oct 24, 2019, at 2:47 PM, Lisandro Dalcin via petsc-dev
<[email protected]<mailto:[email protected]>> wrote:
This is with master, but I bet the issue is also in maint.
* Running on Ubuntu 16
$ uname -a
Linux flamingo 4.4.0-104-generic #127-Ubuntu SMP Mon Dec 11 12:16:42 UTC 2017
x86_64 x86_64 x86_64 GNU/Linux
* With system gcc 5.4
$ mpicc -show
/usr/bin/gcc-5
-I/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/include
-L/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib
-Wl,-rpath
-Wl,/sw/workstations/apps/linux-ubuntu16.04-x86_64/mpich/3.3.1/gcc-5.4.0/nvejoe25snmak6a7fnjghabxjukjkuiu/lib
-lmpi
$ mpicc --version
gcc-5 (Ubuntu 5.4.0-6ubuntu1~16.04.11) 5.4.0 20160609
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* PETSc configured to NOT USE AVX512 kernels
$ grep avx arch-gnu-opt/lib/petsc/conf/reconfigure-arch-gnu-opt.py
'--with-avx512-kernels=0',
* Bang!
$ touch src/mat/impls/aij/seq/aijperm/aijperm.c
$ make -f gmakefile
Use "/usr/bin/make V=1" to see verbose compile lines, "/usr/bin/make V=0" to
suppress.
CC arch-gnu-opt/obj/mat/impls/aij/seq/aijperm/aijperm.o
/home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c: In function
‘MatMult_SeqAIJPERM’:
/home/dalcin/Devel/petsc/src/mat/impls/aij/seq/aijperm/aijperm.c:426:22:
warning: implicit declaration of function ‘_mm512_reduce_add_pd’
[-Wimplicit-function-declaration]
yp[i] += _mm512_reduce_add_pd(vec_y);
--
Lisandro Dalcin
============
Research Scientist
Extreme Computing Research Center (ECRC)
King Abdullah University of Science and Technology (KAUST)
http://ecrc.kaust.edu.sa/