This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 2c737bc1b8 [SYSTEMDS-3896] Improved SIMD Vectorized Counting NNZ 2c737bc1b8 is described below commit 2c737bc1b894b88eed684ced55c65be1c777aba5 Author: Matthias Boehm <mboe...@gmail.com> AuthorDate: Wed Jul 16 11:18:27 2025 +0200 [SYSTEMDS-3896] Improved SIMD Vectorized Counting NNZ This patch makes an additional performance improvement which further reduces the runtime on an 8GB matrix from 850ms to 770ms (non-vectorized: 1100ms) by avoiding unnecessary scalar ops. Furthermore, we replace the hard-coded AVX512 vector size with the general vector length (the hard-coded size failed on non-Intel hardware in GitHub Actions). --- src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java index 4f0a15d78f..3fd1dfd1a3 100644 --- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java +++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java @@ -880,15 +880,17 @@ public class UtilFunctions { } public static int computeNnz(final double[] a, final int ai, final int len) { - int lnnz = 0; final int end = ai + len; final int rest = (end - ai) % vLen; + int lnnz = len; + //start from len and subtract number of zeros because + //DoubleVector defines an eq but no neq operation for(int i = ai; i < ai + rest; i++) - lnnz += (a[i] != 0.0) ? 1 : 0; - for(int i = ai + rest; i < end; i += 8) { + lnnz -= (a[i] == 0.0) ? 1 : 0; + for(int i = ai + rest; i < end; i += vLen) { DoubleVector aVec = DoubleVector.fromArray(SPECIES, a, i); - lnnz += vLen-aVec.eq(0).trueCount(); + lnnz -= aVec.eq(0).trueCount(); } return lnnz; }