This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 7b34a67f2c [SYSTEMDS-3896] Leverage SIMD Vector API for Counting NNZ 7b34a67f2c is described below commit 7b34a67f2cd220bb4451385e872ce6a1b0940421 Author: Matthias Boehm <mboe...@gmail.com> AuthorDate: Wed Jul 16 10:44:57 2025 +0200 [SYSTEMDS-3896] Leverage SIMD Vector API for Counting NNZ This patch leverages the new Vector API for the core primitive of counting the number of non-zeros (which is still single-threaded because it is usually done for chunks as part of multi-threaded tasks). For single-threaded computeNnz on an 8GB dense matrix after JIT compilation, this patch improved performance from 1100ms to 850ms. --- .../apache/sysds/runtime/util/UtilFunctions.java | 29 +++++++++------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java index f233b69a11..4f0a15d78f 100644 --- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java +++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java @@ -58,9 +58,15 @@ import org.apache.sysds.runtime.matrix.data.Pair; import org.apache.sysds.runtime.meta.TensorCharacteristics; import org.apache.sysds.runtime.transform.encode.ColumnEncoderRecode; +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.VectorSpecies; + public class UtilFunctions { protected static final Log LOG = LogFactory.getLog(UtilFunctions.class.getName()); + private static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED; + private static final int vLen = SPECIES.length(); + private UtilFunctions(){ // empty private constructor // making all calls static @@ -876,25 +882,14 @@ public class UtilFunctions { public static int computeNnz(final double[] a, final int ai, final int len) { int lnnz = 0; final int end = ai + len; - final int h = (end - ai) % 8; + final int rest = (end - ai) % vLen; - 
for(int i = ai; i < ai + h; i++) + for(int i = ai; i < ai + rest; i++) lnnz += (a[i] != 0.0) ? 1 : 0; - for(int i = ai + h; i < end; i += 8) - lnnz += computeNnzBy8(a, i); - return lnnz; - } - - private static int computeNnzBy8(final double[] a, final int i) { - int lnnz = 0; - lnnz += (a[i] != 0.0) ? 1 : 0; - lnnz += (a[i+1] != 0.0) ? 1 : 0; - lnnz += (a[i+2] != 0.0) ? 1 : 0; - lnnz += (a[i+3] != 0.0) ? 1 : 0; - lnnz += (a[i+4] != 0.0) ? 1 : 0; - lnnz += (a[i+5] != 0.0) ? 1 : 0; - lnnz += (a[i+6] != 0.0) ? 1 : 0; - lnnz += (a[i+7] != 0.0) ? 1 : 0; + for(int i = ai + rest; i < end; i += 8) { + DoubleVector aVec = DoubleVector.fromArray(SPECIES, a, i); + lnnz += vLen-aVec.eq(0).trueCount(); + } return lnnz; }