This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 7b34a67f2c [SYSTEMDS-3896] Leverage SIMD Vector API for Counting NNZ
7b34a67f2c is described below

commit 7b34a67f2cd220bb4451385e872ce6a1b0940421
Author: Matthias Boehm <mboe...@gmail.com>
AuthorDate: Wed Jul 16 10:44:57 2025 +0200

    [SYSTEMDS-3896] Leverage SIMD Vector API for Counting NNZ
    
    This patch leverages the new Vector API for the core primitive of
    counting the number of non-zeros (which is still single-threaded
    because it is usually done for chunks as part of multi-threaded tasks).
    
    For single-threaded computeNnz on an 8GB dense matrix after JIT
    compilation, this patch improved performance from 1100ms to 850ms.
---
 .../apache/sysds/runtime/util/UtilFunctions.java   | 29 +++++++++-------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java 
b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
index f233b69a11..4f0a15d78f 100644
--- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
@@ -58,9 +58,15 @@ import org.apache.sysds.runtime.matrix.data.Pair;
 import org.apache.sysds.runtime.meta.TensorCharacteristics;
 import org.apache.sysds.runtime.transform.encode.ColumnEncoderRecode;
 
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+
 public class UtilFunctions {
        protected static final Log LOG = 
LogFactory.getLog(UtilFunctions.class.getName());
+       private static final VectorSpecies<Double> SPECIES = 
DoubleVector.SPECIES_PREFERRED;
+       private static final int vLen = SPECIES.length();
 
+       
        private UtilFunctions(){
                // empty private constructor
                // making all calls static
@@ -876,25 +882,14 @@ public class UtilFunctions {
        public static int computeNnz(final double[] a, final int ai, final int 
len) {
                int lnnz = 0;
                final int end = ai + len;
-               final int h = (end - ai) % 8;
+               final int rest = (end - ai) % vLen; // leading elements that do not fill a whole vector
 
-               for(int i = ai; i < ai + h; i++)
+               for(int i = ai; i < ai + rest; i++) // scalar pass over the leading remainder
                        lnnz += (a[i] != 0.0) ? 1 : 0;
-               for(int i = ai + h; i < end; i += 8)
-                       lnnz += computeNnzBy8(a, i);
-               return lnnz;
-       }
-
-       private static int computeNnzBy8(final double[] a, final int i) {
-               int lnnz = 0;
-               lnnz += (a[i] != 0.0) ? 1 : 0;
-               lnnz += (a[i+1] != 0.0) ? 1 : 0;
-               lnnz += (a[i+2] != 0.0) ? 1 : 0;
-               lnnz += (a[i+3] != 0.0) ? 1 : 0;
-               lnnz += (a[i+4] != 0.0) ? 1 : 0;
-               lnnz += (a[i+5] != 0.0) ? 1 : 0;
-               lnnz += (a[i+6] != 0.0) ? 1 : 0;
-               lnnz += (a[i+7] != 0.0) ? 1 : 0;
+               for(int i = ai + rest; i < end; i += vLen) { // advance by the species lane count, not a fixed 8, so no block is skipped
+                       DoubleVector aVec = DoubleVector.fromArray(SPECIES, a, 
i);
+                       lnnz += vLen-aVec.eq(0).trueCount(); // lanes in this vector that are not equal to zero
+               }
                return lnnz;
        }
 

Reply via email to