This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 6e318c865e [MINOR] Improved performance dense/sparse isNaN (e.g., for 
CSR)
6e318c865e is described below

commit 6e318c865e2d1c2e85a01fee4d3ae849c17aa212
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Oct 25 14:13:21 2022 -0400

    [MINOR] Improved performance dense/sparse isNaN (e.g., for CSR)
    
    This patch improves the performance of isNaN operations, which return
    an empty matrix is the input matrix does contain any NaNs. So far,
    we had generic dense and sparse checks that accessed individual rows.
    Now, we specialized for CSR, where the single value array can be scanned
    in a tight loop, and similarly for dense scan entire underlying blocks.
    
    On a 1M subset of criteo in one-hot encoded form (ultra-sparse), this
    patch improved performance from 210-300ms to 105-140ms on the first call
    (interpreted) and from 5.45 to 5.1s for 100 calls (JIT compiled).
---
 .../org/apache/sysds/runtime/data/DenseBlock.java  | 20 ++++++++++++
 .../org/apache/sysds/runtime/data/SparseBlock.java | 22 +++++++++++++
 .../apache/sysds/runtime/data/SparseBlockCSR.java  | 11 +++++++
 .../sysds/runtime/matrix/data/MatrixBlock.java     | 38 +++++-----------------
 4 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
index 5a08fb4896..0b7158cf81 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
@@ -643,6 +643,26 @@ public abstract class DenseBlock implements Serializable
         */
        public abstract long getLong(int[] ix);
 
+       /** 
+        * Checks if the block contains at least one value of the given
+        * pattern. Implementations need to handle NaN patterns as well
+        * (note that NaN==NaN yields false).
+        * 
+        * @param pattern checked pattern
+        * @return true if pattern appears at least once, otherwise false
+        */
+       public boolean contains(double pattern) {
+               boolean NaNpattern = Double.isNaN(pattern);
+               for(int i=0; i<numBlocks(); i++) {
+                       double[] vals = valuesAt(i);
+                       int len = size(i);
+                       for(int j=0; j<len; j++)
+                               if(vals[j]==pattern || (NaNpattern && 
Double.isNaN(vals[j])))
+                                       return true;
+               }
+               return false;
+       }
+       
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index 60b34b9e78..af43124cc6 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -467,6 +467,28 @@ public abstract class SparseBlock implements Serializable
         */
        public abstract int posFIndexGT(int r, int c);
        
+       /** 
+        * Checks if the block contains at least one value of the given
+        * pattern. Implementations need to handle NaN patterns as well
+        * (note that NaN==NaN yields false).
+        * 
+        * @param pattern checked pattern
+        * @return true if pattern appears at least once, otherwise false
+        */
+       public boolean contains(double pattern) {
+               boolean NaNpattern = Double.isNaN(pattern);
+               int rlen = numRows();
+               for(int i=0; i<rlen; i++) {
+                       if( isEmpty(i) ) continue;
+                       int apos = pos(i);
+                       int alen = size(i);
+                       double[] avals = values(i);
+                       for( int j=apos; j<apos+alen; j++ )
+                               if(avals[j]==pattern || (NaNpattern && 
Double.isNaN(avals[j])))
+                                       return true;
+               }
+               return false;
+       }
        
        ////////////////////////
        //iterators
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
index 474c43694b..c33420f9f4 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
@@ -954,6 +954,17 @@ public class SparseBlockCSR extends SparseBlock
 
                return true;
        }
+       
+       @Override //specialized for CSR
+       public boolean contains(double pattern) {
+               boolean NaNpattern = Double.isNaN(pattern);
+               double[] vals = _values;
+               int len = _size;
+               for(int i=0; i<len; i++)
+                       if(vals[i]==pattern || (NaNpattern && 
Double.isNaN(vals[i])))
+                               return true;
+               return false;
+       }
 
        ///////////////////////////
        // private helper methods
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 6c8c6c9953..07c8f3031f 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -694,30 +694,9 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                
                //make a pass over the data to determine if it includes the
                //pattern, with early abort as soon as the pattern is found
-               boolean NaNpattern = Double.isNaN(pattern);
-               if( isInSparseFormat() ) {
-                       SparseBlock sb = getSparseBlock();
-                       for(int i=0; i<rlen; i++) {
-                               if( sb.isEmpty(i) ) continue;
-                               int apos = sb.pos(i);
-                               int alen = sb.size(i);
-                               double[] avals = sb.values(i);
-                               for( int j=apos; j<apos+alen; j++ )
-                                       if(avals[j]==pattern || (NaNpattern && 
Double.isNaN(avals[j])))
-                                               return true;
-                       }
-               }
-               else {
-                       DenseBlock db = getDenseBlock();
-                       for(int i=0; i<rlen; i++) {
-                               double[] vals = db.values(i);
-                               int pos = db.pos(i);
-                               for(int j=pos; j<pos+clen; j++)
-                                       if(vals[j]==pattern || (NaNpattern && 
Double.isNaN(vals[j])))
-                                               return true;
-                       }
-               }
-               return false;
+               return isInSparseFormat() ?
+                       getSparseBlock().contains(pattern) :
+                       getDenseBlock().contains(pattern);
        }
        
        /**
@@ -2839,17 +2818,18 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                // by default, we guess result.sparsity=input.sparsity, unless 
not sparse safe
                boolean sp = this.sparse && op.sparseSafe;
                
+               //early abort for comparisons w/ special values
+               if( Builtin.isBuiltinCode(op.fn, BuiltinCode.ISNAN, 
BuiltinCode.ISNA))
+                       if( !containsValue(op.getPattern()) ) {
+                               return new MatrixBlock(rlen, clen, true); 
//avoid unnecessary allocation
+                       }
+               
                //allocate output
                int n = Builtin.isBuiltinCode(op.fn, BuiltinCode.CUMSUMPROD) ? 
1 : clen;
                if( ret == null )
                        ret = new MatrixBlock(rlen, n, sp, sp ? nonZeros : 
rlen*n);
                else
                        ret.reset(rlen, n, sp);
-
-               //early abort for comparisons w/ special values
-               if( Builtin.isBuiltinCode(op.fn, BuiltinCode.ISNAN, 
BuiltinCode.ISNA))
-                       if( !containsValue(op.getPattern()) )
-                               return ret; //avoid unnecessary allocation
                
                //core execute
                if( LibMatrixAgg.isSupportedUnaryOperator(op) ) {

Reply via email to