This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 64d5022 [MINOR] Performance replace operations (w/ pattern,
replacement)
64d5022 is described below
commit 64d5022df1c4a75931a072ef7ce73dc322a393f0
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Aug 15 15:09:35 2020 +0200
[MINOR] Performance replace operations (w/ pattern, replacement)
As more and more pre-processing techniques use replace operations for
robustness against NaN and other special values, this patch makes a
performance improvement for special cases. So far we always allocated
the output and copied values with on-the-fly replacement of the pattern
and awareness for NaNs. Now, we first probe with (early abort) if the
matrix contains the pattern and only if this is the case allocate and
copy the values; otherwise the we simply return the input.
---
.../sysds/runtime/matrix/data/MatrixBlock.java | 72 +++++++++++++++-------
1 file changed, 49 insertions(+), 23 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index fe2a7bd..a879de3 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -636,6 +636,39 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
return denseBlock.get(r, c);
}
+ public boolean containsValue(double pattern) {
+ if(isEmptyBlock(true))
+ return pattern==0;
+
+ //make a pass over the data to determine if it includes the
+ //pattern, with early abort as soon as the pattern is found
+ boolean NaNpattern = Double.isNaN(pattern);
+ if( isInSparseFormat() ) {
+ if( nonZeros < getLength() && pattern == 0 )
+ return true;
+ SparseBlock sb = getSparseBlock();
+ for(int i=0; i<rlen; i++) {
+ if( sb.isEmpty(i) ) continue;
+ int apos = sb.pos(i);
+ int alen = sb.size(i);
+ double[] avals = sb.values(i);
+ for( int j=apos; j<apos+alen; j++ )
+ if(avals[j]==pattern || (NaNpattern &&
Double.isNaN(avals[j])))
+ return true;
+ }
+ }
+ else {
+ DenseBlock db = getDenseBlock();
+ for(int i=0; i<rlen; i++) {
+ double[] vals = db.values(i);
+ for(int j=0; j<clen; j++)
+ if(vals[j]==pattern || (NaNpattern &&
Double.isNaN(vals[j])))
+ return true;
+ }
+ }
+ return false;
+ }
+
/**
* Append value is only used when values are appended at the end of
each row for the sparse representation
* This can only be called, when the caller knows the access pattern of
the block
@@ -5004,10 +5037,13 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
MatrixBlock ret = checkType(result);
examSparsity(); //ensure its in the right format
ret.reset(rlen, clen, sparse);
+ //probe early abort conditions
if( nonZeros == 0 && pattern != 0 )
- return ret; //early abort
- boolean NaNpattern = Double.isNaN(pattern);
+ return ret;
+ if( !containsValue(pattern) )
+ return this; //avoid allocation + copy
+ boolean NaNpattern = Double.isNaN(pattern);
if( sparse ) //SPARSE
{
if( pattern != 0d ) //SPARSE <- SPARSE (sparse-safe)
@@ -5017,15 +5053,13 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
SparseBlock c = ret.sparseBlock;
for( int i=0; i<rlen; i++ ) {
- if( !a.isEmpty(i) )
- {
+ if( !a.isEmpty(i) ) {
c.allocate(i);
int apos = a.pos(i);
int alen = a.size(i);
int[] aix = a.indexes(i);
double[] avals = a.values(i);
- for( int j=apos; j<apos+alen;
j++ )
- {
+ for( int j=apos; j<apos+alen;
j++ ) {
double val = avals[j];
if( val== pattern ||
(NaNpattern && Double.isNaN(val)) )
c.append(i,
aix[j], replacement);
@@ -5038,19 +5072,17 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
else //DENSE <- SPARSE
{
ret.sparse = false;
- ret.allocateDenseBlock();
+ ret.allocateDenseBlock();
SparseBlock a = sparseBlock;
double[] c = ret.getDenseBlockValues();
//initialize with replacement (since all 0
values, see SPARSITY_TURN_POINT)
- Arrays.fill(c, replacement);
+ Arrays.fill(c, replacement);
//overwrite with existing values (via scatter)
if( a != null ) //check for empty matrix
- for( int i=0, cix=0; i<rlen; i++,
cix+=clen )
- {
- if( !a.isEmpty(i) )
- {
+ for( int i=0, cix=0; i<rlen; i++,
cix+=clen ) {
+ if( !a.isEmpty(i) ) {
int apos = a.pos(i);
int alen = a.size(i);
int[] aix =
a.indexes(i);
@@ -5060,22 +5092,16 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock, Externalizab
c[
cix+aix[j] ] = avals[ j ];
}
}
- }
+ }
}
- else //DENSE <- DENSE
- {
+ else { //DENSE <- DENSE
int mn = ret.rlen * ret.clen;
ret.allocateDenseBlock();
double[] a = getDenseBlockValues();
double[] c = ret.getDenseBlockValues();
-
- for( int i=0; i<mn; i++ )
- {
- double val = a[i];
- if( val== pattern || (NaNpattern &&
Double.isNaN(val)) )
- c[i] = replacement;
- else
- c[i] = a[i];
+ for( int i=0; i<mn; i++ ) {
+ c[i] = ( a[i]== pattern || (NaNpattern &&
Double.isNaN(a[i])) ) ?
+ replacement : a[i];
}
}