Repository: incubator-systemml Updated Branches: refs/heads/master 6c215e700 -> b0a2022e9
[SYSTEMML-1591] Performance sparse-unsafe cellwise codegen operators So far, sparse-unsafe cellwise codegen operations iterated over all cells and used binary search to access the individual values of sparse matrices, which was unnecessarily inefficient. This patch modified all sparse-unsafe cellwise operations (with and without aggregations) to use a sequential scan with gap handling and consolidated the different code paths for sparse-safe and -unsafe operations over sparse matrices. Furthermore, this patch also fixes issues where sparse-safe operations passed the wrong column indexes to generated operators. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b0a2022e Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b0a2022e Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b0a2022e Branch: refs/heads/master Commit: b0a2022e9fa7653fe56816edf4103d7f366f4d69 Parents: 6c215e7 Author: Matthias Boehm <mboe...@gmail.com> Authored: Sun May 7 22:27:20 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sun May 7 22:31:29 2017 -0700 ---------------------------------------------------------------------- .../sysml/runtime/codegen/SpoofCellwise.java | 209 +++++++++++-------- 1 file changed, 124 insertions(+), 85 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b0a2022e/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 12d14ad..b01914a 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -363,6 +363,9 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl private double executeSparseAndAgg(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { + if( sparseSafe && sblock == null ) + return 0; + ValueFunction vfun = getAggFunction(); double ret = 0; @@ -371,22 +374,30 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl KahanObject kbuff = new KahanObject(0, 0); KahanFunction kplus = (KahanFunction) vfun; - if( !sparseSafe ) { - for(int i=rl; i<ru; i++) - for(int j=0; j<n; j++) { - double valij = (sblock != null) ? sblock.get(i, j) : 0; - kplus.execute2( kbuff, genexec(valij, b, scalars, m, n, i, j)); + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl; i<ru; i++) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); } - } - else if( sblock != null ) { - for( int i=rl; i<ru; i++ ) - if( !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - double[] avals = sblock.values(i); - for( int j=apos; j<apos+alen; j++ ) - kplus.execute2( kbuff, genexec(avals[j], b, scalars, m, n, i, j)); - } + } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); } ret = kbuff._sum; } @@ -394,24 +405,34 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //note: sparse safe with zero value as min/max handled outside else { ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; - if( !sparseSafe ) { - for(int i=rl; i<ru; i++) - for(int j=0; j<n; j++) { - double valij = (sblock != null) ? sblock.get(i, j) : 0; - ret = vfun.execute( ret, genexec(valij, b, scalars, m, n, i, j)); + ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret; + + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl; i<ru; i++) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj)); } + } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); } - else if( sblock != null ) { - for( int i=rl; i<ru; i++ ) - if( !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - double[] avals = sblock.values(i); - for( int j=apos; j<apos+alen; j++ ) - ret = vfun.execute( ret, genexec(avals[j], b, scalars, m, n, i, j)); - } - } - } + } return ret; } @@ -419,31 +440,36 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { + if( sparseSafe && sblock == null ) + return 0; + long lnnz = 0; if( _type == CellType.NO_AGG ) { - if( sparseSafe ) { - if( sblock != null ) { - for( int i=rl; i<ru; i++ ) - if( !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - double[] avals = sblock.values(i); - for( int j=apos; j<apos+alen; j++ ) { - double val = genexec(avals[j], b, scalars, m, n, i, j); - c[i*n+sblock.indexes(i)[j]] = val; - lnnz += (val!=0) ? 1 : 0; - } - } - } - } - else { //sparse-unsafe - for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) - for(int j=0; j<n; j++) { - double valij = (sblock != null) ? sblock.get(i, j) : 0; - c[cix+j] = genexec(valij, b, scalars, m, n, i, j); - lnnz += (c[cix+j]!=0) ? 1 : 0; + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + //process current non-zero + lastj = aix[k]; + lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0; } + } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; } } else if( _type == CellType.ROW_AGG ) @@ -454,51 +480,64 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl KahanObject kbuff = new KahanObject(0, 0); KahanFunction kplus = (KahanFunction) vfun; - if( !sparseSafe ) { - for(int i=rl; i<ru; i++) { - kbuff.set(0, 0); - for(int j=0; j<n; j++) - kplus.execute2( kbuff, genexec( (sblock != null) ? - sblock.get(i, j) : 0, b, scalars, m, n, i, j)); - lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; - } - } - else if( sblock != null ) { //general case - for( int i=rl; i<ru; i++ ) { - if( sblock.isEmpty(i) ) continue; - kbuff.set(0, 0); + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl; i<ru; i++) { + kbuff.set(0, 0); + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { int apos = sblock.pos(i); int alen = sblock.size(i); + int[] aix = sblock.indexes(i); double[] avals = sblock.values(i); - for( int j=apos; j<apos+alen; j++ ) - kplus.execute2(kbuff, genexec(avals[j], b, scalars, m, n, i, j)); - lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); + } } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; } } else { double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; - if( !sparseSafe ) { - for(int i=rl; i<ru; i++) { - double tmp = initialVal; - for(int j=0; j<n; j++) - tmp = vfun.execute( tmp, genexec( (sblock != null) ? - sblock.get(i, j) : 0, b, scalars, m, n, i, j)); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; - } - } - else if( sblock != null ) { //general case - for( int i=rl; i<ru; i++ ) { - if( sblock.isEmpty(i) ) continue; + + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl; i<ru; i++) { + double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal; + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { int apos = sblock.pos(i); int alen = sblock.size(i); + int[] aix = sblock.indexes(i); double[] avals = sblock.values(i); - double tmp = (alen < n) ? 0 : initialVal; - for( int j=apos; j<apos+alen; j++ ) - tmp = vfun.execute(tmp, genexec(avals[j], b, scalars, m, n, i, j)); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj)); + } } - } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + } } }