Repository: incubator-systemml Updated Branches: refs/heads/master c697c30eb -> ab1c70508
[SYSTEMML-1592] Improved codegen cellwise operations w/ sparse outputs This patch improves codegen cellwise operations with the ability to directly create sparse outputs instead of dense intermediates and a subsequent conversion to sparse. We apply this for sparse inputs with sparse-safe cellwise operations. Furthermore, this also includes a refactoring of the cellwise operator skeleton for better instruction locality. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ab1c7050 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ab1c7050 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ab1c7050 Branch: refs/heads/master Commit: ab1c7050890bcfc7032c33df3c3d8244a906c8a3 Parents: c697c30 Author: Matthias Boehm <[email protected]> Authored: Thu May 25 20:40:52 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu May 25 20:40:52 2017 -0700 ---------------------------------------------------------------------- .../sysml/runtime/codegen/SpoofCellwise.java | 607 ++++++++++--------- 1 file changed, 335 insertions(+), 272 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ab1c7050/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index b01914a..fe23a04 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -188,28 +188,28 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl k = 1; //serial execution } - //result allocation and preparations - out.reset(inputs.get(0).getNumRows(), _type == CellType.NO_AGG ? - inputs.get(0).getNumColumns() : 1, false); - out.allocateDenseBlock(); - double[] c = out.getDenseBlock(); - //input preparation double[][] b = prepInputMatrices(inputs); double[] scalars = prepInputScalars(scalarObjects); final int m = inputs.get(0).getNumRows(); - final int n = inputs.get(0).getNumColumns(); + final int n = inputs.get(0).getNumColumns(); //sparse safe check - boolean sparseSafe = isSparseSafe() || (b.length == 0 + boolean sparseSafe = isSparseSafe() || (b.length == 0 && genexec( 0, b, scalars, m, n, 0, 0 ) == 0); + //result allocation and preparations + boolean sparseOut = sparseSafe && inputs.get(0).isInSparseFormat() && _type == CellType.NO_AGG; + out.reset(inputs.get(0).getNumRows(), _type == CellType.NO_AGG ? + inputs.get(0).getNumColumns() : 1, sparseOut); + out.allocateDenseOrSparseBlock(); + long lnnz = 0; if( k <= 1 ) //SINGLE-THREADED { lnnz = (!inputs.get(0).isInSparseFormat()) ? - executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m) : - executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m); + executeDense(inputs.get(0).getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m) : + executeSparse(inputs.get(0).getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m); } else //MULTI-THREADED { @@ -219,7 +219,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int nk = UtilFunctions.roundToNext(Math.min(8*k,m/32), k); int blklen = (int)(Math.ceil((double)m/nk)); for( int i=0; i<nk & i*blklen<m; i++ ) - tasks.add(new ParExecTask(inputs.get(0), b, scalars, c, + tasks.add(new ParExecTask(inputs.get(0), b, scalars, out, m, n, sparseSafe, i*blklen, Math.min((i+1)*blklen, m))); //execute tasks List<Future<Long>> taskret = pool.invokeAll(tasks); @@ -235,129 +235,56 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } //post-processing - out.setNonZeros(lnnz); - out.examSparsity(); + out.setNonZeros(lnnz); + out.examSparsity(); } - private double executeDenseAndAgg(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException + private long executeDense(double[] a, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException { - ValueFunction vfun = getAggFunction(); - double ret = 0; + double[] c = out.getDenseBlock(); - //numerically stable aggregation for sum/sum_sq - if( vfun instanceof KahanFunction ) { - KahanObject kbuff = new KahanObject(0, 0); - KahanFunction kplus = (KahanFunction) vfun; - - if( a == null && !sparseSafe ) { //empty - for( int i=rl; i<ru; i++ ) - for( int j=0; j<n; j++ ) - kplus.execute2(kbuff, genexec( 0, b, scalars, m, n, i, j )); - } - else if( a != null ) { //general case - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) - if( a[ix] != 0 || !sparseSafe) - kplus.execute2(kbuff, genexec( a[ix], b, scalars, m, n, i, j )); - } - ret = kbuff._sum; + if( _type == CellType.NO_AGG ) { + return executeDenseNoAgg(a, b, scalars, c, m, n, sparseSafe, rl, ru); } - //safe aggregation for min/max w/ handling of zero entries - //note: sparse safe with zero value as min/max handled outside - else { - ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; - if( a == null && !sparseSafe ) { //empty - for( int i=rl; i<ru; i++ ) - for( int j=0; j<n; j++ ) - ret = vfun.execute(ret, genexec( 0, b, scalars, m, n, i, j )); - } - else if( a != null ) { //general case - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) - if( a[ix] != 0 || !sparseSafe) - ret = vfun.execute(ret, genexec( a[ix], b, scalars, m, n, i, j )); - } + else if( _type == CellType.ROW_AGG ) { + if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) + return executeDenseRowAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru); + else + return executeDenseRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru); } - - return ret; + return -1; } - private long executeDense(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + private double executeDenseAndAgg(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException + { + //numerically stable aggregation for sum/sum_sq + if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) + return executeDenseAggSum(a, b, scalars, m, n, sparseSafe, rl, ru); + else + return executeDenseAggMxx(a, b, scalars, m, n, sparseSafe, rl, ru); + } + + private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - long lnnz = 0; + if( sparseSafe && sblock == null ) + return 0; - if( _type == CellType.NO_AGG ) - { - if( a == null && !sparseSafe ) { //empty - //note: we can't determine sparse-safeness by executing the operator once - //as the output might change with different row indices - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) { - c[ix] = genexec( 0, b, scalars, m, n, i, j ); - lnnz += (c[ix]!=0) ? 1 : 0; - } - } - else if( a != null ) { //general case - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) - if( a[ix] != 0 || !sparseSafe) { - c[ix] = genexec( a[ix], b, scalars, m, n, i, j); - lnnz += (c[ix]!=0) ? 1 : 0; - } - } + if( _type == CellType.NO_AGG ) { + if( out.isInSparseFormat() ) + return executeSparseNoAggSparse(sblock, b, scalars, out, m, n, sparseSafe, rl, ru); + else + return executeSparseNoAggDense(sblock, b, scalars, out, m, n, sparseSafe, rl, ru); } - else if( _type == CellType.ROW_AGG ) - { - ValueFunction vfun = getAggFunction(); - - if( vfun instanceof KahanFunction ) { - KahanObject kbuff = new KahanObject(0, 0); - KahanFunction kplus = (KahanFunction) vfun; - - if( a == null && !sparseSafe ) { //empty - for( int i=rl; i<ru; i++ ) { - kbuff.set(0, 0); - for( int j=0; j<n; j++ ) - kplus.execute2(kbuff, genexec( 0, b, scalars, m, n, i, j )); - lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; - } - } - else if( a != null ) { //general case - for( int i=rl, ix=rl*n; i<ru; i++ ) { - kbuff.set(0, 0); - for( int j=0; j<n; j++, ix++ ) - if( a[ix] != 0 || !sparseSafe) - kplus.execute2(kbuff, genexec( a[ix], b, scalars, m, n, i, j )); - lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; - } - } - } - else { - double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; - if( a == null && !sparseSafe ) { //empty - for( int i=rl; i<ru; i++ ) { - double tmp = initialVal; - for( int j=0; j<n; j++ ) - tmp = vfun.execute(tmp, genexec( 0, b, scalars, m, n, i, j )); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; - } - } - else if( a != null ) { //general case - for( int i=rl, ix=rl*n; i<ru; i++ ) { - double tmp = initialVal; - for( int j=0; j<n; j++, ix++ ) - if( a[ix] != 0 || !sparseSafe) - tmp = vfun.execute(tmp, genexec( a[ix], b, scalars, m, n, i, j )); - if( sparseSafe && UtilFunctions.containsZero(a, ix-n, n) ) - tmp = vfun.execute(tmp, 0); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; - } - } - } + else if( _type == CellType.ROW_AGG ) { + if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) + return executeSparseRowAggSum(sblock, b, scalars, out, m, n, sparseSafe, rl, ru); + else + return executeSparseRowAggMxx(sblock, b, scalars, out, m, n, sparseSafe, rl, ru); } - return lnnz; + return -1; } private double executeSparseAndAgg(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) @@ -366,184 +293,320 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( sparseSafe && sblock == null ) return 0; - ValueFunction vfun = getAggFunction(); - double ret = 0; + if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) + return executeSparseAggSum(sblock, b, scalars, m, n, sparseSafe, rl, ru); + else + return executeSparseAggMxx(sblock, b, scalars, m, n, sparseSafe, rl, ru); + } + + private long executeSparseNoAggSparse(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + SparseBlock c = out.getSparseBlock(); + long lnnz = 0; + for(int i=rl; i<ru; i++) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + c.allocate(i, sparseSafe ? alen : n); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + c.append(i, j, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + c.append(i, lastj, genexec(avals[k], b, scalars, m, n, i, lastj)); + } + } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + c.append(i, j, genexec(0, b, scalars, m, n, i, j)); + lnnz += c.size(i); + } - //numerically stable aggregation for sum/sum_sq - if( vfun instanceof KahanFunction ) { - KahanObject kbuff = new KahanObject(0, 0); - KahanFunction kplus = (KahanFunction) vfun; + return lnnz; + } - //note: sequential scan algorithm for both sparse-safe and -unsafe - //in order to avoid binary search for sparse-unsafe - for(int i=rl; i<ru; i++) { - int lastj = -1; - //handle non-empty rows - if( sblock != null && !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - int[] aix = sblock.indexes(i); - double[] avals = sblock.values(i); - for(int k=apos; k<apos+alen; k++) { - //process zeros before current non-zero - if( !sparseSafe ) - for(int j=lastj+1; j<aix[k]; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); - //process current non-zero - lastj = aix[k]; - kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); - } + private long executeSparseNoAggDense(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + double[] c = out.getDenseBlock(); + long lnnz = 0; + for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + //process current non-zero + lastj = aix[k]; + lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0; } - //process empty rows or remaining zeros - if( !sparseSafe ) - for(int j=lastj+1; j<n; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); } - ret = kbuff._sum; + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; } - //safe aggregation for min/max w/ handling of zero entries - //note: sparse safe with zero value as min/max handled outside - else { - ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; - ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret; - - //note: sequential scan algorithm for both sparse-safe and -unsafe - //in order to avoid binary search for sparse-unsafe - for(int i=rl; i<ru; i++) { - int lastj = -1; - //handle non-empty rows - if( sblock != null && !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - int[] aix = sblock.indexes(i); - double[] avals = sblock.values(i); - for(int k=apos; k<apos+alen; k++) { - //process zeros before current non-zero - if( !sparseSafe ) - for(int j=lastj+1; j<aix[k]; j++) - ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); - //process current non-zero - lastj = aix[k]; - ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj)); - } + return lnnz; + } + + private long executeSparseRowAggSum(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + KahanFunction kplus = (KahanFunction) getAggFunction(); + KahanObject kbuff = new KahanObject(0, 0); + + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + double[] c = out.getDenseBlock(); + long lnnz = 0; + for(int i=rl; i<ru; i++) { + kbuff.set(0, 0); + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); } - //process empty rows or remaining zeros - if( !sparseSafe ) - for(int j=lastj+1; j<n; j++) - ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; } - - return ret; + return lnnz; } - private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + private long executeSparseRowAggMxx(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - if( sparseSafe && sblock == null ) - return 0; + double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + ValueFunction vfun = getAggFunction(); + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + double[] c = out.getDenseBlock(); long lnnz = 0; - if( _type == CellType.NO_AGG ) - { - //note: sequential scan algorithm for both sparse-safe and -unsafe - //in order to avoid binary search for sparse-unsafe - for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) { - int lastj = -1; - //handle non-empty rows - if( sblock != null && !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - int[] aix = sblock.indexes(i); - double[] avals = sblock.values(i); - for(int k=apos; k<apos+alen; k++) { - //process zeros before current non-zero - if( !sparseSafe ) - for(int j=lastj+1; j<aix[k]; j++) - lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; - //process current non-zero - lastj = aix[k]; - lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0; - } + for(int i=rl; i<ru; i++) { + double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal; + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero + if( !sparseSafe ) + for(int j=lastj+1; j<aix[k]; j++) + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj)); } - //process empty rows or remaining zeros - if( !sparseSafe ) - for(int j=lastj+1; j<n; j++) - lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + lnnz += ((c[i] = tmp)!=0) ? 1 : 0; } - else if( _type == CellType.ROW_AGG ) - { - ValueFunction vfun = getAggFunction(); - - if( vfun instanceof KahanFunction ) { - KahanObject kbuff = new KahanObject(0, 0); - KahanFunction kplus = (KahanFunction) vfun; + return lnnz; + } + + private double executeSparseAggSum(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + KahanFunction kplus = (KahanFunction) getAggFunction(); + KahanObject kbuff = new KahanObject(0, 0); - //note: sequential scan algorithm for both sparse-safe and -unsafe - //in order to avoid binary search for sparse-unsafe - for(int i=rl; i<ru; i++) { - kbuff.set(0, 0); - int lastj = -1; - //handle non-empty rows - if( sblock != null && !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - int[] aix = sblock.indexes(i); - double[] avals = sblock.values(i); - for(int k=apos; k<apos+alen; k++) { - //process zeros before current non-zero - if( !sparseSafe ) - for(int j=lastj+1; j<aix[k]; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); - //process current non-zero - lastj = aix[k]; - kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); - } - } - //process empty rows or remaining zeros + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl; i<ru; i++) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero if( !sparseSafe ) - for(int j=lastj+1; j<n; j++) + for(int j=lastj+1; j<aix[k]; j++) kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); - lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; + //process current non-zero + lastj = aix[k]; + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); } } - else { - double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; - - //note: sequential scan algorithm for both sparse-safe and -unsafe - //in order to avoid binary search for sparse-unsafe - for(int i=rl; i<ru; i++) { - double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal; - int lastj = -1; - //handle non-empty rows - if( sblock != null && !sblock.isEmpty(i) ) { - int apos = sblock.pos(i); - int alen = sblock.size(i); - int[] aix = sblock.indexes(i); - double[] avals = sblock.values(i); - for(int k=apos; k<apos+alen; k++) { - //process zeros before current non-zero - if( !sparseSafe ) - for(int j=lastj+1; j<aix[k]; j++) - tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); - //process current non-zero - lastj = aix[k]; - tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj)); - } - } - //process empty rows or remaining zeros + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + } + return kbuff._sum; + } + + private double executeSparseAggMxx(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret; + ValueFunction vfun = getAggFunction(); + + //note: sequential scan algorithm for both sparse-safe and -unsafe + //in order to avoid binary search for sparse-unsafe + for(int i=rl; i<ru; i++) { + int lastj = -1; + //handle non-empty rows + if( sblock != null && !sblock.isEmpty(i) ) { + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + for(int k=apos; k<apos+alen; k++) { + //process zeros before current non-zero if( !sparseSafe ) - for(int j=lastj+1; j<n; j++) - tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + for(int j=lastj+1; j<aix[k]; j++) + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); + //process current non-zero + lastj = aix[k]; + ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj)); } } + //process empty rows or remaining zeros + if( !sparseSafe ) + for(int j=lastj+1; j<n; j++) + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); } - + return ret; + } + + private long executeDenseNoAgg(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + long lnnz = 0; + for( int i=rl, ix=rl*n; i<ru; i++ ) + for( int j=0; j<n; j++, ix++ ) { + double aval = (a != null) ? a[ix] : 0; + if( aval != 0 || !sparseSafe) { + c[ix] = genexec( aval, b, scalars, m, n, i, j); + lnnz += (c[ix]!=0) ? 1 : 0; + } + } return lnnz; } - + + private long executeDenseRowAggSum(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + KahanFunction kplus = (KahanFunction) getAggFunction(); + KahanObject kbuff = new KahanObject(0, 0); + long lnnz = 0; + for( int i=rl, ix=rl*n; i<ru; i++ ) { + kbuff.set(0, 0); + for( int j=0; j<n; j++, ix++ ) { + double aval = (a != null) ? a[ix] : 0; + if( aval != 0 || !sparseSafe) + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + } + lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; + } + return lnnz; + } + + private long executeDenseRowAggMxx(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + ValueFunction vfun = getAggFunction(); + long lnnz = 0; + if( a == null && !sparseSafe ) { //empty + for( int i=rl; i<ru; i++ ) { + double tmp = initialVal; + for( int j=0; j<n; j++ ) + tmp = vfun.execute(tmp, genexec( 0, b, scalars, m, n, i, j )); + lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + } + } + else if( a != null ) { //general case + for( int i=rl, ix=rl*n; i<ru; i++ ) { + double tmp = initialVal; + for( int j=0; j<n; j++, ix++ ) + if( a[ix] != 0 || !sparseSafe) + tmp = vfun.execute(tmp, genexec( a[ix], b, scalars, m, n, i, j )); + if( sparseSafe && UtilFunctions.containsZero(a, ix-n, n) ) + tmp = vfun.execute(tmp, 0); + lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + } + } + return lnnz; + } + + private double executeDenseAggSum(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + KahanFunction kplus = (KahanFunction) getAggFunction(); + KahanObject kbuff = new KahanObject(0, 0); + + for( int i=rl, ix=rl*n; i<ru; i++ ) + for( int j=0; j<n; j++, ix++ ) { + double aval = (a != null) ? a[ix] : 0; + if( aval != 0 || !sparseSafe) + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + } + return kbuff._sum; + } + + private double executeDenseAggMxx(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException + { + //safe aggregation for min/max w/ handling of zero entries + //note: sparse safe with zero value as min/max handled outside + double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + ValueFunction vfun = getAggFunction(); + + for( int i=rl, ix=rl*n; i<ru; i++ ) + for( int j=0; j<n; j++, ix++ ) { + double aval = (a != null) ? a[ix] : 0; + if( aval != 0 || !sparseSafe) + ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, i, j)); + } + return ret; + } + + protected abstract double genexec( double a, double[][] b, double[] scalars, int m, int n, int rowIndex, int colIndex); private class ParAggTask implements Callable<Double> @@ -582,14 +645,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl private final MatrixBlock _a; private final double[][] _b; private final double[] _scalars; - private final double[] _c; + private final MatrixBlock _c; private final int _rlen; private final int _clen; private final boolean _safe; private final int _rl; private final int _ru; - protected ParExecTask( MatrixBlock a, double[][] b, double[] scalars, double[] c, + protected ParExecTask( MatrixBlock a, double[][] b, double[] scalars, MatrixBlock c, int rlen, int clen, boolean sparseSafe, int rl, int ru ) { _a = a; _b = b;
