[SYSTEMML-2265] Fix correctness sparse aggregate w/ sparse-unsafe ops The recently modified sparse block estimates (see SYSTEMML-2263) changed the used data format for a number of tests, which revealed long existing but hidden issues. This particular patch fixes an issue of incorrect results for sparse aggregate operations with sparse-unsafe operations such as mean and var over sparse inputs.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/54c52ab3 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/54c52ab3 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/54c52ab3 Branch: refs/heads/master Commit: 54c52ab3c4b8c1d176a17ada497209a8477077f2 Parents: f3b8b88 Author: Matthias Boehm <[email protected]> Authored: Fri Apr 20 18:24:23 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Apr 20 18:24:23 2018 -0700 ---------------------------------------------------------------------- .../sysml/runtime/codegen/SpoofOperator.java | 8 ++-- .../sysml/runtime/matrix/data/LibMatrixAgg.java | 46 ++++++++++---------- 2 files changed, 27 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/54c52ab3/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java index f6b225d..4b75b2e 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java @@ -301,10 +301,10 @@ public abstract class SpoofOperator implements Serializable //move to next row if necessary if( rowIndex > currRowIndex ) { currRowIndex = rowIndex; - currColPos = mdat.getSparseBlock().pos(currRowIndex); - currLen = mdat.getSparseBlock().size(currRowIndex) + currColPos; - indexes = mdat.getSparseBlock().indexes(currRowIndex); - values = mdat.getSparseBlock().values(currRowIndex); + currColPos = sblock.pos(currRowIndex); + currLen = sblock.size(currRowIndex) + currColPos; + indexes = sblock.indexes(currRowIndex); + values = sblock.values(currRowIndex); } //move to next colpos if necessary while( currColPos < currLen && indexes[currColPos]<colIndex ) http://git-wip-us.apache.org/repos/asf/systemml/blob/54c52ab3/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java index 6d2b768..8de89c3 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java @@ -29,6 +29,8 @@ import java.util.concurrent.Future; import org.apache.sysml.lops.PartialAggregate.CorrectionLocationType; import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.codegen.SpoofOperator.SideInput; +import org.apache.sysml.runtime.codegen.SpoofOperator.SideInputSparseCell; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType; import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysml.runtime.functionobjects.Builtin; @@ -897,37 +899,35 @@ public class LibMatrixAgg cmValues[i][j] = new CM_COV_Object(); //column vector or matrix - if( target.sparse ) //SPARSE target - { + if( target.sparse ) { //SPARSE target + //note: we create a sparse side input for a linear scan (w/o binary search) + //over the sparse representation despite the sparse-unsafe operations SparseBlock a = target.sparseBlock; + SideInputSparseCell sa = new SideInputSparseCell( + new SideInput(null, target, target.clen)); - for( int i=0; i < groups.getNumRows(); i++ ) - { + for( int i=0; i < groups.getNumRows(); i++ ) { int g = (int) groups.quickGetValue(i, 0); - if ( g > numGroups ) + if( g > numGroups ) continue; + + //sparse unsafe correction empty row + if( a.isEmpty(i) ){ + w = (weights != null) ? weights.quickGetValue(i,0) : w; + for( int j=cl; j<cu; j++ ) + cmFn.execute(cmValues[g-1][j-cl], 0, w); continue; + } - if( !a.isEmpty(i) ) - { - int pos = a.pos(i); - int len = a.size(i); - int[] aix = a.indexes(i); - double[] avals = a.values(i); - int j = (cl==0) ? 0 : a.posFIndexGTE(i,cl); - j = (j >= 0) ? pos+j : pos+len; - - for( ; j<pos+len && aix[j]<cu; j++ ) //for each nnz - { - if ( weights != null ) - w = weights.quickGetValue(i, 0); - cmFn.execute(cmValues[g-1][aix[j]-cl], avals[j], w); - } - //TODO sparse unsafe correction + //process non-empty row + for( int j=cl; j<cu; j++ ) { + double d = sa.getValue(i, j); + if ( weights != null ) + w = weights.quickGetValue(i,0); + cmFn.execute(cmValues[g-1][j-cl], d, w); } } } - else //DENSE target - { + else { //DENSE target DenseBlock a = target.getDenseBlock(); for( int i=0; i < groups.getNumRows(); i++ ) { int g = (int) groups.quickGetValue(i, 0);
