[SYSTEMML-2135] Fix aggregate correctness over matrices w/ zero dims This patch fixes issues of incorrect results of full aggregates over matrices with zero rows or columns. While sum returns 0, others like min/max are defined by their initial values, and yet other are undefined and result in NaNs.
Furthermore, this also includes a fix of simplification rewrites for empty aggregates (awareness of zero rows/cols) and a general cleanup of the initial values for min and max, which otherwise would lead to incorrect results in distributed mr operations. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/7e11deaa Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/7e11deaa Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/7e11deaa Branch: refs/heads/master Commit: 7e11deaa8b22a5749251e629ca1c4b972a6ef054 Parents: 5983e96 Author: Matthias Boehm <[email protected]> Authored: Thu Feb 8 20:43:56 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Thu Feb 8 22:04:16 2018 -0800 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/UnaryOp.java | 13 ++--- .../RewriteAlgebraicSimplificationDynamic.java | 38 ++++++------- .../runtime/codegen/LibSpoofPrimitives.java | 4 +- .../sysml/runtime/codegen/SpoofCellwise.java | 18 +++--- .../runtime/codegen/SpoofMultiAggregate.java | 4 +- .../sysml/runtime/compress/ColGroupOffset.java | 7 +-- .../sysml/runtime/compress/ColGroupValue.java | 8 ++- .../runtime/compress/CompressedMatrixBlock.java | 3 +- .../runtime/instructions/InstructionUtils.java | 24 ++++---- .../instructions/spark/SpoofSPInstruction.java | 4 +- .../sysml/runtime/matrix/data/LibMatrixAgg.java | 40 ++++++++++---- .../sysml/runtime/matrix/data/MatrixBlock.java | 6 +- .../runtime/transform/encode/EncoderBin.java | 4 +- .../functions/misc/ZeroRowsColsMatrixTest.java | 58 ++++++++++---------- .../functions/misc/ZeroMatrix_Aggregates.R | 8 ++- .../functions/misc/ZeroMatrix_Aggregates.dml | 13 +++-- 16 files changed, 136 insertions(+), 116 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/hops/UnaryOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/UnaryOp.java b/src/main/java/org/apache/sysml/hops/UnaryOp.java index c844432..e16cb4c 100644 --- a/src/main/java/org/apache/sysml/hops/UnaryOp.java +++ b/src/main/java/org/apache/sysml/hops/UnaryOp.java @@ -529,14 +529,13 @@ public class UnaryOp extends Hop implements MultiThreadedHop } } - private double getCumulativeInitValue() - { + private double getCumulativeInitValue() { switch( _op ) { - case CUMSUM: return 0; - case CUMPROD: return 1; - case CUMMIN: return Double.MAX_VALUE; - case CUMMAX: return -Double.MAX_VALUE; - default: return Double.NaN; + case CUMSUM: return 0; + case CUMPROD: return 1; + case CUMMIN: return Double.POSITIVE_INFINITY; + case CUMMAX: return Double.NEGATIVE_INFINITY; + default: return Double.NaN; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java index d64e4b8..f07a379 100644 --- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java +++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java @@ -767,27 +767,25 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule AggUnaryOp uhi = (AggUnaryOp)hi; Hop input = uhi.getInput().get(0); - if( HopRewriteUtils.isValidOp(uhi.getOp(), LOOKUP_VALID_EMPTY_AGGREGATE) ){ - - if( HopRewriteUtils.isEmpty(input) ) - { - Hop hnew = null; - if( uhi.getDirection() == Direction.RowCol ) - hnew = new LiteralOp(0.0); - else if( uhi.getDirection() == Direction.Col ) - hnew = HopRewriteUtils.createDataGenOp(uhi, input, 0); //nrow(uhi)=1 - else //if( uhi.getDirection() == Direction.Row ) - hnew = HopRewriteUtils.createDataGenOp(input, uhi, 0); //ncol(uhi)=1 - - //add new child to parent input - HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos); - hi = hnew; - - LOG.debug("Applied simplifyEmptyAggregate"); - } - } + //check for valid empty aggregates, except for matrices with zero rows/cols + if( HopRewriteUtils.isValidOp(uhi.getOp(), LOOKUP_VALID_EMPTY_AGGREGATE) + && HopRewriteUtils.isEmpty(input) + && input.getDim1()>=1 && input.getDim2() >= 1 ) + { + Hop hnew = null; + if( uhi.getDirection() == Direction.RowCol ) + hnew = new LiteralOp(0.0); + else if( uhi.getDirection() == Direction.Col ) + hnew = HopRewriteUtils.createDataGenOp(uhi, input, 0); //nrow(uhi)=1 + else //if( uhi.getDirection() == Direction.Row ) + hnew = HopRewriteUtils.createDataGenOp(input, uhi, 0); //ncol(uhi)=1 + + //add new child to parent input + HopRewriteUtils.replaceChildReference(parent, hi, hnew, pos); + hi = hnew; + LOG.debug("Applied simplifyEmptyAggregate"); + } } - return hi; } http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java index 8d76e14..473d849 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java @@ -302,7 +302,7 @@ public class LibSpoofPrimitives } public static double vectMin(double[] a, int ai, int len) { - double val = Double.MAX_VALUE; + double val = Double.POSITIVE_INFINITY; for( int i = ai; i < ai+len; i++ ) val = Math.min(a[i], val); return val; @@ -314,7 +314,7 @@ public class LibSpoofPrimitives } public static double vectMax(double[] a, int ai, int len) { - double val = -Double.MAX_VALUE; + double val = Double.NEGATIVE_INFINITY; for( int i = ai; i < ai+len; i++ ) val = Math.max(a[i], val); return val; http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 7c2ac35..5080e6c 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -487,7 +487,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl { double[] lc = c.valuesAt(0); //single block - double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); long lnnz = 0; if( a == null && !sparseSafe ) { //empty @@ -559,7 +559,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl { double[] lc = c.valuesAt(0); //single block - double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); Arrays.fill(lc, initialVal); @@ -622,7 +622,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl { //safe aggregation for min/max w/ handling of zero entries //note: sparse safe with zero value as min/max handled outside - double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double ret = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); if( a == null && !sparseSafe ) { @@ -764,7 +764,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); //note: sequential scan algorithm for both sparse-safe and -unsafe @@ -852,7 +852,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); double[] c = out.getDenseBlockValues(); Arrays.fill(c, initialVal); @@ -929,7 +929,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double ret = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret; ValueFunction vfun = getAggFunction(); @@ -1019,7 +1019,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double[] c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE); + Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); ValueFunction vfun = getAggFunction(); long lnnz = 0; Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); @@ -1056,7 +1056,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double[] c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE); + Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); ValueFunction vfun = getAggFunction(); long lnnz = 0; Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); @@ -1112,7 +1112,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl { //safe aggregation for min/max w/ handling of zero entries //note: sparse safe with zero value as min/max handled outside - double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; + double ret = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java index 85c894a..72f7764 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java @@ -224,8 +224,8 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria switch( aggop ) { case SUM: case SUM_SQ: return 0; - case MIN: return Double.MAX_VALUE; - case MAX: return -Double.MAX_VALUE; + case MIN: return Double.POSITIVE_INFINITY; + case MAX: return Double.NEGATIVE_INFINITY; } return 0; } http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java index 3603b9e..a312419 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java @@ -239,12 +239,11 @@ public abstract class ColGroupOffset extends ColGroupValue LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals); } - protected final double mxxValues(int bitmapIx, Builtin builtin) - { + protected final double mxxValues(int bitmapIx, Builtin builtin) { final int numCols = getNumCols(); final int valOff = bitmapIx * numCols; - - double val = Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1); + double val = (builtin.getBuiltinCode()==BuiltinCode.MAX) ? + Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; for( int i = 0; i < numCols; i++ ) val = builtin.execute2(val, _values[valOff+i]); http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java index 12bce84..a416c5e 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupValue.java @@ -266,7 +266,8 @@ public abstract class ColGroupValue extends ColGroup protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) { //init and 0-value handling - double val = Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1); + double val = (builtin.getBuiltinCode()==BuiltinCode.MAX) ? + Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; if( zeros ) val = builtin.execute2(val, 0); @@ -296,10 +297,11 @@ public abstract class ColGroupValue extends ColGroup //init and 0-value handling double[] vals = new double[numCols]; - Arrays.fill(vals, Double.MAX_VALUE * ((builtin.getBuiltinCode()==BuiltinCode.MAX)?-1:1)); + Arrays.fill(vals, (builtin.getBuiltinCode()==BuiltinCode.MAX) ? + Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY); if( zeros ) { for( int j = 0; j < numCols; j++ ) - vals[j] = builtin.execute2(vals[j], 0); + vals[j] = builtin.execute2(vals[j], 0); } //iterate over all values only http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java index d9cfc72..e5f8827 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java @@ -1185,7 +1185,8 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable //special handling init value for rowmins/rowmax if( op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin ) { - double val = Double.MAX_VALUE * ((((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX)?-1:1); + double val = (((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX) ? + Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; ret.getDenseBlock().set(val); } http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java index ff85aa7..172aa15 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java @@ -352,11 +352,11 @@ public class InstructionUtils aggun = new AggregateUnaryOperator(agg, ReduceAll.getReduceAllFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uamax") ) { - AggregateOperator agg = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("max")); + AggregateOperator agg = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("max")); aggun = new AggregateUnaryOperator(agg, ReduceAll.getReduceAllFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uamin") ) { - AggregateOperator agg = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("min")); + AggregateOperator agg = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("min")); aggun = new AggregateUnaryOperator(agg, ReduceAll.getReduceAllFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uatrace") ) { @@ -368,27 +368,27 @@ public class InstructionUtils aggun = new AggregateUnaryOperator(agg, ReduceDiag.getReduceDiagFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uarmax") ) { - AggregateOperator agg = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("max")); + AggregateOperator agg = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("max")); aggun = new AggregateUnaryOperator(agg, ReduceCol.getReduceColFnObject(), numThreads); } else if (opcode.equalsIgnoreCase("uarimax") ) { - AggregateOperator agg = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("maxindex"), true, CorrectionLocationType.LASTCOLUMN); + AggregateOperator agg = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("maxindex"), true, CorrectionLocationType.LASTCOLUMN); aggun = new AggregateUnaryOperator(agg, ReduceCol.getReduceColFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uarmin") ) { - AggregateOperator agg = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("min")); + AggregateOperator agg = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("min")); aggun = new AggregateUnaryOperator(agg, ReduceCol.getReduceColFnObject(), numThreads); } else if (opcode.equalsIgnoreCase("uarimin") ) { - AggregateOperator agg = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("minindex"), true, CorrectionLocationType.LASTCOLUMN); + AggregateOperator agg = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("minindex"), true, CorrectionLocationType.LASTCOLUMN); aggun = new AggregateUnaryOperator(agg, ReduceCol.getReduceColFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uacmax") ) { - AggregateOperator agg = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("max")); + AggregateOperator agg = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("max")); aggun = new AggregateUnaryOperator(agg, ReduceRow.getReduceRowFnObject(), numThreads); } else if ( opcode.equalsIgnoreCase("uacmin") ) { - AggregateOperator agg = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("min")); + AggregateOperator agg = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("min")); aggun = new AggregateUnaryOperator(agg, ReduceRow.getReduceRowFnObject(), numThreads); } @@ -430,16 +430,16 @@ public class InstructionUtils agg = new AggregateOperator(1, Multiply.getMultiplyFnObject()); } else if (opcode.equalsIgnoreCase("arimax")){ - agg = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("maxindex"), true, CorrectionLocationType.LASTCOLUMN); + agg = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("maxindex"), true, CorrectionLocationType.LASTCOLUMN); } else if ( opcode.equalsIgnoreCase("amax") ) { - agg = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("max")); + agg = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("max")); } else if ( opcode.equalsIgnoreCase("amin") ) { - agg = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("min")); + agg = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("min")); } else if (opcode.equalsIgnoreCase("arimin")){ - agg = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("minindex"), true, CorrectionLocationType.LASTCOLUMN); + agg = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("minindex"), true, CorrectionLocationType.LASTCOLUMN); } else if ( opcode.equalsIgnoreCase("amean") ) { boolean lcorrExists = (corrExists==null) ? true : Boolean.parseBoolean(corrExists); http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java index da39471..4022201 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/SpoofSPInstruction.java @@ -695,9 +695,9 @@ public class SpoofSPInstruction extends SPInstruction { if( aggop == AggOp.SUM || aggop == AggOp.SUM_SQ ) return new AggregateOperator(0, KahanPlus.getKahanPlusFnObject(), true, CorrectionLocationType.NONE); else if( aggop == AggOp.MIN ) - return new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject(BuiltinCode.MIN), false, CorrectionLocationType.NONE); + return new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MIN), false, CorrectionLocationType.NONE); else if( aggop == AggOp.MAX ) - return new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject(BuiltinCode.MAX), false, CorrectionLocationType.NONE); + return new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MAX), false, CorrectionLocationType.NONE); return null; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java index b56397e..ac8a185 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java @@ -1331,13 +1331,13 @@ public class LibMatrixAgg } case CUM_MIN: case CUM_MAX: { - double init = Double.MAX_VALUE * ((optype==AggType.CUM_MAX)?-1:1); + double init = (optype==AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY:Double.POSITIVE_INFINITY; d_ucummxx(in.getDenseBlockValues(), null, out.getDenseBlockValues(), n, init, (Builtin)vFn, rl, ru); break; } case MIN: case MAX: { //MAX/MIN - double init = Double.MAX_VALUE * ((optype==AggType.MAX)?-1:1); + double init = (optype==AggType.MAX) ? Double.NEGATIVE_INFINITY:Double.POSITIVE_INFINITY; if( ixFn instanceof ReduceAll ) // MIN/MAX d_uamxx(a, c, n, init, (Builtin)vFn, rl, ru); else if( ixFn instanceof ReduceCol ) //ROWMIN/ROWMAX @@ -1347,13 +1347,13 @@ public class LibMatrixAgg break; } case MAX_INDEX: { - double init = -Double.MAX_VALUE; + double init = Double.NEGATIVE_INFINITY; if( ixFn instanceof ReduceCol ) //ROWINDEXMAX d_uarimxx(a, c, n, init, (Builtin)vFn, rl, ru); break; } case MIN_INDEX: { - double init = Double.MAX_VALUE; + double init = Double.POSITIVE_INFINITY; if( ixFn instanceof ReduceCol ) //ROWINDEXMIN d_uarimin(a, c, n, init, (Builtin)vFn, rl, ru); break; @@ -1435,13 +1435,13 @@ public class LibMatrixAgg } case CUM_MIN: case CUM_MAX: { - double init = Double.MAX_VALUE * ((optype==AggType.CUM_MAX)?-1:1); + double init = (optype==AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY:Double.POSITIVE_INFINITY; s_ucummxx(a, null, out.getDenseBlockValues(), n, init, (Builtin)vFn, rl, ru); break; } case MIN: case MAX: { //MAX/MIN - double init = Double.MAX_VALUE * ((optype==AggType.MAX)?-1:1); + double init = (optype==AggType.MAX) ? Double.NEGATIVE_INFINITY:Double.POSITIVE_INFINITY; if( ixFn instanceof ReduceAll ) // MIN/MAX s_uamxx(a, c, n, init, (Builtin)vFn, rl, ru); else if( ixFn instanceof ReduceCol ) //ROWMIN/ROWMAX @@ -1451,13 +1451,13 @@ public class LibMatrixAgg break; } case MAX_INDEX: { - double init = -Double.MAX_VALUE; + double init = Double.NEGATIVE_INFINITY; if( ixFn instanceof ReduceCol ) //ROWINDEXMAX s_uarimxx(a, c, n, init, (Builtin)vFn, rl, ru); break; } case MIN_INDEX: { - double init = Double.MAX_VALUE; + double init = Double.POSITIVE_INFINITY; if( ixFn instanceof ReduceCol ) //ROWINDEXMAX s_uarimin(a, c, n, init, (Builtin)vFn, rl, ru); break; @@ -1516,7 +1516,7 @@ public class LibMatrixAgg } case CUM_MIN: case CUM_MAX: { - double init = Double.MAX_VALUE * ((optype==AggType.CUM_MAX)?-1:1); + double init = (optype==AggType.CUM_MAX)? Double.NEGATIVE_INFINITY:Double.POSITIVE_INFINITY; d_ucummxx(a, agg, c, n, init, (Builtin)vFn, rl, ru); break; } @@ -1548,7 +1548,7 @@ public class LibMatrixAgg } case CUM_MIN: case CUM_MAX: { - double init = Double.MAX_VALUE * ((optype==AggType.CUM_MAX)?-1:1); + double init = (optype==AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY:Double.POSITIVE_INFINITY; s_ucummxx(a, agg, c, n, init, (Builtin)vFn, rl, ru); break; } @@ -1560,7 +1560,25 @@ public class LibMatrixAgg private static MatrixBlock aggregateUnaryMatrixEmpty(MatrixBlock in, MatrixBlock out, AggType optype, IndexFunction ixFn) throws DMLRuntimeException { - //do nothing for pseudo sparse-safe operations + //handle all full aggregates over matrices with zero rows or columns + if( ixFn instanceof ReduceAll && (in.getNumRows() == 0 || in.getNumColumns() == 0) ) { + double val = Double.NaN; + switch( optype ) { + case KAHAN_SUM: + case KAHAN_SUM_SQ: val = 0; break; + case MIN: val = Double.POSITIVE_INFINITY; break; + case MAX: val = Double.NEGATIVE_INFINITY; break; + case MEAN: + case VAR: + case MIN_INDEX: + case MAX_INDEX: + default: val = Double.NaN; break; + } + out.quickSetValue(0, 0, val); + return out; + } + + //handle pseudo sparse-safe operations over empty inputs if(optype==AggType.KAHAN_SUM || optype==AggType.KAHAN_SUM_SQ || optype==AggType.MIN || optype==AggType.MAX || optype==AggType.PROD || optype == AggType.CUM_KAHAN_SUM || optype == AggType.CUM_PROD http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index c59ec51..b10e153 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -798,7 +798,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab return -1; //NOTE: usually this method is only applied on dense vectors and hence not really tuned yet. - double min = Double.MAX_VALUE; + double min = Double.POSITIVE_INFINITY; for( int i=0; i<rlen; i++ ) for( int j=0; j<clen; j++ ){ double val = quickGetValue(i, j); @@ -819,7 +819,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab throws DMLRuntimeException { //construct operator - AggregateOperator aop = new AggregateOperator(Double.MAX_VALUE, Builtin.getBuiltinFnObject("min")); + AggregateOperator aop = new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject("min")); AggregateUnaryOperator auop = new AggregateUnaryOperator( aop, ReduceAll.getReduceAllFnObject()); //execute operation @@ -839,7 +839,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab throws DMLRuntimeException { //construct operator - AggregateOperator aop = new AggregateOperator(-Double.MAX_VALUE, Builtin.getBuiltinFnObject("max")); + AggregateOperator aop = new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("max")); AggregateUnaryOperator auop = new AggregateUnaryOperator( aop, ReduceAll.getReduceAllFnObject()); //execute operation http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java index e70a392..016adb4 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java +++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java @@ -79,9 +79,9 @@ public class EncoderBin extends Encoder // initialize internal transformation metadata _min = new double[_colList.length]; - Arrays.fill(_min, Double.MAX_VALUE); + Arrays.fill(_min, Double.POSITIVE_INFINITY); _max = new double[_colList.length]; - Arrays.fill(_max, -Double.MAX_VALUE); + Arrays.fill(_max, Double.NEGATIVE_INFINITY); } } http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/test/java/org/apache/sysml/test/integration/functions/misc/ZeroRowsColsMatrixTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/misc/ZeroRowsColsMatrixTest.java b/src/test/java/org/apache/sysml/test/integration/functions/misc/ZeroRowsColsMatrixTest.java index 960eac5..7574fb6 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/misc/ZeroRowsColsMatrixTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/misc/ZeroRowsColsMatrixTest.java @@ -143,35 +143,35 @@ public class ZeroRowsColsMatrixTest extends AutomatedTestBase runEmptyMatrixTest(TEST_NAME3, true, ExecType.SPARK); } -// @Test -// public void testEmptyMatrixAggregatesNoRewritesCP() { -// runEmptyMatrixTest(TEST_NAME4, false, ExecType.CP); -// } -// -// @Test -// public void testEmptyMatrixAggregatesRewritesCP() { -// runEmptyMatrixTest(TEST_NAME4, true, ExecType.CP); -// } -// -// @Test -// public void testEmptyMatrixAggregatesNoRewritesMR() { -// runEmptyMatrixTest(TEST_NAME4, false, ExecType.MR); -// } -// -// @Test -// public void testEmptyMatrixAggregatesRewritesMR() { -// runEmptyMatrixTest(TEST_NAME4, true, ExecType.MR); -// } -// -// @Test -// public void testEmptyMatrixAggregatesNoRewritesSP() { -// runEmptyMatrixTest(TEST_NAME4, false, ExecType.SPARK); -// } -// -// @Test -// public void testEmptyMatrixAggregatesRewritesSP() { -// runEmptyMatrixTest(TEST_NAME4, true, ExecType.SPARK); -// } + @Test + public void testEmptyMatrixAggregatesNoRewritesCP() { + runEmptyMatrixTest(TEST_NAME4, false, ExecType.CP); + } + + @Test + public void testEmptyMatrixAggregatesRewritesCP() { + runEmptyMatrixTest(TEST_NAME4, true, ExecType.CP); + } + + @Test + public void testEmptyMatrixAggregatesNoRewritesMR() { + runEmptyMatrixTest(TEST_NAME4, false, ExecType.MR); + } + + @Test + public void testEmptyMatrixAggregatesRewritesMR() { + runEmptyMatrixTest(TEST_NAME4, true, ExecType.MR); + } + + @Test + public void testEmptyMatrixAggregatesNoRewritesSP() { + runEmptyMatrixTest(TEST_NAME4, false, ExecType.SPARK); + } + + @Test + public void testEmptyMatrixAggregatesRewritesSP() { + runEmptyMatrixTest(TEST_NAME4, true, ExecType.SPARK); + } private void runEmptyMatrixTest( String testname, boolean rewrites, ExecType et ) { http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.R b/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.R index a7f00ba..bc105e6 100644 --- a/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.R +++ b/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.R @@ -26,9 +26,11 @@ library("Matrix") n = as.integer(args[1]); X = matrix(0, n, 0); -R = rbind(rbind( +R = rbind(rbind(rbind(rbind( as.matrix(sum(X)==0), as.matrix(min(X)==(1.0/0.0))), - as.matrix(max(X)==(-1.0/0.0))); + as.matrix(max(X)==(-1.0/0.0))), + as.matrix(is.nan(mean(X)))), + as.matrix(is.na(sd(X)))); -writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); +writeMM(as(R, "CsparseMatrix"), paste(args[2], "R", sep="")); http://git-wip-us.apache.org/repos/asf/systemml/blob/7e11deaa/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.dml b/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.dml index 690c7b6..f26f516 100644 --- a/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.dml +++ b/src/test/scripts/functions/misc/ZeroMatrix_Aggregates.dml @@ -21,11 +21,12 @@ X = matrix(0, $1, 0); -print(min(X)) -R = rbind( - as.matrix(sum(X)==0), - as.matrix(min(X)==(1.0/0.0)), - as.matrix(max(X)==(-1.0/0.0)) -); +# nary rbind not applicable because not supported in MR +R = rbind(rbind(rbind(rbind( + as.matrix(sum(X)==0), # 0 + as.matrix(min(X)==(1.0/0.0))), # INF + as.matrix(max(X)==(-1.0/0.0))), # -INF + as.matrix(mean(X)!=mean(X))), # NaN + as.matrix(sd(X)!=sd(X))); # NaN write(R, $2);
