Repository: systemml Updated Branches: refs/heads/master 7cc70b669 -> e42133fec
[SYSTEMML-1733] Fix correctness seq w/ small increment (num stability) This patch fixes correctness issues (wrong output dimensions) of seq with very small increment values in the compiler and runtime. The original formulation of "1 + floor((to-from)/incr)" was prone to round-off errors for very small from and increment values due to the subtraction of values of very different magnitude. We now use a more formulation that is more robust in that regard. This patch also consolidates all compiler and runtime calls via a new primitive as well as fixes a related issue of the recompiler. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/10c57fd6 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/10c57fd6 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/10c57fd6 Branch: refs/heads/master Commit: 10c57fd65187629e083879994ac820e08d53a771 Parents: 70f8f14 Author: Matthias Boehm <[email protected]> Authored: Fri Jun 23 01:13:31 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sat Jun 24 13:50:35 2017 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/DataGenOp.java | 4 ++- .../apache/sysml/hops/recompile/Recompiler.java | 5 +-- .../sysml/parser/BuiltinFunctionExpression.java | 5 +-- .../instructions/spark/RandSPInstruction.java | 27 +++++++--------- .../apache/sysml/runtime/matrix/DataGenMR.java | 3 +- .../runtime/matrix/data/LibMatrixDatagen.java | 2 +- .../sysml/runtime/util/UtilFunctions.java | 34 +++++++++++++------- 7 files changed, 46 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/hops/DataGenOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java b/src/main/java/org/apache/sysml/hops/DataGenOp.java index 3b3dc44..3f29358 100644 --- a/src/main/java/org/apache/sysml/hops/DataGenOp.java +++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java @@ -37,6 +37,7 @@ import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.parser.Statement; import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter; +import org.apache.sysml.runtime.util.UtilFunctions; /** * A DataGenOp can be rand (or matrix constructor), sequence, and sample - @@ -344,7 +345,8 @@ public class DataGenOp extends Hop implements MultiThreadedHop } if ( fromKnown && toKnown && incrKnown ) { - setDim1(1 + (long)Math.floor(((double)(to-from))/incr)); + //TODO fix parser exception handling and enable check by default + setDim1(UtilFunctions.getSeqLength(from, to, incr, false)); setDim2(1); _incr = incr; } http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java index f156182..15376ae 100644 --- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java +++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java @@ -101,6 +101,7 @@ import org.apache.sysml.runtime.matrix.data.FrameBlock; import org.apache.sysml.runtime.matrix.data.InputInfo; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.util.MapReduceTool; +import org.apache.sysml.runtime.util.UtilFunctions; import org.apache.sysml.utils.Explain; import org.apache.sysml.utils.Explain.ExplainType; import org.apache.sysml.utils.JSONHelper; @@ -1569,11 +1570,11 @@ public class Recompiler //special case increment if ( from!=Double.MAX_VALUE && to!=Double.MAX_VALUE ) { - incr = ( from >= to && incr==1 ) ? -1.0 : 1.0; + incr *= ((from > to && incr > 0) || (from < to && incr < 0)) ? -1.0 : 1.0; } if ( from!=Double.MAX_VALUE && to!=Double.MAX_VALUE && incr!=Double.MAX_VALUE ) { - d.setDim1( 1 + (long)Math.floor((to-from)/incr) ); + d.setDim1( UtilFunctions.getSeqLength(from, to, incr) ); d.setDim2( 1 ); d.setIncrementValue( incr ); } http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java index d133ff2..c906583 100644 --- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java +++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java @@ -25,6 +25,7 @@ import java.util.HashSet; import org.apache.sysml.parser.LanguageException.LanguageErrorCodes; import org.apache.sysml.runtime.util.ConvolutionUtils; +import org.apache.sysml.runtime.util.UtilFunctions; public class BuiltinFunctionExpression extends DataIdentifier { @@ -1002,8 +1003,8 @@ public class BuiltinFunctionExpression extends DataIdentifier // Both end points of the range must included i.e., [from,to] both inclusive. // Note that, "to" is included only if (to-from) is perfectly divisible by incr - // For example, seq(0,1,0.5) produces (0.0 0.5 1.0) whereas seq(0,1,0.6) produces only (0.0 0.6) but not (0.0 0.6 1.0) - dim1 = 1 + (long)Math.floor((to-from)/incr); + // For example, seq(0,1,0.5) produces (0.0 0.5 1.0) whereas seq(0,1,0.6) produces only (0.0 0.6) but not (0.0 0.6 1.0) + dim1 = UtilFunctions.getSeqLength(from, to, incr); } output.setDataType(DataType.MATRIX); output.setValueType(ValueType.DOUBLE); http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java index b075076..e96cd63 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java @@ -451,7 +451,7 @@ public class RandSPInstruction extends UnarySPInstruction //step 1: offset generation JavaRDD<Double> offsetsRDD = null; - long nnz = (long) Math.abs(Math.round((seq_to - seq_from)/seq_incr)) + 1; + long nnz = UtilFunctions.getSeqLength(seq_from, seq_to, seq_incr); double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity( nnz, 1, rowsInBlock, colsInBlock, nnz); //overestimate for on disk, ensures hdfs block per partition double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize(); @@ -806,32 +806,27 @@ public class RandSPInstruction extends UnarySPInstruction { private static final long serialVersionUID = 5779681055705756965L; - private int _brlen; - private double _global_seq_start; - private double _global_seq_end; - private double _seq_incr; - + private final double _global_seq_start; + private final double _global_seq_end; + private final double _seq_incr; + private final int _brlen; public GenerateSequenceBlock(int brlen, double global_seq_start, double global_seq_end, double seq_incr) { - _brlen = brlen; _global_seq_start = global_seq_start; _global_seq_end = global_seq_end; _seq_incr = seq_incr; + _brlen = brlen; } @Override public Tuple2<MatrixIndexes, MatrixBlock> call(Double seq_from) throws Exception { - double seq_to; - if(_seq_incr > 0) { - seq_to = Math.min(_global_seq_end, seq_from + _seq_incr*(_brlen-1)); - } - else { - seq_to = Math.max(_global_seq_end, seq_from + _seq_incr*(_brlen+1)); - } - long globalRow = (long) ((seq_from-_global_seq_start)/_seq_incr + 1); - long rowIndex = (long) Math.ceil((double)globalRow/(double)_brlen); + double seq_to = (_seq_incr > 0) ? + Math.min(_global_seq_end, seq_from + _seq_incr*(_brlen-1)) : + Math.max(_global_seq_end, seq_from + _seq_incr*(_brlen+1)); + long globalRow = (long)Math.round((seq_from-_global_seq_start)/_seq_incr)+1; + long rowIndex = UtilFunctions.computeBlockIndex(globalRow, _brlen); MatrixIndexes indx = new MatrixIndexes(rowIndex, 1); MatrixBlock blk = MatrixBlock.seqOperations(seq_from, seq_to, _seq_incr); http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java b/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java index 2aecec2..14ca825 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java @@ -61,6 +61,7 @@ import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration; import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.ConvertTarget; import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups; import org.apache.sysml.runtime.util.MapReduceTool; +import org.apache.sysml.runtime.util.UtilFunctions; import org.apache.sysml.yarn.DMLAppMasterUtils; import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer; @@ -206,7 +207,7 @@ public class DataGenMR throw new DMLRuntimeException("Wrong sign for the increment in a call to seq()"); // Compute the number of rows in the sequence - long numrows = 1 + (long)Math.floor((to-from)/incr); + long numrows = UtilFunctions.getSeqLength(from, to, incr); if ( rlens[i] > 0 ) { if ( numrows != rlens[i] ) throw new DMLRuntimeException("Unexpected error while processing sequence instruction. Expected number of rows does not match given number: " + rlens[i] + " != " + numrows); http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java index bb0a9a8..4dc9460 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java @@ -393,7 +393,7 @@ public class LibMatrixDatagen throw new DMLRuntimeException("Wrong sequence increment: from="+from+", to="+to+ ", incr="+incr); //prepare output matrix - int rows = 1 + (int)Math.floor((to-from)/incr); + int rows = (int) UtilFunctions.getSeqLength(from, to, incr); int cols = 1; // sequence vector always dense out.reset(rows, cols, false); out.allocateDenseBlock(); http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java index 42863a8..f76d37b 100644 --- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java +++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java @@ -284,25 +284,37 @@ public class UtilFunctions return ret; } - public static int toInt( double val ) - { + public static int toInt( double val ) { return (int) Math.floor( val + DOUBLE_EPS ); } - public static long toLong( double val ) - { + public static long toLong( double val ) { return (long) Math.floor( val + DOUBLE_EPS ); } - public static int toInt(Object obj) - { - if( obj instanceof Long ) - return ((Long)obj).intValue(); - else - return ((Integer)obj).intValue(); + public static int toInt(Object obj) { + return (obj instanceof Long) ? + ((Long)obj).intValue() : ((Integer)obj).intValue(); + } + + public static long getSeqLength(double from, double to, double incr) { + return getSeqLength(from, to, incr, true); + } + + public static long getSeqLength(double from, double to, double incr, boolean check) { + //Computing the length of a sequence with 1 + floor((to-from)/incr) + //can lead to incorrect results due to round-off errors in case of + //a very small increment. Hence, we use a different formulation + //that exhibits better numerical stability by avoiding the subtraction + //of numbers of different magnitude. + if( check && (Double.isNaN(from) || Double.isNaN(to) || Double.isNaN(incr) + || (from > to && incr > 0) || (from < to && incr < 0)) ) { + throw new RuntimeException("Invalid seq parameters: ("+from+", "+to+", "+incr+")"); + } + return 1L + (long) Math.floor(to/incr - from/incr); } - public static int roundToNext(int val, int factor) { + public static int roundToNext(int val, int factor) { //round up to next non-zero multiple of factor int pval = Math.max(val, factor); return ((pval + factor-1) / factor) * factor;
