Repository: systemml
Updated Branches:
  refs/heads/master 7cc70b669 -> e42133fec


[SYSTEMML-1733] Fix correctness seq w/ small increment (num stability)

This patch fixes correctness issues (wrong output dimensions) of seq
with very small increment values in the compiler and runtime. The
original formulation of "1 + floor((to-from)/incr)" was prone to
round-off errors for very small from and increment values due to the
subtraction of values of very different magnitude. We now use a more
formulation that is more robust in that regard. This patch also
consolidates all compiler and runtime calls via a new primitive as well
as fixes a related issue of the recompiler. 


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/10c57fd6
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/10c57fd6
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/10c57fd6

Branch: refs/heads/master
Commit: 10c57fd65187629e083879994ac820e08d53a771
Parents: 70f8f14
Author: Matthias Boehm <[email protected]>
Authored: Fri Jun 23 01:13:31 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jun 24 13:50:35 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/DataGenOp.java   |  4 ++-
 .../apache/sysml/hops/recompile/Recompiler.java |  5 +--
 .../sysml/parser/BuiltinFunctionExpression.java |  5 +--
 .../instructions/spark/RandSPInstruction.java   | 27 +++++++---------
 .../apache/sysml/runtime/matrix/DataGenMR.java  |  3 +-
 .../runtime/matrix/data/LibMatrixDatagen.java   |  2 +-
 .../sysml/runtime/util/UtilFunctions.java       | 34 +++++++++++++-------
 7 files changed, 46 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/hops/DataGenOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java 
b/src/main/java/org/apache/sysml/hops/DataGenOp.java
index 3b3dc44..3f29358 100644
--- a/src/main/java/org/apache/sysml/hops/DataGenOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java
@@ -37,6 +37,7 @@ import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.parser.Statement;
 import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
+import org.apache.sysml.runtime.util.UtilFunctions;
 
 /**
  * A DataGenOp can be rand (or matrix constructor), sequence, and sample -
@@ -344,7 +345,8 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
                        }
                        
                        if ( fromKnown && toKnown && incrKnown ) {
-                               setDim1(1 + 
(long)Math.floor(((double)(to-from))/incr));
+                               //TODO fix parser exception handling and enable 
check by default
+                               setDim1(UtilFunctions.getSeqLength(from, to, 
incr, false));
                                setDim2(1);
                                _incr = incr;
                        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java 
b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
index f156182..15376ae 100644
--- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
@@ -101,6 +101,7 @@ import org.apache.sysml.runtime.matrix.data.FrameBlock;
 import org.apache.sysml.runtime.matrix.data.InputInfo;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.util.MapReduceTool;
+import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.utils.Explain;
 import org.apache.sysml.utils.Explain.ExplainType;
 import org.apache.sysml.utils.JSONHelper;
@@ -1569,11 +1570,11 @@ public class Recompiler
                                
                                //special case increment 
                                if ( from!=Double.MAX_VALUE && 
to!=Double.MAX_VALUE ) {
-                                       incr = ( from >= to && incr==1 ) ? -1.0 
: 1.0;
+                                       incr *= ((from > to && incr > 0) || 
(from < to && incr < 0)) ? -1.0 : 1.0;
                                }
                                
                                if ( from!=Double.MAX_VALUE && 
to!=Double.MAX_VALUE && incr!=Double.MAX_VALUE ) {
-                                       d.setDim1( 1 + 
(long)Math.floor((to-from)/incr) );
+                                       d.setDim1( 
UtilFunctions.getSeqLength(from, to, incr) );
                                        d.setDim2( 1 );
                                        d.setIncrementValue( incr );
                                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java 
b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index d133ff2..c906583 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -25,6 +25,7 @@ import java.util.HashSet;
 
 import org.apache.sysml.parser.LanguageException.LanguageErrorCodes;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
+import org.apache.sysml.runtime.util.UtilFunctions;
 
 public class BuiltinFunctionExpression extends DataIdentifier 
 {
@@ -1002,8 +1003,8 @@ public class BuiltinFunctionExpression extends 
DataIdentifier
                                
                                // Both end points of the range must included 
i.e., [from,to] both inclusive.
                                // Note that, "to" is included only if 
(to-from) is perfectly divisible by incr
-                               // For example, seq(0,1,0.5) produces (0.0 0.5 
1.0) whereas seq(0,1,0.6) produces only (0.0 0.6) but not (0.0 0.6 1.0) 
-                               dim1 = 1 + (long)Math.floor((to-from)/incr); 
+                               // For example, seq(0,1,0.5) produces (0.0 0.5 
1.0) whereas seq(0,1,0.6) produces only (0.0 0.6) but not (0.0 0.6 1.0)
+                               dim1 = UtilFunctions.getSeqLength(from, to, 
incr);
                        }
                        output.setDataType(DataType.MATRIX);
                        output.setValueType(ValueType.DOUBLE);

http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
index b075076..e96cd63 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
@@ -451,7 +451,7 @@ public class RandSPInstruction extends UnarySPInstruction
                
                //step 1: offset generation 
                JavaRDD<Double> offsetsRDD = null;
-               long nnz = (long) Math.abs(Math.round((seq_to - 
seq_from)/seq_incr)) + 1;
+               long nnz = UtilFunctions.getSeqLength(seq_from, seq_to, 
seq_incr);
                double totalSize = 
OptimizerUtils.estimatePartitionedSizeExactSparsity( nnz, 1, rowsInBlock, 
                                colsInBlock, nnz); //overestimate for on disk, 
ensures hdfs block per partition
                double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
@@ -806,32 +806,27 @@ public class RandSPInstruction extends UnarySPInstruction
        {
                private static final long serialVersionUID = 
5779681055705756965L;
                
-               private int _brlen; 
-               private double _global_seq_start;
-               private double _global_seq_end; 
-               private double _seq_incr;
-               
+               private final double _global_seq_start;
+               private final double _global_seq_end;
+               private final double _seq_incr;
+               private final int _brlen;
                
                public GenerateSequenceBlock(int brlen, double 
global_seq_start, double global_seq_end, double seq_incr) {
-                       _brlen = brlen;
                        _global_seq_start = global_seq_start;
                        _global_seq_end = global_seq_end;
                        _seq_incr = seq_incr;
+                       _brlen = brlen;
                }
 
                @Override
                public Tuple2<MatrixIndexes, MatrixBlock> call(Double seq_from) 
                        throws Exception 
                {
-                       double seq_to;
-                       if(_seq_incr > 0) {
-                               seq_to = Math.min(_global_seq_end, seq_from + 
_seq_incr*(_brlen-1));
-                       }
-                       else {
-                               seq_to = Math.max(_global_seq_end, seq_from + 
_seq_incr*(_brlen+1));
-                       }
-                       long globalRow = (long) 
((seq_from-_global_seq_start)/_seq_incr + 1);
-                       long rowIndex = (long) 
Math.ceil((double)globalRow/(double)_brlen);
+                       double seq_to = (_seq_incr > 0) ?
+                               Math.min(_global_seq_end, seq_from + 
_seq_incr*(_brlen-1)) :
+                               Math.max(_global_seq_end, seq_from + 
_seq_incr*(_brlen+1));
+                       long globalRow = 
(long)Math.round((seq_from-_global_seq_start)/_seq_incr)+1;                    
+                       long rowIndex = 
UtilFunctions.computeBlockIndex(globalRow, _brlen);
                        
                        MatrixIndexes indx = new MatrixIndexes(rowIndex, 1);
                        MatrixBlock blk = MatrixBlock.seqOperations(seq_from, 
seq_to, _seq_incr);

http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java 
b/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java
index 2aecec2..14ca825 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/DataGenMR.java
@@ -61,6 +61,7 @@ import 
org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
 import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.ConvertTarget;
 import 
org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups;
 import org.apache.sysml.runtime.util.MapReduceTool;
+import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.yarn.DMLAppMasterUtils;
 import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
 
@@ -206,7 +207,7 @@ public class DataGenMR
                                        throw new DMLRuntimeException("Wrong 
sign for the increment in a call to seq()");
                                
                                // Compute the number of rows in the sequence
-                               long numrows = 1 + 
(long)Math.floor((to-from)/incr);
+                               long numrows = UtilFunctions.getSeqLength(from, 
to, incr);
                                if ( rlens[i] > 0 ) {
                                        if ( numrows != rlens[i] )
                                                throw new 
DMLRuntimeException("Unexpected error while processing sequence instruction. 
Expected number of rows does not match given number: " + rlens[i] + " != " + 
numrows);

http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
index bb0a9a8..4dc9460 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
@@ -393,7 +393,7 @@ public class LibMatrixDatagen
                        throw new DMLRuntimeException("Wrong sequence 
increment: from="+from+", to="+to+ ", incr="+incr);
                
                //prepare output matrix
-               int rows = 1 + (int)Math.floor((to-from)/incr);
+               int rows = (int) UtilFunctions.getSeqLength(from, to, incr);
                int cols = 1; // sequence vector always dense
                out.reset(rows, cols, false);
                out.allocateDenseBlock();

http://git-wip-us.apache.org/repos/asf/systemml/blob/10c57fd6/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java 
b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index 42863a8..f76d37b 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -284,25 +284,37 @@ public class UtilFunctions
                return ret;
        }
        
-       public static int toInt( double val )
-       {
+       public static int toInt( double val ) {
                return (int) Math.floor( val + DOUBLE_EPS );
        }
        
-       public static long toLong( double val )
-       {
+       public static long toLong( double val ) {
                return (long) Math.floor( val + DOUBLE_EPS );
        }
        
-       public static int toInt(Object obj)
-       {
-               if( obj instanceof Long )
-                       return ((Long)obj).intValue();
-               else
-                       return ((Integer)obj).intValue();
+       public static int toInt(Object obj) {
+               return (obj instanceof Long) ?
+                       ((Long)obj).intValue() : ((Integer)obj).intValue();
+       }
+       
+       public static long getSeqLength(double from, double to, double incr) {
+               return getSeqLength(from, to, incr, true);
+       }
+       
+       public static long getSeqLength(double from, double to, double incr, 
boolean check) {
+               //Computing the length of a sequence with 1 + 
floor((to-from)/incr) 
+               //can lead to incorrect results due to round-off errors in case 
of 
+               //a very small increment. Hence, we use a different formulation 
+               //that exhibits better numerical stability by avoiding the 
subtraction
+               //of numbers of different magnitude.
+               if( check && (Double.isNaN(from) || Double.isNaN(to) || 
Double.isNaN(incr) 
+                       || (from > to && incr > 0) || (from < to && incr < 0)) 
) {
+                       throw new RuntimeException("Invalid seq parameters: 
("+from+", "+to+", "+incr+")");
+               }
+               return 1L + (long) Math.floor(to/incr - from/incr);
        }
        
-       public static int roundToNext(int val, int factor) {
+       public static int roundToNext(int val, int factor) {
                //round up to next non-zero multiple of factor
                int pval = Math.max(val, factor);
                return ((pval + factor-1) / factor) * factor;

Reply via email to