Repository: incubator-systemml
Updated Branches:
  refs/heads/master 31d2cda55 -> 42ebc9620


[SYSTEMML-540] [SYSTEMML-515] Allow an expression for sparsity

- This PR also improves the performance of dropout.

Closes #351.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/42ebc962
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/42ebc962
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/42ebc962

Branch: refs/heads/master
Commit: 42ebc9620e62fc7458c2d7177439e61569b89b9e
Parents: 31d2cda
Author: Niketan Pansare <[email protected]>
Authored: Thu Jan 19 12:07:22 2017 -0800
Committer: Niketan Pansare <[email protected]>
Committed: Thu Jan 19 12:07:22 2017 -0800

----------------------------------------------------------------------
 scripts/staging/SystemML-NN/nn/layers/dropout.dml    | 10 ++++++++--
 src/main/java/org/apache/sysml/hops/DataGenOp.java   | 12 ++++++++----
 src/main/java/org/apache/sysml/lops/DataGen.java     | 15 +++++++--------
 .../java/org/apache/sysml/parser/DataExpression.java |  5 +----
 .../instructions/cp/DataGenCPInstruction.java        |  5 ++++-
 .../instructions/spark/RandSPInstruction.java        |  5 ++++-
 6 files changed, 32 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42ebc962/scripts/staging/SystemML-NN/nn/layers/dropout.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/dropout.dml 
b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
index 6c0b0d0..6b46305 100644
--- a/scripts/staging/SystemML-NN/nn/layers/dropout.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
@@ -42,10 +42,16 @@ forward = function(matrix[double] X, double p, int seed)
    *  - out: Ouptuts, of same shape as X.
    *  - mask: Dropout mask used to compute the output.
    */
+  # Normally, we might use something like
+  #    `mask = rand(rows=nrow(X), cols=ncol(X), min=0, max=1, seed=seed) <= p`
+  # to create a dropout mask.  Fortunately, SystemML has a `sparsity` 
parameter on
+  # the `rand` function that allows us to create a mask directly.
   if (seed == -1) {
-    seed = as.integer(floor(as.scalar(rand(rows=1, cols=1, min=1, 
max=100000))))
+       mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p)
+  }
+  else {
+       mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, 
seed=seed)
   }
-  mask = rand(rows=nrow(X), cols=ncol(X), min=0, max=1, seed=seed) <= p
   out = X * mask / p
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42ebc962/src/main/java/org/apache/sysml/hops/DataGenOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java 
b/src/main/java/org/apache/sysml/hops/DataGenOp.java
index d560539..c9eecb7 100644
--- a/src/main/java/org/apache/sysml/hops/DataGenOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java
@@ -100,8 +100,10 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
                        _paramIndexMap.put(s, index);
                        index++;
                }
-               if ( mthd == DataGenMethod.RAND )
-                       _sparsity = 
Double.valueOf(((LiteralOp)inputParameters.get(DataExpression.RAND_SPARSITY)).getName());
+               
+               Hop sparsityOp = 
inputParameters.get(DataExpression.RAND_SPARSITY);
+               if ( mthd == DataGenMethod.RAND && sparsityOp instanceof 
LiteralOp)
+                       _sparsity = 
Double.valueOf(((LiteralOp)sparsityOp).getName());
                
                //generate base dir
                String scratch = ConfigurationManager.getScratchSpace();
@@ -199,7 +201,7 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
        {               
                double ret = 0;
                
-               if ( _op == DataGenMethod.RAND ) {
+               if ( _op == DataGenMethod.RAND && _sparsity != -1 ) {
                        if( hasConstantValue(0.0) ) { //if empty block
                                ret = 
OptimizerUtils.estimateSizeEmptyBlock(dim1, dim2);
                        }
@@ -237,7 +239,7 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
                {
                        long dim1 = 
computeDimParameterInformation(getInput().get(_paramIndexMap.get(DataExpression.RAND_ROWS)),
 memo);
                        long dim2 = 
computeDimParameterInformation(getInput().get(_paramIndexMap.get(DataExpression.RAND_COLS)),
 memo);
-                       long nnz = (long)(_sparsity * dim1 * dim2);
+                       long nnz = _sparsity >= 0 ? (long)(_sparsity * dim1 * 
dim2) : -1;
                        if( dim1>0 && dim2>0 )
                                return new long[]{ dim1, dim2, nnz };
                }
@@ -355,6 +357,8 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
                        _nnz = 0;
                else if ( dimsKnown() && _sparsity>=0 ) //general case
                        _nnz = (long) (_sparsity * _dim1 * _dim2);
+               else
+                       _nnz = -1;
        }
        
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42ebc962/src/main/java/org/apache/sysml/lops/DataGen.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/DataGen.java 
b/src/main/java/org/apache/sysml/lops/DataGen.java
index 2321b19..d30efe2 100644
--- a/src/main/java/org/apache/sysml/lops/DataGen.java
+++ b/src/main/java/org/apache/sysml/lops/DataGen.java
@@ -199,12 +199,11 @@ public class DataGen extends Lop
                sb.append(iLop.prepScalarLabel());
                sb.append(OPERAND_DELIMITOR);
                
-               iLop = 
_inputParams.get(DataExpression.RAND_SPARSITY.toString()); //no variable support
+               iLop = 
_inputParams.get(DataExpression.RAND_SPARSITY.toString());
                if (iLop.isVariable())
-                       throw new LopsException(printErrorLocation()
-                                       + "Parameter " + 
DataExpression.RAND_SPARSITY
-                                       + " must be a literal for a Rand 
operation.");
-               sb.append(iLop.getOutputParameters().getLabel()); 
+                       sb.append(iLop.prepScalarLabel());
+               else
+                       sb.append(iLop.getOutputParameters().getLabel()); 
                sb.append(OPERAND_DELIMITOR);
                
                iLop = _inputParams.get(DataExpression.RAND_SEED.toString());   
        
@@ -442,9 +441,9 @@ public class DataGen extends Lop
                
                iLop = 
_inputParams.get(DataExpression.RAND_SPARSITY.toString()); //no variable support
                if (iLop.isVariable())
-                       throw new LopsException(this.printErrorLocation() + 
"Parameter " 
-                                       + DataExpression.RAND_SPARSITY + " must 
be a literal for a Rand operation.");
-               sb.append( iLop.getOutputParameters().getLabel() );
+                       sb.append(iLop.prepScalarLabel());
+               else
+                       sb.append( iLop.getOutputParameters().getLabel() );
                sb.append( OPERAND_DELIMITOR );
                
                iLop = _inputParams.get(DataExpression.RAND_SEED.toString()); 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42ebc962/src/main/java/org/apache/sysml/parser/DataExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java 
b/src/main/java/org/apache/sysml/parser/DataExpression.java
index f70c1b9..451a2df 100644
--- a/src/main/java/org/apache/sysml/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysml/parser/DataExpression.java
@@ -1162,10 +1162,7 @@ public class DataExpression extends DataIdentifier
                                raiseValidateError("for Rand statement " + 
RAND_MIN + " has incorrect value type", conditional);
                        }
                        
-                       //parameters w/o support for variable inputs (requires 
double/int or string constants)
-                       if (!(getVarParam(RAND_SPARSITY) instanceof 
DoubleIdentifier || getVarParam(RAND_SPARSITY) instanceof IntIdentifier)) {
-                               raiseValidateError("for Rand statement " + 
RAND_SPARSITY + " has incorrect value type", conditional);
-                       }
+                       // Since sparsity can be an arbitrary expression 
(SYSTEMML-515), no validation check for DoubleIdentifier/IntIdentifier is required.
                        
                        if (!(getVarParam(RAND_PDF) instanceof 
StringIdentifier)) {
                                raiseValidateError("for Rand statement " + 
RAND_PDF + " has incorrect value type", conditional);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42ebc962/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java
index e7bb0cf..7cd4f77 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/DataGenCPInstruction.java
@@ -212,7 +212,10 @@ public class DataGenCPInstruction extends 
UnaryCPInstruction
                        maxValue = Double.valueOf(s[6]).doubleValue();
                }
                
-               double sparsity = Double.parseDouble(s[7]);
+               double sparsity = -1;
+               if (!s[7].contains( Lop.VARIABLE_NAME_PLACEHOLDER)) {
+                       sparsity = Double.valueOf(s[7]);
+               }
                
                        long seed = DataGenOp.UNSPECIFIED_SEED;
                        if( !s[8].contains( Lop.VARIABLE_NAME_PLACEHOLDER)){

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/42ebc962/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
index 8df8b26..1e70526 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
@@ -245,7 +245,10 @@ public class RandSPInstruction extends UnarySPInstruction
                        maxValue = Double.valueOf(s[6]).doubleValue();
                }
                
-               double sparsity = Double.parseDouble(s[7]);
+               double sparsity = -1;
+               if (!s[7].contains( Lop.VARIABLE_NAME_PLACEHOLDER)) {
+                       sparsity = Double.valueOf(s[7]);
+               }
                        
                long seed = DataGenOp.UNSPECIFIED_SEED;
                        if (!s[8].contains( Lop.VARIABLE_NAME_PLACEHOLDER)) {

Reply via email to