Repository: incubator-systemml
Updated Branches:
  refs/heads/master 41c513151 -> 4cd982917


[SYSTEMML-540] Support bias_add operation

- Also added an external UDF for a faster SGD Nesterov update. However,
since the performance improvement was only about 7%, I decided not to
introduce a fused operator for it. We can revisit this in a later PR.

- bias_add should work for both CP and GPU.

Closes #328.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4cd98291
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4cd98291
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4cd98291

Branch: refs/heads/master
Commit: 4cd98291780691082be07b1a817bd71c1024ed62
Parents: 41c5131
Author: Niketan Pansare <[email protected]>
Authored: Fri Jan 6 10:20:30 2017 -0800
Committer: Niketan Pansare <[email protected]>
Committed: Fri Jan 6 10:20:30 2017 -0800

----------------------------------------------------------------------
 .../SystemML-NN/nn/layers/conv_builtin.dml      |   5 +-
 .../org/apache/sysml/hops/ConvolutionOp.java    |  12 +-
 .../sysml/parser/BuiltinFunctionExpression.java |   3 +-
 .../apache/sysml/udf/lib/SGDNesterovUpdate.java | 162 +++++++++++++++++++
 4 files changed, 170 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml 
b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
index f7bbd57..3113ccf 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
@@ -60,6 +60,7 @@ forward = function(matrix[double] X, matrix[double] W, 
matrix[double] b,
    */
   N = nrow(X)
   F = nrow(W)
+  # TODO: We should eliminate this in a separate PR
   Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
   Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
   
@@ -68,9 +69,7 @@ forward = function(matrix[double] X, matrix[double] W, 
matrix[double] b,
                stride=[strideh,stridew], padding=[padh,padw])
 
   # Add bias term to each output filter
-  # Note: Biases vector b is replicated to (F, Hout*Wout) first.
-  ones = matrix(1, rows=1, cols=Hout*Wout)
-  out = out + matrix(b %*% ones, rows=1, cols=F*Hout*Wout)
+  out = bias_add(out, b)
 }
 
 backward = function(matrix[double] dout, int Hout, int Wout,

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java 
b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index a00feb7..f1efbb1 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -181,13 +181,11 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
                
                if(op == ConvOp.BIAS_ADD) {
                        MatrixCharacteristics[] mc = 
memo.getAllInputStats(getInput());
-                       if( mc[0].rowsKnown() && mc[0].colsKnown() ) {
-                               ret = new long[3];
-                               ret[0] = mc[0].getRows();
-                               ret[1] = mc[0].getCols();
-                               ret[2] = -1;
-                               return ret;
-                       }
+                       ret = new long[3];
+                       ret[0] = mc[0].rowsKnown() ? mc[0].getRows() : -1;
+                       ret[1] = mc[0].colsKnown() ? mc[0].getCols() : -1;
+                       ret[2] = -1;
+                       return ret;
                }
        
                ConvolutionParameters params;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java 
b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index 6cd53fd..494b919 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -1105,12 +1105,11 @@ public class BuiltinFunctionExpression extends 
DataIdentifier
                
                case BIAS_ADD:
                {
-                       Identifier input_id = getFirstExpr().getOutput();
                        Expression input = _args[0];
                        Expression bias = _args[1];
                        output.setDataType(DataType.MATRIX);
                        output.setValueType(ValueType.DOUBLE);
-                       output.setDimensions(input_id.getDim1(), 
input_id.getDim2());
+                       output.setDimensions(input.getOutput().getDim1(), 
input.getOutput().getDim2());
                        
output.setBlockDimensions(input.getOutput().getRowsInBlock(), 
input.getOutput().getColumnsInBlock());
                        checkMatrixParam(input);
                        checkMatrixParam(bias);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java 
b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
new file mode 100644
index 0000000..fa3bd0e
--- /dev/null
+++ b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.udf.lib;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.sysml.runtime.controlprogram.caching.CacheException;
+import org.apache.sysml.runtime.matrix.data.IJV;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.udf.FunctionParameter;
+import org.apache.sysml.udf.Matrix;
+import org.apache.sysml.udf.PackageFunction;
+import org.apache.sysml.udf.Scalar;
+import org.apache.sysml.udf.Matrix.ValueType;
+
+/**
+ * Use this class to perform an SGD update with Nesterov momentum in CP.
+ * Assumption: the input batch fits in CP (which is also the assumption of 
most deep learning systems).
+ * 
+ * Usage:
+ * update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, 
double lr, double mu, matrix[double] v) return (matrix[double] X, 
matrix[double] v) implemented in 
(classname="org.apache.sysml.udf.lib.SGDNesterovUpdate",exectype="mem");
+ * [X, v] = update_nesterov(X, dX, lr, mu, v);
+ * 
+ * 
+ * This class eliminates the unnecessary instruction overhead as well as 
memory pressure. 
+ * 
+ */
+public class SGDNesterovUpdate extends PackageFunction {
+       private static final long serialVersionUID = -3905212831582648882L;
+
+       private Matrix updatedX;
+       private Matrix updatedV;
+       private Random rand = new Random();
+       
+       @Override
+       public int getNumFunctionOutputs() {
+               return 2;
+       }
+
+       @Override
+       public FunctionParameter getFunctionOutput(int pos) {
+               if(pos == 0)
+                       return updatedX;
+               else if(pos == 1)
+                       return updatedV;
+               
+               throw new RuntimeException("Invalid function output being 
requested");
+       }
+
+       @Override
+       public void execute() {
+               try {
+                       MatrixBlock X = ((Matrix) 
getFunctionInput(0)).getMatrixObject().acquireRead();
+                       MatrixBlock dX = ((Matrix) 
getFunctionInput(1)).getMatrixObject().acquireRead();
+                       double lr = 
Double.parseDouble(((Scalar)getFunctionInput(2)).getValue());
+                       double mu = 
Double.parseDouble(((Scalar)getFunctionInput(3)).getValue());
+                       MatrixBlock v = ((Matrix) 
getFunctionInput(4)).getMatrixObject().acquireRead();
+                       
+                       // v = mu * v - lr * dX
+                       updatedV = new Matrix( "tmp_" + rand.nextLong(), 
v.getNumRows(), v.getNumColumns(), ValueType.Double );
+                       MatrixBlock updatedVMB = 
allocateDenseMatrixBlock(updatedV);
+                       double [] updatedVData = updatedVMB.getDenseBlock();
+                       multiplyByConstant(v, mu, updatedVData);
+                       multiplyByConstant(dX, -lr, updatedVData);
+                       updatedVMB.setNonZeros(-1); // rather than 
updatedVMB.recomputeNonZeros();
+                       updatedV.setMatrixDoubleArray(updatedVMB, 
OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
+                       
+                       // X = X - mu * v_prev + (1 + mu) * v
+                       updatedX = new Matrix( "tmp_" + rand.nextLong(), 
X.getNumRows(), X.getNumColumns(), ValueType.Double );
+                       MatrixBlock updatedXMB = 
allocateDenseMatrixBlock(updatedX);
+                       double [] updatedXData = updatedXMB.getDenseBlock();
+                       copy(X, updatedXData);
+                       multiplyByConstant(v, -mu, updatedXData);
+                       multiplyByConstant(updatedVData, 1+mu, updatedXData);
+                       updatedXMB.setNonZeros(-1); // rather than 
updatedXMB.recomputeNonZeros();
+                       updatedX.setMatrixDoubleArray(updatedXMB, 
OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
+                       
+                       ((Matrix) 
getFunctionInput(0)).getMatrixObject().release();
+                       ((Matrix) 
getFunctionInput(1)).getMatrixObject().release();
+                       ((Matrix) 
getFunctionInput(4)).getMatrixObject().release();
+               } catch (CacheException e) {
+                       throw new RuntimeException("Exception while executing 
SGDNesterovUpdate", e);
+               } catch (IOException e) {
+                       throw new RuntimeException("Exception while executing 
SGDNesterovUpdate", e);
+               }
+       }
+       
+       private MatrixBlock allocateDenseMatrixBlock(Matrix mat) {
+               int rows = (int) mat.getNumRows();
+               int cols = (int) mat.getNumCols();
+               MatrixBlock mb = new MatrixBlock(rows, cols, false);
+               mb.allocateDenseBlock();
+               return mb;
+       }
+       
+       
+       // out += constant*in
+       private void multiplyByConstant(double [] in, double constant, double 
[] out) {
+               for(int i = 0; i < out.length; i++) {
+                       out[i] += in[i]*constant;
+               }
+       }
+       
+       // out += constant*in
+       private void multiplyByConstant(MatrixBlock in, double constant, double 
[] out) {
+               if(in.isInSparseFormat()) {
+                       Iterator<IJV> iter = in.getSparseBlockIterator();
+                       while(iter.hasNext()) {
+                               IJV ijv = iter.next();
+                               out[ijv.getI()*ijv.getJ()] += ijv.getV() * 
constant;
+                       }
+               }
+               else {
+                       double [] denseBlock = in.getDenseBlock();
+                       if(denseBlock != null) {
+                               // If not empty block
+                               for(int i = 0; i < out.length; i++) {
+                                       out[i] += denseBlock[i]*constant;
+                               }
+                       }
+               }
+       }
+       
+       // Assumption dest is zero-ed out.
+       private void copy(MatrixBlock src, double [] dest) {
+               if(src.isInSparseFormat()) {
+                       Iterator<IJV> iter = src.getSparseBlockIterator();
+                       while(iter.hasNext()) {
+                               IJV ijv = iter.next();
+                               dest[ijv.getI()*ijv.getJ()] = ijv.getV();
+                       }
+               }
+               else {
+                       double [] denseBlock = src.getDenseBlock();
+                       if(denseBlock != null) {
+                               // If not empty block
+                               System.arraycopy(denseBlock, 0, dest, 0, 
dest.length);
+                       }
+               }
+       }
+}

Reply via email to