Repository: incubator-systemml
Updated Branches:
  refs/heads/master 41c513151 -> 4cd982917
[SYSTEMML-540] Support bias_add operation

- Also added an external UDF for a faster SGD Nesterov update. However,
  since the performance improvement was only 7%, I decided not to
  introduce a fused operator for it. We can revisit this in a later PR.
- bias_add should work for both CP and GPU.

Closes #328.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4cd98291
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4cd98291
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4cd98291

Branch: refs/heads/master
Commit: 4cd98291780691082be07b1a817bd71c1024ed62
Parents: 41c5131
Author: Niketan Pansare <[email protected]>
Authored: Fri Jan 6 10:20:30 2017 -0800
Committer: Niketan Pansare <[email protected]>
Committed: Fri Jan 6 10:20:30 2017 -0800

----------------------------------------------------------------------
 .../SystemML-NN/nn/layers/conv_builtin.dml      |   5 +-
 .../org/apache/sysml/hops/ConvolutionOp.java    |  12 +-
 .../sysml/parser/BuiltinFunctionExpression.java |   3 +-
 .../apache/sysml/udf/lib/SGDNesterovUpdate.java | 162 +++++++++++++++++++
 4 files changed, 170 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
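For context, a minimal DML sketch of what bias_add(out, b) computes, taken from the
replicate-and-add pattern removed from conv_builtin.dml below (here out is the
(N, F*Hout*Wout) conv2d output and b the (F, 1) bias vector, with F, Hout, Wout as
defined in that script):

    # old pattern: replicate b across the Hout*Wout spatial positions, reshape, and add
    ones = matrix(1, rows=1, cols=Hout*Wout)
    out = out + matrix(b %*% ones, rows=1, cols=F*Hout*Wout)

    # new builtin, same result on both CP and GPU
    out = bias_add(out, b)
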
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
index f7bbd57..3113ccf 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
@@ -60,6 +60,7 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
   */
   N = nrow(X)
   F = nrow(W)
+  # TODO: We should eliminate this in a separate PR
   Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
   Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
 
@@ -68,9 +69,7 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
                stride=[strideh,stridew], padding=[padh,padw])
 
   # Add bias term to each output filter
-  # Note: Biases vector b is replicated to (F, Hout*Wout) first.
-  ones = matrix(1, rows=1, cols=Hout*Wout)
-  out = out + matrix(b %*% ones, rows=1, cols=F*Hout*Wout)
+  out = bias_add(out, b)
 }
 
 backward = function(matrix[double] dout, int Hout, int Wout,


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index a00feb7..f1efbb1 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -181,13 +181,11 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
 		if(op == ConvOp.BIAS_ADD) {
 			MatrixCharacteristics[] mc = memo.getAllInputStats(getInput());
-			if( mc[0].rowsKnown() && mc[0].colsKnown() ) {
-				ret = new long[3];
-				ret[0] = mc[0].getRows();
-				ret[1] = mc[0].getCols();
-				ret[2] = -1;
-				return ret;
-			}
+			ret = new long[3];
+			ret[0] = mc[0].rowsKnown() ? mc[0].getRows() : -1;
+			ret[1] = mc[0].colsKnown() ? mc[0].getCols() : -1;
+			ret[2] = -1;
+			return ret;
 		}
 		
 		ConvolutionParameters params;


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index 6cd53fd..494b919 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -1105,12 +1105,11 @@ public class BuiltinFunctionExpression extends DataIdentifier
 		case BIAS_ADD:
 		{
-			Identifier input_id = getFirstExpr().getOutput();
 			Expression input = _args[0];
 			Expression bias = _args[1];
 			output.setDataType(DataType.MATRIX);
 			output.setValueType(ValueType.DOUBLE);
-			output.setDimensions(input_id.getDim1(), input_id.getDim2());
+			output.setDimensions(input.getOutput().getDim1(), input.getOutput().getDim2());
 			output.setBlockDimensions(input.getOutput().getRowsInBlock(), input.getOutput().getColumnsInBlock());
 			checkMatrixParam(input);
 			checkMatrixParam(bias);


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4cd98291/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
new file mode 100644
index 0000000..fa3bd0e
--- /dev/null
+++ b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.udf.lib;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.sysml.runtime.controlprogram.caching.CacheException;
+import org.apache.sysml.runtime.matrix.data.IJV;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.udf.FunctionParameter;
+import org.apache.sysml.udf.Matrix;
+import org.apache.sysml.udf.PackageFunction;
+import org.apache.sysml.udf.Scalar;
+import org.apache.sysml.udf.Matrix.ValueType;
+
+/**
+ * Use this class to perform an SGD update with Nesterov momentum in CP.
+ * Assumption: the input batch fits in CP (which is also the assumption of most deep learning systems).
+ *
+ * Usage:
+ * update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v) return (matrix[double] X, matrix[double] v) implemented in (classname="org.apache.sysml.udf.lib.SGDNesterovUpdate",exectype="mem");
+ * [X, v] = update_nesterov(X, dX, lr, mu, v);
+ *
+ * This class eliminates the unnecessary instruction overhead as well as memory pressure.
+ *
+ */
+public class SGDNesterovUpdate extends PackageFunction {
+	private static final long serialVersionUID = -3905212831582648882L;
+
+	private Matrix updatedX;
+	private Matrix updatedV;
+	private Random rand = new Random();
+
+	@Override
+	public int getNumFunctionOutputs() {
+		return 2;
+	}
+
+	@Override
+	public FunctionParameter getFunctionOutput(int pos) {
+		if(pos == 0)
+			return updatedX;
+		else if(pos == 1)
+			return updatedV;
+
+		throw new RuntimeException("Invalid function output being requested");
+	}
+
+	@Override
+	public void execute() {
+		try {
+			MatrixBlock X = ((Matrix) getFunctionInput(0)).getMatrixObject().acquireRead();
+			MatrixBlock dX = ((Matrix) getFunctionInput(1)).getMatrixObject().acquireRead();
+			double lr = Double.parseDouble(((Scalar)getFunctionInput(2)).getValue());
+			double mu = Double.parseDouble(((Scalar)getFunctionInput(3)).getValue());
+			MatrixBlock v = ((Matrix) getFunctionInput(4)).getMatrixObject().acquireRead();
+
+			// v = mu * v - lr * dX
+			updatedV = new Matrix( "tmp_" + rand.nextLong(), v.getNumRows(), v.getNumColumns(), ValueType.Double );
+			MatrixBlock updatedVMB = allocateDenseMatrixBlock(updatedV);
+			double [] updatedVData = updatedVMB.getDenseBlock();
+			multiplyByConstant(v, mu, updatedVData);
+			multiplyByConstant(dX, -lr, updatedVData);
+			updatedVMB.setNonZeros(-1); // rather than updatedVMB.recomputeNonZeros();
+			updatedV.setMatrixDoubleArray(updatedVMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
+
+			// X = X - mu * v_prev + (1 + mu) * v
+			updatedX = new Matrix( "tmp_" + rand.nextLong(), X.getNumRows(), X.getNumColumns(), ValueType.Double );
+			MatrixBlock updatedXMB = allocateDenseMatrixBlock(updatedX);
+			double [] updatedXData = updatedXMB.getDenseBlock();
+			copy(X, updatedXData);
+			multiplyByConstant(v, -mu, updatedXData);
+			multiplyByConstant(updatedVData, 1+mu, updatedXData);
+			updatedXMB.setNonZeros(-1); // rather than updatedXMB.recomputeNonZeros();
+			updatedX.setMatrixDoubleArray(updatedXMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
+
+			((Matrix) getFunctionInput(0)).getMatrixObject().release();
+			((Matrix) getFunctionInput(1)).getMatrixObject().release();
+			((Matrix) getFunctionInput(4)).getMatrixObject().release();
+		} catch (CacheException e) {
+			throw new RuntimeException("Exception while executing SGDNesterovUpdate", e);
+		} catch (IOException e) {
+			throw new RuntimeException("Exception while executing SGDNesterovUpdate", e);
+		}
+	}
+
+	private MatrixBlock allocateDenseMatrixBlock(Matrix mat) {
+		int rows = (int) mat.getNumRows();
+		int cols = (int) mat.getNumCols();
+		MatrixBlock mb = new MatrixBlock(rows, cols, false);
+		mb.allocateDenseBlock();
+		return mb;
+	}
+
+	// out += constant*in
+	private void multiplyByConstant(double [] in, double constant, double [] out) {
+		for(int i = 0; i < out.length; i++) {
+			out[i] += in[i]*constant;
+		}
+	}
+
+	// out += constant*in
+	private void multiplyByConstant(MatrixBlock in, double constant, double [] out) {
+		if(in.isInSparseFormat()) {
+			Iterator<IJV> iter = in.getSparseBlockIterator();
+			while(iter.hasNext()) {
+				IJV ijv = iter.next();
+				// row-major index into the dense output: i * numColumns + j
+				out[ijv.getI()*in.getNumColumns() + ijv.getJ()] += ijv.getV() * constant;
+			}
+		}
+		else {
+			double [] denseBlock = in.getDenseBlock();
+			if(denseBlock != null) {
+				// If not empty block
+				for(int i = 0; i < out.length; i++) {
+					out[i] += denseBlock[i]*constant;
+				}
+			}
+		}
+	}
+
+	// Assumption: dest is zero-ed out.
+	private void copy(MatrixBlock src, double [] dest) {
+		if(src.isInSparseFormat()) {
+			Iterator<IJV> iter = src.getSparseBlockIterator();
+			while(iter.hasNext()) {
+				IJV ijv = iter.next();
+				// row-major index into the dense destination: i * numColumns + j
+				dest[ijv.getI()*src.getNumColumns() + ijv.getJ()] = ijv.getV();
+			}
+		}
+		else {
+			double [] denseBlock = src.getDenseBlock();
+			if(denseBlock != null) {
+				// If not empty block
+				System.arraycopy(denseBlock, 0, dest, 0, dest.length);
+			}
+		}
+	}
+}
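
For reference, the update that this UDF fuses can be written directly in DML; the
following is a minimal sketch based on the formulas in the comments of execute(),
with variable names matching the usage example in the class javadoc (v_prev is
introduced here only to hold the previous velocity):

    update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v)
        return (matrix[double] X, matrix[double] v)
        implemented in (classname="org.apache.sysml.udf.lib.SGDNesterovUpdate", exectype="mem");

    # pure-DML equivalent of the fused update
    v_prev = v
    v = mu * v - lr * dX                 # velocity update
    X = X - mu * v_prev + (1 + mu) * v   # position update with Nesterov correction

    # single call via the external UDF
    [X, v] = update_nesterov(X, dX, lr, mu, v);

The UDF performs the same arithmetic in one call over the input blocks, which is how
it avoids the intermediate instructions and temporary matrices of the pure-DML version.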
