Repository: systemml Updated Branches: refs/heads/master a99cf844a -> f63b8c6fa
[SYSTEMML-540] Support softmax function on GPU via CuDNN - This API only supports dense softmax function using CuDNN's cudnnSoftmaxForward kernel. Closes #703. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f63b8c6f Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f63b8c6f Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f63b8c6f Branch: refs/heads/master Commit: f63b8c6faf074dae2a33be7898858a78174bf76b Parents: a99cf84 Author: Niketan Pansare <[email protected]> Authored: Tue Dec 5 11:11:35 2017 -0800 Committer: Niketan Pansare <[email protected]> Committed: Tue Dec 5 11:11:35 2017 -0800 ---------------------------------------------------------------------- .../nn/test/compare_backends/gen_softmax.dml | 23 ++++++++++ scripts/nn/test/compare_backends/run_tests.sh | 1 + .../nn/test/compare_backends/test_softmax.dml | 25 +++++++++++ .../nn/test/compare_backends/test_softmax.sh | 43 +++++++++++++++++++ .../java/org/apache/sysml/hops/BinaryOp.java | 14 +++++- .../java/org/apache/sysml/lops/UnaryCP.java | 14 ++++-- .../instructions/GPUInstructionParser.java | 1 + .../gpu/BuiltinUnaryGPUInstruction.java | 5 ++- .../gpu/MatrixBuiltinGPUInstruction.java | 2 + .../runtime/matrix/data/LibMatrixCuDNN.java | 45 ++++++++++++++++++++ 10 files changed, 167 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/gen_softmax.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/test/compare_backends/gen_softmax.dml b/scripts/nn/test/compare_backends/gen_softmax.dml new file mode 100644 index 0000000..3b31e02 --- /dev/null +++ b/scripts/nn/test/compare_backends/gen_softmax.dml @@ -0,0 +1,23 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +X = rand(rows=$rows, cols=$cols, sparsity=$sp, min=-0.5, max=1) +write(X, "input.mtx", format="binary") \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/run_tests.sh ---------------------------------------------------------------------- diff --git a/scripts/nn/test/compare_backends/run_tests.sh b/scripts/nn/test/compare_backends/run_tests.sh index 13f57e8..8c15399 100644 --- a/scripts/nn/test/compare_backends/run_tests.sh +++ b/scripts/nn/test/compare_backends/run_tests.sh @@ -26,3 +26,4 @@ ./test_conv2d.sh ./test_maxpool.sh ./test_maxpool_bwd.sh +./test_softmax.sh \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/test_softmax.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/test/compare_backends/test_softmax.dml b/scripts/nn/test/compare_backends/test_softmax.dml new file mode 100644 index 0000000..da08e70 --- /dev/null +++ b/scripts/nn/test/compare_backends/test_softmax.dml @@ -0,0 +1,25 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- +source("nn/layers/softmax.dml") as softmax +fmt = ifdef($fmt, 'csv') +X = read("input.mtx") +out = softmax::forward(X) +write(out, $out, format=fmt) http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/test_softmax.sh ---------------------------------------------------------------------- diff --git a/scripts/nn/test/compare_backends/test_softmax.sh b/scripts/nn/test/compare_backends/test_softmax.sh new file mode 100644 index 0000000..203c605 --- /dev/null +++ b/scripts/nn/test/compare_backends/test_softmax.sh @@ -0,0 +1,43 @@ +#!/usr/bin/bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +jars='systemml-*-extra.jar' + +for rows in 1 300 +do + for cols in 1 300 + do + for sparsity in 0.1 0.2 0.6 0.9 + do + # Generating the data + $SPARK_HOME/bin/spark-submit SystemML.jar -f gen_softmax.dml -nvargs sp=$sparsity rows=$rows cols=$cols + # Running a test in CPU mode + $SPARK_HOME/bin/spark-submit SystemML.jar -f test_softmax.dml -nvargs out=out_cp.csv + # Running a test in GPU mode + $SPARK_HOME/bin/spark-submit --jars $jars SystemML.jar -f test_softmax.dml -stats -gpu force -nvargs out=out_gpu.csv + # Comparing the CPU vs GPU results to make sure they are the same + $SPARK_HOME/bin/spark-submit SystemML.jar -f compare.dml -args out_cp.csv out_gpu.csv "softmax:rows="$rows",cols="$cols",sparsity="$sparsity + rm -rf out_cp.csv out_gpu.csv out_cp.csv.mtd out_gpu.csv.mtd + rm -rf input.mtx input.mtx.mtd + done + done +done \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/hops/BinaryOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java index 76c1a64..d207cba 100644 --- a/src/main/java/org/apache/sysml/hops/BinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java @@ -638,7 +638,19 @@ public class BinaryOp extends Hop { // Both operands are Matrixes ExecType et = optFindExecType(); - if ( et == ExecType.CP || et == ExecType.GPU ) + boolean isGPUSoftmax = et == ExecType.GPU && op == Hop.OpOp2.DIV && + getInput().get(0) instanceof UnaryOp && getInput().get(1) instanceof AggUnaryOp && + ((UnaryOp)getInput().get(0)).getOp() == OpOp1.EXP && ((AggUnaryOp)getInput().get(1)).getOp() == AggOp.SUM && + ((AggUnaryOp)getInput().get(1)).getDirection() == Direction.Row && + getInput().get(0) == getInput().get(1).getInput().get(0); + if(isGPUSoftmax) { + UnaryCP softmax = new UnaryCP(getInput().get(0).getInput().get(0).constructLops(), UnaryCP.OperationTypes.SOFTMAX, + getDataType(), getValueType(), et); + setOutputDimensions(softmax); + setLineNumbers(softmax); + setLops(softmax); + } + else if ( et == ExecType.CP || et == ExecType.GPU ) { Lop binary = null; http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/lops/UnaryCP.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/UnaryCP.java b/src/main/java/org/apache/sysml/lops/UnaryCP.java index 1ccafe7..1af06e2 100644 --- a/src/main/java/org/apache/sysml/lops/UnaryCP.java +++ b/src/main/java/org/apache/sysml/lops/UnaryCP.java @@ -36,7 +36,7 @@ public class UnaryCP extends Lop public enum OperationTypes { NOT, ABS, SIN, COS, TAN, ASIN, ACOS, ATAN, SQRT, LOG, EXP, SINH, COSH, TANH, CAST_AS_SCALAR, CAST_AS_MATRIX, CAST_AS_FRAME, CAST_AS_DOUBLE, CAST_AS_INT, CAST_AS_BOOLEAN, - PRINT, NROW, NCOL, LENGTH, ROUND, STOP, CEIL, FLOOR, CUMSUM + PRINT, NROW, NCOL, LENGTH, ROUND, STOP, CEIL, FLOOR, CUMSUM, SOFTMAX } public static final String CAST_AS_SCALAR_OPCODE = "castdts"; @@ -57,8 +57,9 @@ public class UnaryCP extends Lop * @param op operation type * @param dt data type * @param vt value type + * @param et exec type */ - public UnaryCP(Lop input, OperationTypes op, DataType dt, ValueType vt) { + public UnaryCP(Lop input, OperationTypes op, DataType dt, ValueType vt, ExecType et) { super(Lop.Type.UnaryCP, dt, vt); operation = op; this.addInput(input); @@ -70,7 +71,11 @@ public class UnaryCP extends Lop boolean aligner = false; boolean definesMRJob = false; lps.addCompatibility(JobType.INVALID); - this.lps.setProperties(inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob); + this.lps.setProperties(inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob); + } + + public UnaryCP(Lop input, OperationTypes op, DataType dt, ValueType vt) { + this(input, op, dt, vt, ExecType.CP); } @Override @@ -171,6 +176,9 @@ public class UnaryCP extends Lop case LENGTH: return "length"; + case SOFTMAX: + return "softmax"; + default: throw new LopsException(this.printErrorLocation() + "Unknown operation: " + operation); } http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java index c07f2b8..e234d52 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java @@ -100,6 +100,7 @@ public class GPUInstructionParser extends InstructionParser String2GPUInstructionType.put( "atan", GPUINSTRUCTION_TYPE.BuiltinUnary); String2GPUInstructionType.put( "sign", GPUINSTRUCTION_TYPE.BuiltinUnary); String2GPUInstructionType.put( "sigmoid", GPUINSTRUCTION_TYPE.BuiltinUnary); + String2GPUInstructionType.put( "softmax", GPUINSTRUCTION_TYPE.BuiltinUnary); // Binary Builtin functions String2GPUInstructionType.put( "solve", GPUINSTRUCTION_TYPE.BuiltinBinary); http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java index b67d9fa..cd78164 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java @@ -78,13 +78,14 @@ public abstract class BuiltinUnaryGPUInstruction extends GPUInstruction { opcode = parts[0]; in.split(parts[1]); out.split(parts[2]); - func = Builtin.getBuiltinFnObject(opcode); + // func = Builtin.getBuiltinFnObject(opcode); + // new UnaryOperator(func) if(in.getDataType() == DataType.SCALAR) throw new DMLRuntimeException("The instruction is not supported on GPU:" + str); // return new ScalarBuiltinCPInstruction(new SimpleOperator(func), in, out, opcode, str); else if(in.getDataType() == DataType.MATRIX) - return new MatrixBuiltinGPUInstruction(new UnaryOperator(func), in, out, opcode, str); + return new MatrixBuiltinGPUInstruction(null, in, out, opcode, str); } return null; http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java index 27eeefe..04760ee 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java @@ -82,6 +82,8 @@ public class MatrixBuiltinGPUInstruction extends BuiltinUnaryGPUInstruction { LibMatrixCUDA.sign(ec, ec.getGPUContext(0), getExtendedOpcode(), mat, _output.getName()); break; case "sigmoid": LibMatrixCUDA.sigmoid(ec, ec.getGPUContext(0), getExtendedOpcode(), mat, _output.getName()); break; + case "softmax": + LibMatrixCuDNN.softmax(ec, ec.getGPUContext(0), getExtendedOpcode(), mat, _output.getName()); break; default: throw new DMLRuntimeException("Unsupported GPU operator:" + opcode); } http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java index 4e23953..c88cfd2 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java @@ -35,6 +35,7 @@ import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW; import static jcuda.runtime.JCuda.cudaMemset; import jcuda.CudaException; import jcuda.Pointer; +import jcuda.jcudnn.JCudnn; import jcuda.jcudnn.cudnnActivationDescriptor; import jcuda.jcudnn.cudnnConvolutionFwdPreference; import jcuda.jcudnn.cudnnHandle; @@ -54,6 +55,9 @@ import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; import org.apache.sysml.utils.GPUStatistics; import org.apache.sysml.utils.Statistics; +import static jcuda.jcudnn.cudnnSoftmaxAlgorithm.CUDNN_SOFTMAX_ACCURATE; +import static jcuda.jcudnn.cudnnSoftmaxMode.CUDNN_SOFTMAX_MODE_CHANNEL; + /** * This class contains method that invoke CuDNN operations. */ @@ -164,6 +168,47 @@ public class LibMatrixCuDNN extends LibMatrixCUDA { throwCuDNNDimensionError(N, CHW, K, CRS, N, KPQ); } } + + /** + * Performs an "softmax" operation on a matrix on the GPU + * @param ec execution context + * @param gCtx a valid {@link GPUContext} + * @param instName the invoking instruction's name for record {@link Statistics}. + * @param in1 input matrix + * @param outputName output matrix name + * @throws DMLRuntimeException if DMLRuntimeException occurs + */ + public static void softmax(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException { + if(LOG.isTraceEnabled()) { + LOG.trace("GPU : softmax" + ", GPUContext=" + gCtx); + } + cudnnTensorDescriptor tensorDesc = allocateTensorDescriptor(toInt(in1.getNumRows()), toInt(in1.getNumColumns()), 1, 1); + Pointer srcPointer = getDensePointerForCuDNN(gCtx, in1, instName); + MatrixObject out = ec.getMatrixObject(outputName); + ec.allocateGPUMatrixObject(outputName, in1.getNumRows(), in1.getNumColumns()); + out.getGPUObject(gCtx).allocateAndFillDense(0); + Pointer dstPointer = getDensePointerForCuDNN(gCtx, out, instName); + JCudnn.cudnnSoftmaxForward(gCtx.getCudnnHandle(), CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, + one(), tensorDesc, srcPointer, + zero(), tensorDesc, dstPointer); + cudnnDestroyTensorDescriptor(tensorDesc); + } + + /** + * Convenience method to get tensor descriptor + * @param N number of images + * @param C number of channels + * @param H height + * @param W width + * @return cudnn tensor descriptor + * @throws DMLRuntimeException if the input descriptor and matrix dimensions don't match + */ + private static cudnnTensorDescriptor allocateTensorDescriptor(int N, int C, int H, int W) throws DMLRuntimeException { + cudnnTensorDescriptor tensorDescriptor = new cudnnTensorDescriptor(); + cudnnCreateTensorDescriptor(tensorDescriptor); + cudnnSetTensor4dDescriptor(tensorDescriptor, CUDNN_TENSOR_NCHW, LibMatrixCUDA.CUDNN_DATA_TYPE, N, C, H, W); + return tensorDescriptor; + } /**
