Repository: systemml
Updated Branches:
  refs/heads/master a99cf844a -> f63b8c6fa


[SYSTEMML-540] Support softmax function on GPU via CuDNN

- This API only supports the dense softmax function, implemented using
  CuDNN's cudnnSoftmaxForward kernel.

Closes #703.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f63b8c6f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f63b8c6f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f63b8c6f

Branch: refs/heads/master
Commit: f63b8c6faf074dae2a33be7898858a78174bf76b
Parents: a99cf84
Author: Niketan Pansare <[email protected]>
Authored: Tue Dec 5 11:11:35 2017 -0800
Committer: Niketan Pansare <[email protected]>
Committed: Tue Dec 5 11:11:35 2017 -0800

----------------------------------------------------------------------
 .../nn/test/compare_backends/gen_softmax.dml    | 23 ++++++++++
 scripts/nn/test/compare_backends/run_tests.sh   |  1 +
 .../nn/test/compare_backends/test_softmax.dml   | 25 +++++++++++
 .../nn/test/compare_backends/test_softmax.sh    | 43 +++++++++++++++++++
 .../java/org/apache/sysml/hops/BinaryOp.java    | 14 +++++-
 .../java/org/apache/sysml/lops/UnaryCP.java     | 14 ++++--
 .../instructions/GPUInstructionParser.java      |  1 +
 .../gpu/BuiltinUnaryGPUInstruction.java         |  5 ++-
 .../gpu/MatrixBuiltinGPUInstruction.java        |  2 +
 .../runtime/matrix/data/LibMatrixCuDNN.java     | 45 ++++++++++++++++++++
 10 files changed, 167 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/gen_softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/compare_backends/gen_softmax.dml 
b/scripts/nn/test/compare_backends/gen_softmax.dml
new file mode 100644
index 0000000..3b31e02
--- /dev/null
+++ b/scripts/nn/test/compare_backends/gen_softmax.dml
@@ -0,0 +1,23 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = rand(rows=$rows, cols=$cols, sparsity=$sp, min=-0.5, max=1)
+write(X, "input.mtx", format="binary")
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/run_tests.sh
----------------------------------------------------------------------
diff --git a/scripts/nn/test/compare_backends/run_tests.sh 
b/scripts/nn/test/compare_backends/run_tests.sh
index 13f57e8..8c15399 100644
--- a/scripts/nn/test/compare_backends/run_tests.sh
+++ b/scripts/nn/test/compare_backends/run_tests.sh
@@ -26,3 +26,4 @@
 ./test_conv2d.sh
 ./test_maxpool.sh
 ./test_maxpool_bwd.sh
+./test_softmax.sh
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/test_softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/compare_backends/test_softmax.dml 
b/scripts/nn/test/compare_backends/test_softmax.dml
new file mode 100644
index 0000000..da08e70
--- /dev/null
+++ b/scripts/nn/test/compare_backends/test_softmax.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+source("nn/layers/softmax.dml") as softmax
+fmt = ifdef($fmt, 'csv')
+X = read("input.mtx")
+out = softmax::forward(X)
+write(out, $out, format=fmt)

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/scripts/nn/test/compare_backends/test_softmax.sh
----------------------------------------------------------------------
diff --git a/scripts/nn/test/compare_backends/test_softmax.sh 
b/scripts/nn/test/compare_backends/test_softmax.sh
new file mode 100644
index 0000000..203c605
--- /dev/null
+++ b/scripts/nn/test/compare_backends/test_softmax.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+jars='systemml-*-extra.jar'
+
+for rows in 1 300
+do
+       for cols in 1 300
+       do
+               for sparsity in 0.1 0.2 0.6 0.9
+               do
+                       # Generating the data
+                       $SPARK_HOME/bin/spark-submit SystemML.jar -f 
gen_softmax.dml -nvargs sp=$sparsity rows=$rows cols=$cols
+                       # Running a test in CPU mode
+                       $SPARK_HOME/bin/spark-submit SystemML.jar -f 
test_softmax.dml -nvargs out=out_cp.csv
+                       # Running a test in GPU mode
+                       $SPARK_HOME/bin/spark-submit --jars $jars SystemML.jar 
-f test_softmax.dml -stats -gpu force -nvargs out=out_gpu.csv
+                       # Comparing the CPU vs GPU results to make sure they 
are the same
+                       $SPARK_HOME/bin/spark-submit SystemML.jar -f 
compare.dml -args out_cp.csv out_gpu.csv 
"softmax:rows="$rows",cols="$cols",sparsity="$sparsity
+                       rm -rf out_cp.csv out_gpu.csv out_cp.csv.mtd 
out_gpu.csv.mtd
+                       rm -rf input.mtx input.mtx.mtd
+               done
+       done
+done
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/hops/BinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java 
b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 76c1a64..d207cba 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -638,7 +638,19 @@ public class BinaryOp extends Hop
                {
                        // Both operands are Matrixes
                        ExecType et = optFindExecType();
-                       if ( et == ExecType.CP || et == ExecType.GPU ) 
+                       boolean isGPUSoftmax = et == ExecType.GPU && op == 
Hop.OpOp2.DIV && 
+                                       getInput().get(0) instanceof UnaryOp && 
getInput().get(1) instanceof AggUnaryOp && 
+                                       ((UnaryOp)getInput().get(0)).getOp() == 
OpOp1.EXP && ((AggUnaryOp)getInput().get(1)).getOp() == AggOp.SUM &&
+                                       
((AggUnaryOp)getInput().get(1)).getDirection() == Direction.Row &&
+                                       getInput().get(0) == 
getInput().get(1).getInput().get(0);
+                       if(isGPUSoftmax) {
+                               UnaryCP softmax = new 
UnaryCP(getInput().get(0).getInput().get(0).constructLops(), 
UnaryCP.OperationTypes.SOFTMAX, 
+                                               getDataType(), getValueType(), 
et);
+                               setOutputDimensions(softmax);
+                               setLineNumbers(softmax);
+                               setLops(softmax);
+                       }
+                       else if ( et == ExecType.CP || et == ExecType.GPU ) 
                        {
                                Lop binary = null;
                                

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/lops/UnaryCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/UnaryCP.java 
b/src/main/java/org/apache/sysml/lops/UnaryCP.java
index 1ccafe7..1af06e2 100644
--- a/src/main/java/org/apache/sysml/lops/UnaryCP.java
+++ b/src/main/java/org/apache/sysml/lops/UnaryCP.java
@@ -36,7 +36,7 @@ public class UnaryCP extends Lop
        public enum OperationTypes {
                NOT, ABS, SIN, COS, TAN, ASIN, ACOS, ATAN, SQRT, LOG, EXP, 
SINH, COSH, TANH,
                CAST_AS_SCALAR, CAST_AS_MATRIX, CAST_AS_FRAME, CAST_AS_DOUBLE, 
CAST_AS_INT, CAST_AS_BOOLEAN, 
-               PRINT, NROW, NCOL, LENGTH, ROUND, STOP, CEIL, FLOOR, CUMSUM
+               PRINT, NROW, NCOL, LENGTH, ROUND, STOP, CEIL, FLOOR, CUMSUM, 
SOFTMAX
        }
        
        public static final String CAST_AS_SCALAR_OPCODE = "castdts";
@@ -57,8 +57,9 @@ public class UnaryCP extends Lop
         * @param op operation type
         * @param dt data type
         * @param vt value type
+        * @param et exec type
         */
-       public UnaryCP(Lop input, OperationTypes op, DataType dt, ValueType vt) 
{
+       public UnaryCP(Lop input, OperationTypes op, DataType dt, ValueType vt, 
ExecType et) {
                super(Lop.Type.UnaryCP, dt, vt);
                operation = op;
                this.addInput(input);
@@ -70,7 +71,11 @@ public class UnaryCP extends Lop
                boolean aligner = false;
                boolean definesMRJob = false;
                lps.addCompatibility(JobType.INVALID);
-               this.lps.setProperties(inputs, ExecType.CP, 
ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob);
+               this.lps.setProperties(inputs, et, ExecLocation.ControlProgram, 
breaksAlignment, aligner, definesMRJob);
+       }
+       
+       public UnaryCP(Lop input, OperationTypes op, DataType dt, ValueType vt) 
{
+               this(input, op, dt, vt, ExecType.CP);
        }
 
        @Override
@@ -171,6 +176,9 @@ public class UnaryCP extends Lop
                case LENGTH:
                        return "length";
 
+               case SOFTMAX:
+                       return "softmax";
+                       
                default:
                        throw new LopsException(this.printErrorLocation() + 
"Unknown operation: " + operation);
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
index c07f2b8..e234d52 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
@@ -100,6 +100,7 @@ public class GPUInstructionParser  extends InstructionParser
                String2GPUInstructionType.put( "atan",  
GPUINSTRUCTION_TYPE.BuiltinUnary);
                String2GPUInstructionType.put( "sign",  
GPUINSTRUCTION_TYPE.BuiltinUnary);
                String2GPUInstructionType.put( "sigmoid", 
GPUINSTRUCTION_TYPE.BuiltinUnary);
+               String2GPUInstructionType.put( "softmax", 
GPUINSTRUCTION_TYPE.BuiltinUnary);
 
                // Binary Builtin functions
                String2GPUInstructionType.put( "solve", 
GPUINSTRUCTION_TYPE.BuiltinBinary);

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
index b67d9fa..cd78164 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
@@ -78,13 +78,14 @@ public abstract class BuiltinUnaryGPUInstruction extends 
GPUInstruction {
                        opcode = parts[0];
                        in.split(parts[1]);
                        out.split(parts[2]);
-                       func = Builtin.getBuiltinFnObject(opcode);
+                       // func = Builtin.getBuiltinFnObject(opcode);
+                       // new UnaryOperator(func)
                        
                        if(in.getDataType() == DataType.SCALAR)
                                throw new DMLRuntimeException("The instruction 
is not supported on GPU:" + str);
 //                             return new ScalarBuiltinCPInstruction(new 
SimpleOperator(func), in, out, opcode, str);
                        else if(in.getDataType() == DataType.MATRIX)
-                               return new MatrixBuiltinGPUInstruction(new 
UnaryOperator(func), in, out, opcode, str);
+                               return new MatrixBuiltinGPUInstruction(null, 
in, out, opcode, str);
                }
                
                return null;

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
index 27eeefe..04760ee 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
@@ -82,6 +82,8 @@ public class MatrixBuiltinGPUInstruction extends 
BuiltinUnaryGPUInstruction {
                                LibMatrixCUDA.sign(ec, ec.getGPUContext(0), 
getExtendedOpcode(), mat, _output.getName()); break;
                        case "sigmoid":
                                LibMatrixCUDA.sigmoid(ec, ec.getGPUContext(0), 
getExtendedOpcode(), mat, _output.getName()); break;
+                       case "softmax":
+                               LibMatrixCuDNN.softmax(ec, ec.getGPUContext(0), 
getExtendedOpcode(), mat, _output.getName()); break;
                        default:
                                throw new DMLRuntimeException("Unsupported GPU 
operator:" + opcode);
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/f63b8c6f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
index 4e23953..c88cfd2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
@@ -35,6 +35,7 @@ import static 
jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW;
 import static jcuda.runtime.JCuda.cudaMemset;
 import jcuda.CudaException;
 import jcuda.Pointer;
+import jcuda.jcudnn.JCudnn;
 import jcuda.jcudnn.cudnnActivationDescriptor;
 import jcuda.jcudnn.cudnnConvolutionFwdPreference;
 import jcuda.jcudnn.cudnnHandle;
@@ -54,6 +55,9 @@ import 
org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.utils.GPUStatistics;
 import org.apache.sysml.utils.Statistics;
 
+import static jcuda.jcudnn.cudnnSoftmaxAlgorithm.CUDNN_SOFTMAX_ACCURATE;
+import static jcuda.jcudnn.cudnnSoftmaxMode.CUDNN_SOFTMAX_MODE_CHANNEL;
+
 /**
  * This class contains method that invoke CuDNN operations.
  */
@@ -164,6 +168,47 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        throwCuDNNDimensionError(N, CHW, K, CRS, N, KPQ);
                }
        }
+       
+       /**
+        * Performs an "softmax" operation on a matrix on the GPU
+        * @param ec    execution context
+        * @param gCtx a valid {@link GPUContext}
+        * @param instName the invoking instruction's name for record {@link 
Statistics}.
+        * @param in1   input matrix
+        * @param outputName    output matrix name
+        * @throws DMLRuntimeException  if DMLRuntimeException occurs
+        */
+       public static void softmax(ExecutionContext ec, GPUContext gCtx, String 
instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+               if(LOG.isTraceEnabled()) {
+                       LOG.trace("GPU : softmax" + ", GPUContext=" + gCtx);
+               }
+               cudnnTensorDescriptor tensorDesc = 
allocateTensorDescriptor(toInt(in1.getNumRows()), toInt(in1.getNumColumns()), 
1, 1);
+               Pointer srcPointer = getDensePointerForCuDNN(gCtx, in1, 
instName);
+               MatrixObject out = ec.getMatrixObject(outputName);
+               ec.allocateGPUMatrixObject(outputName, in1.getNumRows(), 
in1.getNumColumns());
+               out.getGPUObject(gCtx).allocateAndFillDense(0);
+               Pointer dstPointer = getDensePointerForCuDNN(gCtx, out, 
instName);
+               JCudnn.cudnnSoftmaxForward(gCtx.getCudnnHandle(), 
CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, 
+                one(), tensorDesc, srcPointer,
+                zero(), tensorDesc, dstPointer);
+               cudnnDestroyTensorDescriptor(tensorDesc);
+       }
+       
+       /**
+        * Convenience method to get tensor descriptor
+        * @param N number of images
+        * @param C number of channels
+        * @param H height
+        * @param W width
+        * @return cudnn tensor descriptor
+        * @throws DMLRuntimeException if the input descriptor and matrix 
dimensions don't match
+        */
+       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) throws DMLRuntimeException {
+               cudnnTensorDescriptor tensorDescriptor = new 
cudnnTensorDescriptor();
+               cudnnCreateTensorDescriptor(tensorDescriptor);
+               cudnnSetTensor4dDescriptor(tensorDescriptor, CUDNN_TENSOR_NCHW, 
LibMatrixCUDA.CUDNN_DATA_TYPE, N, C, H, W);
+               return tensorDescriptor;
+       }
 
 
        /**

Reply via email to