Repository: incubator-systemml
Updated Branches:
  refs/heads/master b0fb707d2 -> afe61b5a2
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java index 67efa8b..f25f3a1 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java @@ -43,8 +43,8 @@ public class ConvolutionGPUInstruction extends GPUInstruction public ConvolutionGPUInstruction(CPOperand in1, CPOperand in2, CPOperand out, String opcode, String istr) throws DMLRuntimeException { super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), opcode, istr); - if(!opcode.equals("bias_add")) { - throw new DMLRuntimeException("Incorrect usage. Expected the opcode to be bias_add, but found " + opcode); + if(!(opcode.equals("bias_add") || opcode.equals("relu_backward"))) { + throw new DMLRuntimeException("Incorrect usage. 
Expected the opcode to be bias_add or relu_backward, but found " + opcode); } _input1 = in1; _input2 = in2; @@ -129,7 +129,7 @@ public class ConvolutionGPUInstruction extends GPUInstruction return new ConvolutionGPUInstruction(in1, null, out, opcode, str, stride, padding, input_shape, filter_shape); } - else if( opcode.equalsIgnoreCase("bias_add") ) { + else if( opcode.equalsIgnoreCase("bias_add") || opcode.equalsIgnoreCase("relu_backward") ) { InstructionUtils.checkNumFields(parts, 3); CPOperand in1 = new CPOperand(parts[1]); CPOperand in2 = new CPOperand(parts[2]); @@ -155,6 +155,20 @@ public class ConvolutionGPUInstruction extends GPUInstruction ec.releaseMatrixOutputForGPUInstruction(_output.getName()); } + public void processReLUBackwardInstruction(ExecutionContext ec) throws DMLRuntimeException { + Statistics.incrementNoOfExecutedGPUInst(); + MatrixObject input = ec.getMatrixInputForGPUInstruction(_input1.getName()); + MatrixObject dout = ec.getMatrixInputForGPUInstruction(_input2.getName()); + + MatrixObject out = ec.getDenseMatrixOutputForGPUInstruction(_output.getName()); + ec.setMetaData(_output.getName(), input.getNumRows(), input.getNumColumns()); + LibMatrixCUDA.relu_backward(input, dout, out); + // release inputs/outputs + ec.releaseMatrixInputForGPUInstruction(_input1.getName()); + ec.releaseMatrixInputForGPUInstruction(_input2.getName()); + ec.releaseMatrixOutputForGPUInstruction(_output.getName()); + } + @Override public void processInstruction(ExecutionContext ec) throws DMLRuntimeException @@ -163,6 +177,10 @@ public class ConvolutionGPUInstruction extends GPUInstruction processBiasInstruction(ec); return; } + else if (instOpcode.equalsIgnoreCase("relu_backward")) { + processReLUBackwardInstruction(ec); + return; + } Statistics.incrementNoOfExecutedGPUInst(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index 0cfffdb..4cfe79f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -42,6 +42,7 @@ import static jcuda.jcudnn.JCudnn.cudnnSetConvolution2dDescriptor; import static jcuda.jcudnn.JCudnn.cudnnSetFilter4dDescriptor; import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor; import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor; +import static jcuda.jcudnn.JCudnn.cudnnActivationBackward; import static jcuda.jcudnn.cudnnConvolutionMode.CUDNN_CROSS_CORRELATION; import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE; import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX; @@ -244,6 +245,23 @@ public class LibMatrixCUDA { return poolingDesc; } + public static void relu_backward(MatrixObject input, MatrixObject dout, MatrixObject outputBlock) throws DMLRuntimeException { + if(isInSparseFormat(input)) { + ((JCudaObject)input.getGPUObject()).sparseToDense(); + } + if(isInSparseFormat(dout)) { + ((JCudaObject)dout.getGPUObject()).sparseToDense(); + } + long rows = input.getNumRows(); + long cols = input.getNumColumns(); + Pointer imagePointer = ((JCudaObject)input.getGPUObject()).jcudaDenseMatrixPtr; + Pointer doutPointer = ((JCudaObject)dout.getGPUObject()).jcudaDenseMatrixPtr; + Pointer outputPointer = ((JCudaObject)outputBlock.getGPUObject()).jcudaDenseMatrixPtr; + kernels.launchKernel("relu_backward", + ExecutionConfig.getConfigForSimpleMatrixOperations((int)rows, (int)cols), + imagePointer, doutPointer, outputPointer, (int)rows, (int)cols); + } + public static void bias_add(MatrixObject input, MatrixObject bias, MatrixObject outputBlock) throws DMLRuntimeException { if(isInSparseFormat(input)) { 
((JCudaObject)input.getGPUObject()).sparseToDense(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java index 1400b31..89cdff8 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java @@ -57,7 +57,8 @@ public class LibMatrixDNN { enum TaskType { MaxPooling_Forward, MaxPooling_Backward, // Alternate approaches that we tried but the performance was unsatisfactory be included: direct, non-looped im2col - LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, LoopedIm2ColConv2dBwdData + LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, LoopedIm2ColConv2dBwdData, + BiasAdd, ReluBackward } // ------------------------------------------------------------------------------------------------ @@ -564,21 +565,108 @@ public class LibMatrixDNN { return maxIndex; } - public static void bias_add(MatrixBlock input, MatrixBlock bias, MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException { - // Keeping it single-threaded as memory-bound operation. 
TODO: explore optimization potential for multithreaded implementation + public static void relu_backward(MatrixBlock input, MatrixBlock dout, MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException { + int N = input.getNumRows(); + ConvolutionParameters params = new ConvolutionParameters(N, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, numThreads); + params.input1 = input; + params.input2 = dout; + params.output = outputBlock; + if(input.getNumRows() != dout.getNumRows() || input.getNumColumns() != dout.getNumColumns()) { + throw new DMLRuntimeException("Incorrect dimensions for relu_backward:" + + input.getNumRows() + " != " + dout.getNumRows() + " || " + input.getNumColumns() + " != " + dout.getNumColumns()); + } + runConvTask(TaskType.ReluBackward, params); + } + + private static void doReluBackward(int n, ConvolutionParameters params) throws DMLRuntimeException { + // (X > 0) * dout + double [] outputArray = params.output.getDenseBlock(); + int numOutCols = params.input1.getNumColumns(); + if(!params.input1.isInSparseFormat() && !params.input2.isInSparseFormat()) { + double [] inputArr = params.input1.getDenseBlock(); + double [] doutArr = params.input2.getDenseBlock(); + for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) { + outputArray[i] = inputArr[i] > 0 ? doutArr[i] : 0; + } + } + else { + // Perform (X > 0) + if(params.input1.isInSparseFormat()) { + Iterator<IJV> iter = params.input1.sparseBlock.getIterator(n, n+1); + while(iter.hasNext()) { + IJV ijv = iter.next(); + int i = ijv.getI(); + int j = ijv.getJ(); + outputArray[i*numOutCols + j] = ijv.getV() > 0 ? 1 : 0; + } + } + else { + double [] inputArr = params.input1.getDenseBlock(); + for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) { + outputArray[i] = inputArr[i] > 0 ? 
1 : 0; + } + } + // Then perform (X > 0) * dout + if(params.input2.isInSparseFormat()) { + Iterator<IJV> iter = params.input2.sparseBlock.getIterator(n, n+1); + while(iter.hasNext()) { + IJV ijv = iter.next(); + int i = ijv.getI(); + int j = ijv.getJ(); + outputArray[i*numOutCols + j] *= ijv.getV(); + } + } + else { + double [] doutArr = params.input2.getDenseBlock(); + for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) { + outputArray[i] *= doutArr[i]; + } + } + } + } + + public static void bias_add(MatrixBlock input, MatrixBlock bias, MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException { int N = input.getNumRows(); int K = bias.getNumRows(); int PQ = input.getNumColumns() / K; - double [] outputArray = outputBlock.getDenseBlock(); + + ConvolutionParameters params = new ConvolutionParameters(N, PQ, -1, -1, K, -1, -1, -1, -1, -1, -1, numThreads); + params.input1 = input; + params.input2 = bias; + params.output = outputBlock; + if(input.isEmptyBlock()) { - fillBias(bias, outputArray, N, K, PQ); + double [] outputArray = outputBlock.getDenseBlock(); + for(int n = 0; n < N; n++) + fillBias(bias, outputArray, n, N, K, PQ); + } + else { + runConvTask(TaskType.BiasAdd, params); + } + } + + private static void doBiasAdd(int n, ConvolutionParameters params) throws DMLRuntimeException { + double [] outputArray = params.output.getDenseBlock(); + int PQ = params.C; + int numOutCols = params.input1.getNumColumns(); + + if(!params.input1.isInSparseFormat() && !params.input2.isInSparseFormat()) { + double [] inputArr = params.input1.getDenseBlock(); + double [] biasArr = params.input2.getDenseBlock(); + int K = params.K; + final int inputOffset = n*K*PQ; + for(int k = 0; k < K; k++) { + int offset = inputOffset + k*PQ; + for(int pq = 0; pq < PQ; pq++) { + outputArray[offset + pq] = inputArr[offset + pq] + biasArr[k]; + } + } } else { - fillBias(bias, outputArray, N, K, PQ); - int numOutCols = input.getNumColumns(); - if(input.isInSparseFormat()) { - 
Iterator<IJV> iter = input.sparseBlock.getIterator(); + fillBias(params.input2, outputArray, n, params.N, params.K, PQ); + if(params.input1.isInSparseFormat()) { + Iterator<IJV> iter = params.input1.sparseBlock.getIterator(n, n+1); while(iter.hasNext()) { IJV ijv = iter.next(); int i = ijv.getI(); @@ -587,35 +675,32 @@ public class LibMatrixDNN { } } else { - double [] inputArr = input.getDenseBlock(); - for(int i = 0; i < inputArr.length; i++) { + double [] inputArr = params.input1.getDenseBlock(); + for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) { outputArray[i] += inputArr[i]; } } } + } - private static void fillBias(MatrixBlock bias, double [] outputArray, int N, int K, int PQ) { + private static void fillBias(MatrixBlock bias, double [] outputArray, int n, int N, int K, int PQ) { if(bias.isInSparseFormat()) { Iterator<IJV> iter = bias.sparseBlock.getIterator(); while(iter.hasNext()) { IJV ijv = iter.next(); int k = ijv.getI(); double val = ijv.getV(); - for(int n = 0; n < N; n++) { - int fromIndex = n*K*PQ + k*PQ; - Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); - } + int fromIndex = n*K*PQ + k*PQ; + Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); } } else { - double [] biasArr = bias.getDenseBlock(); - for(int n = 0; n < N; n++) { - for(int k = 0; k < K; k++) { - int fromIndex = n*K*PQ + k*PQ; - double val = biasArr[k]; - Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); - } + double [] biasArr = bias.getDenseBlock(); + for(int k = 0; k < K; k++) { + int fromIndex = n*K*PQ + k*PQ; + double val = biasArr[k]; + Arrays.fill(outputArray, fromIndex, fromIndex + PQ, val); } } } @@ -842,6 +927,14 @@ public class LibMatrixDNN { for(int n = n1; n < n2; n++) doPoolingBackward(n, params); break; + case BiasAdd: + for(int n = n1; n < n2; n++) + doBiasAdd(n, params); + break; + case ReluBackward: + for(int n = n1; n < n2; n++) + doReluBackward(n, params); + break; case LoopedIm2ColConv2d: { MatrixBlock im2ColOutBlock = 
im2ColOutBlocks.remove(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java new file mode 100644 index 0000000..9dd2338 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sysml.test.integration.functions.tensor; + +import java.util.HashMap; + +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; +import org.junit.Test; + +public class ReluBackwardTest extends AutomatedTestBase +{ + private final static String TEST_NAME = "ReluBackwardTest"; + private final static String TEST_DIR = "functions/tensor/"; + private final static String TEST_CLASS_DIR = TEST_DIR + ReluBackwardTest.class.getSimpleName() + "/"; + private final static double epsilon=0.0000000001; + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, + new String[] {"B"})); + } + + @Test + public void testReluBackwardDense1() + { + runReluBackwardTest(ExecType.CP, 10, 100); + } + + @Test + public void testReluBackwardDense2() + { + runReluBackwardTest(ExecType.CP, 100, 10); + } + + @Test + public void testReluBackwardDense3() + { + runReluBackwardTest(ExecType.CP, 100, 100); + } + + /** + * + * @param et + * @param sparse + */ + public void runReluBackwardTest( ExecType et, int M, int N) + { + RUNTIME_PLATFORM oldRTP = rtplatform; + + boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; + + try + { + TestConfiguration config = getTestConfiguration(TEST_NAME); + if(et == ExecType.SPARK) { + rtplatform = RUNTIME_PLATFORM.SPARK; + } + else { + rtplatform = (et==ExecType.MR)? 
RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE; + } + if( rtplatform == RUNTIME_PLATFORM.SPARK ) + DMLScript.USE_LOCAL_SPARK_CONFIG = true; + + loadTestConfiguration(config); + + /* This is for running the junit test the new way, i.e., construct the arguments directly */ + String RI_HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = RI_HOME + TEST_NAME + ".dml"; + + programArgs = new String[]{"-explain", "-args", "" + M, "" + N, + output("B")}; + + boolean exceptionExpected = false; + int expectedNumberOfJobs = -1; + runTest(true, exceptionExpected, null, expectedNumberOfJobs); + + fullRScriptName = RI_HOME + TEST_NAME + ".R"; + rCmd = "Rscript" + " " + fullRScriptName + " " + M + " " + N + " " + expectedDir(); + + // Run comparison R script + runRScript(true); + HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B"); + + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B"); + TestUtils.compareMatrices(dmlfile, bHM, epsilon, "B-DML", "NumPy"); + + } + finally + { + rtplatform = oldRTP; + DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/test/scripts/functions/tensor/ReluBackwardTest.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/ReluBackwardTest.R b/src/test/scripts/functions/tensor/ReluBackwardTest.R new file mode 100644 index 0000000..6fec565 --- /dev/null +++ b/src/test/scripts/functions/tensor/ReluBackwardTest.R @@ -0,0 +1,30 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- +args <- commandArgs(TRUE) +library("Matrix") +library("matrixStats") +M=as.integer(args[1]) +N=as.integer(args[2]) + +x=matrix(seq(1 - M, M*N - M), M, N, byrow=TRUE) +dout=matrix(seq(M*N, 1), M, N, byrow=TRUE) +output = (x > 0) * dout +writeMM(as(output,"CsparseMatrix"), paste(args[3], "B", sep="")) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/test/scripts/functions/tensor/ReluBackwardTest.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/tensor/ReluBackwardTest.dml b/src/test/scripts/functions/tensor/ReluBackwardTest.dml new file mode 100644 index 0000000..500ee8c --- /dev/null +++ b/src/test/scripts/functions/tensor/ReluBackwardTest.dml @@ -0,0 +1,27 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- +M = $1 +N = $2 + +x=matrix(seq(1 - M, M*N - M), rows=M, cols=N) +dout=matrix(seq(M*N, 1), rows=M, cols=N) +output = (x > 0) * dout +write(output, $3, format="text") \ No newline at end of file