[1/2] incubator-systemml git commit: [SYSTEMML-540] Improved the performance of bias_add and added relu_backward (CP + GPU)

niketanpansare Tue, 10 Jan 2017 08:42:13 -0800

Repository: incubator-systemml
Updated Branches:
  refs/heads/master b0fb707d2 -> afe61b5a2



http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
index 67efa8b..f25f3a1 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/ConvolutionGPUInstruction.java
@@ -43,8 +43,8 @@ public class ConvolutionGPUInstruction extends GPUInstruction
        
        public ConvolutionGPUInstruction(CPOperand in1, CPOperand in2, 
CPOperand out, String opcode, String istr) throws DMLRuntimeException {
                super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), 
opcode, istr);
-               if(!opcode.equals("bias_add")) {
-                       throw new DMLRuntimeException("Incorrect usage. 
Expected the opcode to be bias_add, but found " + opcode);
+               if(!(opcode.equals("bias_add") || 
opcode.equals("relu_backward"))) {
+                       throw new DMLRuntimeException("Incorrect usage. 
Expected the opcode to be bias_add or relu_backward, but found " + opcode);
                }
                _input1 = in1;
                _input2 = in2;
@@ -129,7 +129,7 @@ public class ConvolutionGPUInstruction extends 
GPUInstruction
                        return new ConvolutionGPUInstruction(in1, null, out, 
opcode, str, stride,
                                        padding, input_shape, filter_shape);
                }
-               else if( opcode.equalsIgnoreCase("bias_add") ) {
+               else if( opcode.equalsIgnoreCase("bias_add") || 
opcode.equalsIgnoreCase("relu_backward") ) {
                        InstructionUtils.checkNumFields(parts, 3);
                        CPOperand in1 = new CPOperand(parts[1]);
                        CPOperand in2 = new CPOperand(parts[2]);
@@ -155,6 +155,20 @@ public class ConvolutionGPUInstruction extends 
GPUInstruction
                ec.releaseMatrixOutputForGPUInstruction(_output.getName());
        }
        
+       /**
+        * Executes the relu_backward instruction on the GPU: acquires both input
+        * matrices and the output via the execution context, sets the output
+        * dimensions to those of the first input, calls
+        * LibMatrixCUDA.relu_backward and finally releases inputs and output.
+        *
+        * @param ec execution context used to acquire and release the matrices
+        * @throws DMLRuntimeException declared by this method; presumably raised by the calls it makes
+        */
+       public void processReLUBackwardInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
+               Statistics.incrementNoOfExecutedGPUInst();
+               MatrixObject input = 
ec.getMatrixInputForGPUInstruction(_input1.getName());
+               MatrixObject dout = 
ec.getMatrixInputForGPUInstruction(_input2.getName());
+               
+               MatrixObject out = 
ec.getDenseMatrixOutputForGPUInstruction(_output.getName());
+               ec.setMetaData(_output.getName(), input.getNumRows(), 
input.getNumColumns());
+               LibMatrixCUDA.relu_backward(input, dout, out);
+               // release inputs/outputs
+               // NOTE(review): there is no try/finally here, so these releases are skipped if the call above throws.
+               ec.releaseMatrixInputForGPUInstruction(_input1.getName());
+               ec.releaseMatrixInputForGPUInstruction(_input2.getName());
+               ec.releaseMatrixOutputForGPUInstruction(_output.getName());
+       }
+       
        @Override
        public void processInstruction(ExecutionContext ec) 
                        throws DMLRuntimeException 
@@ -163,6 +177,10 @@ public class ConvolutionGPUInstruction extends 
GPUInstruction
                        processBiasInstruction(ec);
                        return;
                }
+               else if (instOpcode.equalsIgnoreCase("relu_backward")) {
+                       processReLUBackwardInstruction(ec);
+                       return;
+               }
                
                Statistics.incrementNoOfExecutedGPUInst();
                                        

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 0cfffdb..4cfe79f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -42,6 +42,7 @@ import static 
jcuda.jcudnn.JCudnn.cudnnSetConvolution2dDescriptor;
 import static jcuda.jcudnn.JCudnn.cudnnSetFilter4dDescriptor;
 import static jcuda.jcudnn.JCudnn.cudnnSetPooling2dDescriptor;
 import static jcuda.jcudnn.JCudnn.cudnnSetTensor4dDescriptor;
+import static jcuda.jcudnn.JCudnn.cudnnActivationBackward;
 import static jcuda.jcudnn.cudnnConvolutionMode.CUDNN_CROSS_CORRELATION;
 import static jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE;
 import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX;
@@ -244,6 +245,23 @@ public class LibMatrixCUDA {
                return poolingDesc;
        }
 
+       /**
+        * Launches the "relu_backward" kernel on the dense GPU pointers of input,
+        * dout and outputBlock. An input that is in sparse format is first
+        * converted with sparseToDense().
+        *
+        * @param input first kernel operand; its dimensions define rows and cols passed to the kernel
+        * @param dout second kernel operand
+        * @param outputBlock matrix whose dense GPU pointer receives the kernel output
+        * @throws DMLRuntimeException declared by this method; presumably raised by the conversion or kernel launch
+        */
+       public static void relu_backward(MatrixObject input, MatrixObject dout, 
MatrixObject outputBlock) throws DMLRuntimeException {
+               if(isInSparseFormat(input)) {
+                       ((JCudaObject)input.getGPUObject()).sparseToDense();
+               }
+               if(isInSparseFormat(dout)) {
+                       ((JCudaObject)dout.getGPUObject()).sparseToDense();
+               }
+               // NOTE(review): unlike the CP relu_backward in this commit, dout's dimensions are not checked against input's.
+               long rows = input.getNumRows();
+               long cols = input.getNumColumns();
+               Pointer imagePointer = 
((JCudaObject)input.getGPUObject()).jcudaDenseMatrixPtr;
+               Pointer doutPointer = 
((JCudaObject)dout.getGPUObject()).jcudaDenseMatrixPtr;
+               Pointer outputPointer = 
((JCudaObject)outputBlock.getGPUObject()).jcudaDenseMatrixPtr;
+               // NOTE(review): rows and cols are narrowed from long to int below; confirm callers never exceed the int range.
+               kernels.launchKernel("relu_backward",
+                               
ExecutionConfig.getConfigForSimpleMatrixOperations((int)rows, (int)cols),
+                               imagePointer, doutPointer, outputPointer, 
(int)rows, (int)cols);
+       }
+       
        public static void bias_add(MatrixObject input, MatrixObject bias, 
MatrixObject outputBlock) throws DMLRuntimeException {
                if(isInSparseFormat(input)) {
                        ((JCudaObject)input.getGPUObject()).sparseToDense();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 1400b31..89cdff8 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -57,7 +57,8 @@ public class LibMatrixDNN {
        enum TaskType {
                MaxPooling_Forward, MaxPooling_Backward, 
                // Alternate approaches that we tried but the performance was 
unsatisfactory be included: direct, non-looped im2col
-               LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, 
LoopedIm2ColConv2dBwdData
+               LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, 
LoopedIm2ColConv2dBwdData,
+               BiasAdd, ReluBackward
        }
        
        // 
------------------------------------------------------------------------------------------------
@@ -564,21 +565,108 @@ public class LibMatrixDNN {
                return maxIndex;
        }
        
-       public static void bias_add(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
-               // Keeping it single-threaded as memory-bound operation. TODO: 
explore optimization potential for multithreaded implementation
+       /**
+        * CP entry point for relu_backward. Packs input, dout and outputBlock into a
+        * ConvolutionParameters object (number of rows first, numThreads last, all
+        * other constructor arguments -1), verifies that input and dout have the
+        * same dimensions and dispatches TaskType.ReluBackward through runConvTask.
+        *
+        * @param input stored as params.input1
+        * @param dout stored as params.input2; must have the same number of rows and columns as input
+        * @param outputBlock stored as params.output
+        * @param numThreads forwarded to ConvolutionParameters
+        * @throws DMLRuntimeException if the dimensions of input and dout differ
+        */
+       public static void relu_backward(MatrixBlock input, MatrixBlock dout, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
+               int N = input.getNumRows();
+               ConvolutionParameters params = new ConvolutionParameters(N, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, numThreads);
+               params.input1 = input;
+               params.input2 = dout;
+               params.output = outputBlock;
+               // The shape check runs after params is populated but before any task is started.
+               if(input.getNumRows() != dout.getNumRows() || 
input.getNumColumns() != dout.getNumColumns()) {
+                       throw new DMLRuntimeException("Incorrect dimensions for 
relu_backward:" + 
+                               input.getNumRows() + " != " + dout.getNumRows() 
+ " || " + input.getNumColumns() + " != " + dout.getNumColumns());
+               }
+               runConvTask(TaskType.ReluBackward, params);
+       }
+       
+       /**
+        * Computes row n of (X &gt; 0) * dout into the dense output block, where X is
+        * params.input1 and dout is params.input2. Either operand may be in dense
+        * or sparse format; the output block is always written as dense.
+        *
+        * @param n index of the row to process
+        * @param params holder of input1 (X), input2 (dout) and the dense output block
+        * @throws DMLRuntimeException declared for the task dispatcher; not thrown directly here
+        */
+       private static void doReluBackward(int n, ConvolutionParameters params) throws DMLRuntimeException {
+               double [] outputArray = params.output.getDenseBlock();
+               int numOutCols = params.input1.getNumColumns();
                
+               final int rowStart = n*numOutCols, rowEnd = (n+1)*numOutCols;
+               if(!params.input1.isInSparseFormat() && !params.input2.isInSparseFormat()) {
+                       double [] inputArr = params.input1.getDenseBlock();
+                       double [] doutArr = params.input2.getDenseBlock();
+                       for(int i = rowStart; i < rowEnd; i++) {
+                               outputArray[i] = inputArr[i] > 0 ? doutArr[i] : 0;
+                       }
+                       return;
+               }
+               // At least one operand is sparse. Clear the row first so that cells missing from the
+               // iterated sparse operand become 0 instead of keeping a stale value or a bare (X > 0) mask
+               // (previously a cell with X > 0 and dout == 0 was left as 1 when dout was sparse).
+               Arrays.fill(outputArray, rowStart, rowEnd, 0);
+               if(params.input2.isInSparseFormat()) {
+                       // Only non-zero dout cells can produce a non-zero result; X may be dense or sparse.
+                       Iterator<IJV> iter = params.input2.sparseBlock.getIterator(n, n+1);
+                       while(iter.hasNext()) {
+                               IJV ijv = iter.next();
+                               if(params.input1.quickGetValue(ijv.getI(), ijv.getJ()) > 0)
+                                       outputArray[ijv.getI()*numOutCols + ijv.getJ()] = ijv.getV();
+                       }
+               }
+               else {
+                       // Sparse X with dense dout: only strictly positive X cells let dout through.
+                       double [] doutArr = params.input2.getDenseBlock();
+                       Iterator<IJV> iter = params.input1.sparseBlock.getIterator(n, n+1);
+                       while(iter.hasNext()) {
+                               IJV ijv = iter.next();
+                               int idx = ijv.getI()*numOutCols + ijv.getJ();
+                               if(ijv.getV() > 0)
+                                       outputArray[idx] = doutArr[idx];
+                       }
+               }
+       }
+       
+       public static void bias_add(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
                int N = input.getNumRows();
                int K = bias.getNumRows();
                int PQ = input.getNumColumns() / K;
-               double [] outputArray = outputBlock.getDenseBlock();
+               
+               ConvolutionParameters params = new ConvolutionParameters(N, PQ, 
-1, -1, K, -1, -1, -1, -1, -1, -1, numThreads);
+               params.input1 = input;
+               params.input2 = bias;
+               params.output = outputBlock;
+               
                if(input.isEmptyBlock()) {
-                       fillBias(bias, outputArray, N, K, PQ);
+                       double [] outputArray = outputBlock.getDenseBlock();
+                       for(int n = 0;  n < N; n++) 
+                               fillBias(bias, outputArray, n, N, K, PQ);
+               }
+               else {
+                       runConvTask(TaskType.BiasAdd, params);
+               }
+       }
+       
+       private static void doBiasAdd(int n, ConvolutionParameters params) 
throws DMLRuntimeException {
+               double [] outputArray = params.output.getDenseBlock();
+               int PQ = params.C;
+               int numOutCols = params.input1.getNumColumns();
+               
+               if(!params.input1.isInSparseFormat() && 
!params.input2.isInSparseFormat()) {
+                       double [] inputArr = params.input1.getDenseBlock();
+                       double [] biasArr = params.input2.getDenseBlock();
+                       int K = params.K;
+                       final int inputOffset = n*K*PQ;
+                       for(int k = 0; k < K; k++) {
+                               int offset = inputOffset + k*PQ;
+                               for(int pq = 0; pq < PQ; pq++) {
+                                       outputArray[offset + pq] = 
inputArr[offset + pq] + biasArr[k];
+                               }
+                       }
                }
                else {
-                       fillBias(bias, outputArray, N, K, PQ);
-                       int numOutCols = input.getNumColumns();
-                       if(input.isInSparseFormat()) {
-                               Iterator<IJV> iter = 
input.sparseBlock.getIterator();
+                       fillBias(params.input2, outputArray, n, params.N, 
params.K, PQ);
+                       if(params.input1.isInSparseFormat()) {
+                               Iterator<IJV> iter = 
params.input1.sparseBlock.getIterator(n, n+1);
                                while(iter.hasNext()) {
                                        IJV ijv = iter.next();
                                        int i = ijv.getI();
@@ -587,35 +675,32 @@ public class LibMatrixDNN {
                                }
                        }
                        else {
-                               double [] inputArr = input.getDenseBlock();
-                               for(int i = 0; i < inputArr.length; i++) {
+                               double [] inputArr = 
params.input1.getDenseBlock();
+                               for(int i = n*numOutCols; i < (n+1)*numOutCols; 
i++) {
                                        outputArray[i] += inputArr[i];
                                }
                        }
                }
+               
        }
        
-       private static void fillBias(MatrixBlock bias, double [] outputArray, 
int N, int K, int PQ) {
+       private static void fillBias(MatrixBlock bias, double [] outputArray, 
int n, int N, int K, int PQ) {
                if(bias.isInSparseFormat()) {
                        Iterator<IJV> iter = bias.sparseBlock.getIterator();
                        while(iter.hasNext()) {
                                IJV ijv = iter.next();
                                int k = ijv.getI();
                                double val = ijv.getV();
-                               for(int n = 0;  n < N; n++) {
-                                       int fromIndex = n*K*PQ + k*PQ;
-                                       Arrays.fill(outputArray, fromIndex, 
fromIndex + PQ, val);
-                               }
+                               int fromIndex = n*K*PQ + k*PQ;
+                               Arrays.fill(outputArray, fromIndex, fromIndex + 
PQ, val);
                        }
                }
                else {
-                       double [] biasArr = bias.getDenseBlock(); 
-                       for(int n = 0;  n < N; n++) {
-                               for(int k = 0; k < K; k++) {
-                                       int fromIndex = n*K*PQ + k*PQ;
-                                       double val = biasArr[k];
-                                       Arrays.fill(outputArray, fromIndex, 
fromIndex + PQ, val);
-                               }
+                       double [] biasArr = bias.getDenseBlock();
+                       for(int k = 0; k < K; k++) {
+                               int fromIndex = n*K*PQ + k*PQ;
+                               double val = biasArr[k];
+                               Arrays.fill(outputArray, fromIndex, fromIndex + 
PQ, val);
                        }
                }
        }
@@ -842,6 +927,14 @@ public class LibMatrixDNN {
                                        for(int n = n1; n < n2; n++) 
                                                doPoolingBackward(n, params);
                                        break;
+                               case BiasAdd:
+                                       for(int n = n1; n < n2; n++) 
+                                               doBiasAdd(n, params);
+                                       break;
+                               case ReluBackward:
+                                       for(int n = n1; n < n2; n++) 
+                                               doReluBackward(n, params);
+                                       break;
                                case LoopedIm2ColConv2d:
                                {       
                                        MatrixBlock im2ColOutBlock = 
im2ColOutBlocks.remove();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java
new file mode 100644
index 0000000..9dd2338
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/tensor/ReluBackwardTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysml.test.integration.functions.tensor;
+
+import java.util.HashMap;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+public class ReluBackwardTest extends AutomatedTestBase
+{
+       private final static String TEST_NAME = "ReluBackwardTest";
+       private final static String TEST_DIR = "functions/tensor/";
+       private final static String TEST_CLASS_DIR = TEST_DIR + 
ReluBackwardTest.class.getSimpleName() + "/";
+       private final static double epsilon=0.0000000001;
+       
+       @Override
+       public void setUp() {
+               addTestConfiguration(TEST_NAME, new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME, 
+                               new String[] {"B"}));
+       }
+       
+       @Test
+       public void testReluBackwardDense1() 
+       {
+               runReluBackwardTest(ExecType.CP, 10, 100);
+       }
+       
+       @Test
+       public void testReluBackwardDense2() 
+       {
+               runReluBackwardTest(ExecType.CP, 100, 10);
+       }
+       
+       @Test
+       public void testReluBackwardDense3() 
+       {
+               runReluBackwardTest(ExecType.CP, 100, 100);
+       }
+       
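+       /**
+        * @param et execution type that selects the runtime platform for the DML run
+        * @param M number of rows of the generated matrices
+        * @param N number of columns of the generated matrices
+        */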
+       public void runReluBackwardTest( ExecType et, int M, int N) 
+       {
+               RUNTIME_PLATFORM oldRTP = rtplatform;
+                       
+               boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+               
+               try
+               {
+                   TestConfiguration config = getTestConfiguration(TEST_NAME);
+                   if(et == ExecType.SPARK) {
+                       rtplatform = RUNTIME_PLATFORM.SPARK;
+                   }
+                   else {
+                       rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP 
: RUNTIME_PLATFORM.SINGLE_NODE;
+                   }
+                       if( rtplatform == RUNTIME_PLATFORM.SPARK )
+                               DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+                       
+                       loadTestConfiguration(config);
+               
+                       /* This is for running the junit test the new way, 
i.e., construct the arguments directly */
+                       String RI_HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = RI_HOME + TEST_NAME + ".dml";
+                       
+                       programArgs = new String[]{"-explain", "-args",  "" + 
M, "" + N, 
+                                       output("B")};
+                               
+                       boolean exceptionExpected = false;
+                       int expectedNumberOfJobs = -1;
+                       runTest(true, exceptionExpected, null, 
expectedNumberOfJobs);
+                       
+                       fullRScriptName = RI_HOME + TEST_NAME + ".R";
+                       rCmd = "Rscript" + " " + fullRScriptName + " " + M + " 
" + N + " " + expectedDir(); 
+                       
+                       // Run comparison R script
+                       runRScript(true);
+                       HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B");
+                       
+                       HashMap<CellIndex, Double> dmlfile = 
readDMLMatrixFromHDFS("B");
+                       TestUtils.compareMatrices(dmlfile, bHM, epsilon, 
"B-DML", "NumPy");
+                       
+               }
+               finally
+               {
+                       rtplatform = oldRTP;
+                       DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+               }
+       }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/test/scripts/functions/tensor/ReluBackwardTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/tensor/ReluBackwardTest.R 
b/src/test/scripts/functions/tensor/ReluBackwardTest.R
new file mode 100644
index 0000000..6fec565
--- /dev/null
+++ b/src/test/scripts/functions/tensor/ReluBackwardTest.R
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+# R reference for the relu_backward test.
+# Arguments: M (rows), N (columns) and a path prefix to which "B" is appended for the output file.
+args <- commandArgs(TRUE)
+library("Matrix")
+library("matrixStats") 
+M=as.integer(args[1])
+N=as.integer(args[2])
+
+# x holds the M*N consecutive integers starting at 1 - M, filled row by row,
+# so its first M values are <= 0 and the remaining ones are positive.
+x=matrix(seq(1 - M, M*N - M), M, N, byrow=TRUE)
+# dout counts down from M*N to 1, also filled row by row.
+dout=matrix(seq(M*N, 1), M, N, byrow=TRUE)
+# Keep dout only where x is strictly positive; all other cells become 0.
+output = (x > 0) * dout
+writeMM(as(output,"CsparseMatrix"), paste(args[3], "B", sep=""))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/afe61b5a/src/test/scripts/functions/tensor/ReluBackwardTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/tensor/ReluBackwardTest.dml 
b/src/test/scripts/functions/tensor/ReluBackwardTest.dml
new file mode 100644
index 0000000..500ee8c
--- /dev/null
+++ b/src/test/scripts/functions/tensor/ReluBackwardTest.dml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# 
+#-------------------------------------------------------------
+# DML side of the relu_backward test: $1 = rows, $2 = columns, $3 = output path.
+M = $1
+N = $2
+
+# x holds the M*N consecutive integers starting at 1 - M; dout counts down from M*N to 1.
+x=matrix(seq(1 - M, M*N - M), rows=M, cols=N)
+dout=matrix(seq(M*N, 1), rows=M, cols=N)
+# Presumably the expression pattern that is compiled to the relu_backward operator - confirm against the compiler rewrite.
+output = (x > 0) * dout
+write(output, $3, format="text")
\ No newline at end of file

[1/2] incubator-systemml git commit: [SYSTEMML-540] Improved the performance of bias_add and added relu_backward (CP + GPU)

Reply via email to