Repository: incubator-systemml
Updated Branches:
  refs/heads/master ceeec4bbf -> 772fb5883


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java
new file mode 100644
index 0000000..4052fef
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/gpu/MatrixMatrixElementWiseOpTests.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Tests element-wise operations on the GPU.
+ * <p>
+ * Every test runs the same DML script through {@code runOnCPU} and {@code runOnGPU}
+ * (inherited helpers) and asserts that both produce equal outputs.
+ */
+public class MatrixMatrixElementWiseOpTests extends GPUTests {
+       private final static String TEST_NAME = "MatrixMatrixElementWiseOpTests";
+
+       private final int[] rowSizes = new int[] { 1, 64, 130, 1024, 2049 };
+       private final int[] columnSizes = new int[] { 1, 64, 130, 1024, 2049 };
+       // Matrix dimensions for the matrix-vector tests; unlike rowSizes/columnSizes these omit 1
+       private final int[] vectorTestRowSizes = new int[] { 64, 130, 1024, 2049 };
+       private final int[] vectorTestColumnSizes = new int[] { 64, 130, 1024, 2049 };
+       private final double[] sparsities = new double[] { 0.0, 0.03, 0.3, 0.9 };
+       private final double[] scalars = new double[] { 0.0, 0.5, 2.0 };
+       private final int seed = 42;
+
+       /**
+        * Clears recorded assertion information and registers/loads the configuration for this test class.
+        */
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       @Test
+       public void testAxpy() {
+               // "+*" is the plus-multiply opcode (the original passed the minus-multiply opcode here)
+               runAxpyTest("O = a*X + Y", "X", "Y", "a", "O", "gpu_+*");
+       }
+
+       @Test
+       public void testAxmy() {
+               // "-*" is the minus-multiply opcode (the original passed the plus-multiply opcode here)
+               runAxpyTest("O = X - a*Y", "X", "Y", "a", "O", "gpu_-*");
+       }
+
+       @Test
+       public void testAdd() {
+               runMatrixMatrixElementwiseTest("O = X + Y", "X", "Y", "O", "gpu_+");
+       }
+
+       @Test
+       public void testMatrixColumnVectorAdd() {
+               runMatrixColumnVectorTest("O = X + Y", "X", "Y", "O", "gpu_+");
+       }
+
+       @Test
+       public void testMatrixRowVectorAdd() {
+               runMatrixRowVectorTest("O = X + Y", "X", "Y", "O", "gpu_+");
+       }
+
+       @Test
+       public void testSubtract() {
+               runMatrixMatrixElementwiseTest("O = X - Y", "X", "Y", "O", "gpu_-");
+       }
+
+       @Test
+       public void testMatrixColumnVectorSubtract() {
+               runMatrixColumnVectorTest("O = X - Y", "X", "Y", "O", "gpu_-");
+       }
+
+       @Test
+       public void testMatrixRowVectorSubtract() {
+               runMatrixRowVectorTest("O = X - Y", "X", "Y", "O", "gpu_-");
+       }
+
+       @Test
+       public void testMultiply() {
+               runMatrixMatrixElementwiseTest("O = X * Y", "X", "Y", "O", "gpu_*");
+       }
+
+       @Test
+       public void testMatrixColumnVectorMultiply() {
+               runMatrixColumnVectorTest("O = X * Y", "X", "Y", "O", "gpu_*");
+       }
+
+       @Test
+       public void testMatrixRowVectorMultiply() {
+               runMatrixRowVectorTest("O = X * Y", "X", "Y", "O", "gpu_*");
+       }
+
+       @Test
+       public void testDivide() {
+               runMatrixMatrixElementwiseTest("O = X / Y", "X", "Y", "O", "gpu_/");
+       }
+
+       @Test
+       public void testMatrixColumnVectorDivide() {
+               runMatrixColumnVectorTest("O = X / Y", "X", "Y", "O", "gpu_/");
+       }
+
+       @Test
+       public void testMatrixRowVectorDivide() {
+               runMatrixRowVectorTest("O = X / Y", "X", "Y", "O", "gpu_/");
+       }
+
+       // ****************************************************************
+       // ************************ IGNORED TEST **************************
+       // FIXME : There is a bug in CPU "^" when A ^ B is executed where A & B are all zeroes
+       @Ignore
+       @Test
+       public void testPower() {
+               // The opcode must name the power operator (the original passed "gpu_%")
+               runMatrixMatrixElementwiseTest("O = X ^ Y", "X", "Y", "O", "gpu_^");
+       }
+
+       /**
+        * Runs the script on the CPU and on the GPU with the same inputs and asserts that the
+        * first output of both runs is equal.
+        *
+        * @param scriptStr         the script string
+        * @param inputs            map from script variable name to input value
+        * @param output            name of the output variable in the script string
+        * @param heavyHitterOpcode the opcode expected among the heavy hitters when executed on gpu
+        *                          (currently unused, see the note in the body)
+        */
+       private void assertCpuAndGpuAgree(String scriptStr, HashMap<String, Object> inputs, String output,
+                       String heavyHitterOpcode) {
+               List<Object> cpuOut = runOnCPU(spark, scriptStr, inputs, Arrays.asList(output));
+               List<Object> gpuOut = runOnGPU(spark, scriptStr, inputs, Arrays.asList(output));
+               // NOTE(review): the heavy hitter check was already disabled; the reason is not visible here
+               //assertHeavyHitterPresent(heavyHitterOpcode);
+               assertEqualObjects(cpuOut.get(0), gpuOut.get(0));
+       }
+
+       /**
+        * Runs a simple matrix-matrix elementwise op test over every combination of
+        * {@code rowSizes}, {@code columnSizes} and {@code sparsities}.
+        *
+        * @param scriptStr         the script string
+        * @param input1            name of the first input variable in the script string
+        * @param input2            name of the second input variable in the script string
+        * @param output            name of the output variable in the script string
+        * @param heavyHitterOpcode the string printed for the elementwise op heavy hitter when executed on gpu
+        */
+       private void runMatrixMatrixElementwiseTest(String scriptStr, String input1, String input2, String output,
+                       String heavyHitterOpcode) {
+               for (int m : rowSizes) {
+                       for (int n : columnSizes) {
+                               for (double sparsity : sparsities) {
+                                       // NOTE(review): X and Y share shape, sparsity and seed; if generateInputMatrix
+                                       // is deterministic in these arguments both operands are identical - confirm
+                                       Matrix X = generateInputMatrix(spark, m, n, sparsity, seed);
+                                       Matrix Y = generateInputMatrix(spark, m, n, sparsity, seed);
+                                       HashMap<String, Object> inputs = new HashMap<>();
+                                       inputs.put(input1, X);
+                                       inputs.put(input2, Y);
+                                       assertCpuAndGpuAgree(scriptStr, inputs, output, heavyHitterOpcode);
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Run O = aX +/- Y type operations test over every combination of
+        * {@code rowSizes}, {@code columnSizes}, {@code sparsities} and {@code scalars}.
+        *
+        * @param scriptStr         the script string
+        * @param input1            name of the first matrix input variable in the script string
+        * @param input2            name of the second matrix input variable in the script string
+        * @param scalarInput       name of the scalar which is multiplied with the first or second matrix
+        * @param output            name of the output variable in the script string
+        * @param heavyHitterOpcode the string printed for the axpy-style op heavy hitter when executed on gpu
+        */
+       private void runAxpyTest(String scriptStr, String input1, String input2, String scalarInput, String output,
+                       String heavyHitterOpcode) {
+               for (int m : rowSizes) {
+                       for (int n : columnSizes) {
+                               for (double sparsity : sparsities) {
+                                       for (double scalar : scalars) {
+                                               Matrix X = generateInputMatrix(spark, m, n, sparsity, seed);
+                                               Matrix Y = generateInputMatrix(spark, m, n, sparsity, seed);
+                                               HashMap<String, Object> inputs = new HashMap<>();
+                                               inputs.put(input1, X);
+                                               inputs.put(input2, Y);
+                                               inputs.put(scalarInput, scalar);
+
+                                               // Which of the two forms is tested depends on scriptStr
+                                               assertCpuAndGpuAgree(scriptStr, inputs, output, heavyHitterOpcode);
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Run O = X op Y where X is a matrix, Y is a column vector
+        *
+        * @param scriptStr         the script string
+        * @param matrixInput       name of the matrix input variable in the script string
+        * @param vectorInput       name of the vector input variable in the script string
+        * @param output            name of the output variable in the script string
+        * @param heavyHitterOpcode the string printed for the elementwise op heavy hitter when executed on gpu
+        */
+       private void runMatrixColumnVectorTest(String scriptStr, String matrixInput, String vectorInput, String output,
+                       String heavyHitterOpcode) {
+               for (int m : vectorTestRowSizes) {
+                       for (int n : vectorTestColumnSizes) {
+                               for (double sparsity : sparsities) {
+                                       Matrix X = generateInputMatrix(spark, m, n, sparsity, seed);
+                                       Matrix Y = generateInputMatrix(spark, m, 1, sparsity, seed);
+                                       HashMap<String, Object> inputs = new HashMap<>();
+                                       inputs.put(matrixInput, X);
+                                       inputs.put(vectorInput, Y);
+
+                                       // Operand order in the message follows the script: matrix first, vector second
+                                       System.out.println("Matrix[" + m + ", " + n + "] op Vector[" + m + ", 1], sparsity = " + sparsity);
+                                       assertCpuAndGpuAgree(scriptStr, inputs, output, heavyHitterOpcode);
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Run O = X op Y where X is a matrix, Y is a row vector
+        *
+        * @param scriptStr         the script string
+        * @param matrixInput       name of the matrix input variable in the script string
+        * @param vectorInput       name of the vector input variable in the script string
+        * @param output            name of the output variable in the script string
+        * @param heavyHitterOpcode the string printed for the elementwise op heavy hitter when executed on gpu
+        */
+       private void runMatrixRowVectorTest(String scriptStr, String matrixInput, String vectorInput, String output,
+                       String heavyHitterOpcode) {
+               for (int m : vectorTestRowSizes) {
+                       for (int n : vectorTestColumnSizes) {
+                               for (double sparsity : sparsities) {
+                                       Matrix X = generateInputMatrix(spark, m, n, sparsity, seed);
+                                       Matrix Y = generateInputMatrix(spark, 1, n, sparsity, seed);
+                                       HashMap<String, Object> inputs = new HashMap<>();
+                                       inputs.put(matrixInput, X);
+                                       inputs.put(vectorInput, Y);
+
+                                       // The vector is a row vector of shape [1, n] (the original message printed [m, 1])
+                                       System.out.println("Matrix[" + m + ", " + n + "] op Vector[1, " + n + "], sparsity = " + sparsity);
+                                       assertCpuAndGpuAgree(scriptStr, inputs, output, heavyHitterOpcode);
+                               }
+                       }
+               }
+       }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java 
b/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
new file mode 100644
index 0000000..f7c7851
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+/**
+ * Tests matrix multiplication on the GPU
+ */
+public class MatrixMultiplicationOpTest extends GPUTests {
+       private final static String TEST_NAME = "MatrixMultiplicationOpTest";
+       private final int seed = 42;
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       @Override
+       public double getTHRESHOLD() {
+               return 1e-5;
+       }
+
+       @Test
+       public void matrixMatrixTest1() {
+               String scriptStr = "O = X %*% Y";
+
+               int[] X1 = { 1, 128, 513, 1024 };
+               int[] X2 = { 128, 512, 1024 };
+               int[] Y2 = { 1, 128, 513, 1024 };
+               double[] SX = { 0.0, 0.03, 0.3, 0.9 };
+               double[] SY = { 0.0, 0.03, 0.3, 0.9 };
+
+               for (int x1 = 0; x1 < X1.length; x1++) {
+                       for (int x2 = 0; x2 < X2.length; x2++) {
+                               int y1 = x2;
+                               for (int y2 = 0; y2 < Y2.length; y2++) {
+                                       for (int sx = 0; sx < SX.length; sx++) {
+                                               for (int sy = 0; sy < 
SY.length; sy++) {
+                                                       
assertMatrixMultiplication(scriptStr, X1[x1], X2[x2], X2[y1], Y2[y2], SX[sx], 
SY[sy]);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       @Test
+       public void matrixMatrixTest2() {
+               String scriptStr = "O = X %*% t(Y)";
+
+               int[] X1 = { 1, 128, 513, 1024 };
+               int[] X2 = { 128, 512, 1024 };
+               int[] Y1 = { 1, 128, 513, 1024 };
+               double[] SX = { 0.0, 0.03, 0.3, 0.9 };
+               double[] SY = { 0.0, 0.03, 0.3, 0.9 };
+
+               for (int x1 = 0; x1 < X1.length; x1++) {
+                       for (int x2 = 0; x2 < X2.length; x2++) {
+                               int y2 = x2;
+                               for (int y1 = 0; y1 < Y1.length; y1++) {
+                                       for (int sx = 0; sx < SX.length; sx++) {
+                                               for (int sy = 0; sy < 
SY.length; sy++) {
+                                                       
assertMatrixMultiplication(scriptStr, X1[x1], X2[x2], Y1[x2], X2[y2], SX[sx], 
SY[sy]);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       @Test
+       public void matrixMatrixTest3() {
+               String scriptStr = "O = t(X) %*% Y";
+
+               int[] X1 = { 1, 128, 513, 1024 };
+               int[] X2 = { 128, 512, 1024 };
+               int[] Y2 = { 1, 128, 513, 1024 };
+               double[] SX = { 0.0, 0.03, 0.3, 0.9 };
+               double[] SY = { 0.0, 0.03, 0.3, 0.9 };
+
+               for (int x1 = 0; x1 < X1.length; x1++) {
+                       int y1 = x1;
+                       for (int x2 = 0; x2 < X2.length; x2++) {
+                               for (int y2 = 0; y2 < Y2.length; y2++) {
+                                       for (int sx = 0; sx < SX.length; sx++) {
+                                               for (int sy = 0; sy < 
SY.length; sy++) {
+                                                       
assertMatrixMultiplication(scriptStr, X1[x1], X2[x2], X1[y1], Y2[y2], SX[sx], 
SY[sy]);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       @Test
+       public void matrixMatrixTest4() {
+               String scriptStr = "O = t(X) %*% t(Y)";
+
+               int[] X1 = { 1, 128, 513, 1024 };
+               int[] X2 = { 128, 512, 1024 };
+               int[] Y1 = { 1, 128, 513, 1024 };
+               double[] SX = { 0.0, 0.03, 0.3, 0.9 };
+               double[] SY = { 0.0, 0.03, 0.3, 0.9 };
+
+               for (int x1 = 0; x1 < X1.length; x1++) {
+                       int y2 = x1;
+                       for (int x2 = 0; x2 < X2.length; x2++) {
+                               for (int y1 = 0; y1 < Y1.length; y1++) {
+                                       for (int sx = 0; sx < SX.length; sx++) {
+                                               for (int sy = 0; sy < 
SY.length; sy++) {
+                                                       
assertMatrixMultiplication(scriptStr, X1[x1], X2[x2], Y1[y1], X1[y2], SX[sx], 
SY[sy]);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       @Test
+       public void transposeSelfMatrixMultiply() {
+               String scriptStr = "O = t(X) %*% X";
+
+               int[] sizes = { 1, 128, 512, 1024, 2049 };
+               double[] sparsities = { 0.0, 0.03, 0.3, 0.9 };
+
+               for (int i = 0; i < sizes.length; i++) {
+                       for (int j = 0; j < sparsities.length; j++) {
+                               int side = sizes[i];
+                               double sparsity = sparsities[j];
+                               Matrix X = generateInputMatrix(spark, side, 
side, sparsity, seed);
+                               HashMap<String, Object> inputs = new 
HashMap<>();
+                               inputs.put("X", X);
+                               List<Object> cpuOuts = runOnCPU(spark, 
scriptStr, inputs, Arrays.asList("O"));
+                               List<Object> gpuOuts = runOnGPU(spark, 
scriptStr, inputs, Arrays.asList("O"));
+                               //assertHeavyHitterPresent("gpu_tsmm'");
+                               assertEqualObjects(cpuOuts.get(0), 
gpuOuts.get(0));
+                       }
+               }
+       }
+
+       /**
+        * Assert that matrix multiplication is the same on gpu and cpu
+        *
+        * @param scriptStr script string that has matrix multiplication (eg : 
O = X %*% Y)
+        * @param rows1     rows in X
+        * @param cols1     cols in X
+        * @param rows2     rows in Y
+        * @param cols2     cols in Y
+        * @param sparsity1 sparsity for X
+        * @param sparsity2 sparsity for Y
+        */
+       private void assertMatrixMultiplication(String scriptStr, int rows1, 
int cols1, int rows2, int cols2,
+                       double sparsity1, double sparsity2) {
+               HashMap<String, Object> inputs = new HashMap<>();
+               Matrix X = generateInputMatrix(spark, rows1, cols1, sparsity1, 
seed);
+               Matrix Y = generateInputMatrix(spark, rows2, cols2, sparsity2, 
seed);
+               inputs.put("X", X);
+               inputs.put("Y", Y);
+               List<Object> cpuOuts = runOnCPU(spark, scriptStr, inputs, 
Arrays.asList("O"));
+               List<Object> gpuOuts = runOnGPU(spark, scriptStr, inputs, 
Arrays.asList("O"));
+               //assertHeavyHitterPresent("gpu_ba+*'");
+               assertEqualObjects(cpuOuts.get(0), gpuOuts.get(0));
+       }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
new file mode 100644
index 0000000..f1f1ea5
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/gpu/NeuralNetworkOpTests.java
@@ -0,0 +1,508 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
+import org.apache.sysml.runtime.util.ConvolutionUtils;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+/**
+ * Test neural network operations on the GPU
+ */
+public class NeuralNetworkOpTests extends GPUTests {
+
+       private final static String TEST_NAME = "NeuralNetworkOpTests";
+       private final int seed = 42;
+
+       // The MAX_OP_SIZE is to take into consideration the memory available 
on the GPU as well as
+       // limits set by cudnn (operands need to be less than 2GB)
+       private static final double MAX_OP_SIZE;
+
+       static {
+               double MAX = 0.5 * 1024 * 1024 * 1024; // 0.5 GB (this HAS to 
be less than 2GB)
+               try {
+                       // Cap the maximum allowed operand size to 1/3rd of the 
usable GPU memory or MAX, whichever is lesser
+                       GPUContext gCtx = GPUContextPool.getFromPool();
+                       long availableMemory = gCtx.getAvailableMemory();
+                       double averageMemoryPerOperand = availableMemory / 3.0;
+                       MAX_OP_SIZE = Math.min(averageMemoryPerOperand, MAX);
+                       GPUContextPool.returnToPool(gCtx);
+               } catch (DMLRuntimeException e) {
+                       throw new RuntimeException(e);
+               }
+
+       }
+
+       // More comprehensive but time consuming tests
+       /*
+       private final List<Integer> Nlst = Arrays.asList(128, 64, 32);
+    private final List<Integer> Clst = Arrays.asList(30, 20, 3);
+    private final List<Integer> Hlst = Arrays.asList(400, 128, 32);
+    private final List<Integer> Wlst = Arrays.asList(400, 128, 32);
+    private final List<Integer> Klst = Arrays.asList(30, 20, 10);
+    private final List<Integer> Rlst = Arrays.asList(128, 63, 4);
+    private final List<Integer> Slst = Arrays.asList(128, 63, 4);
+    private final List<Integer> strideHeightLst = Arrays.asList(9, 3);
+    private final List<Integer> strideWidthLst = Arrays.asList(9, 3);
+    private final List<Integer> padHeightLst = Arrays.asList(3, 1);
+    private final List<Integer> padWidthLst = Arrays.asList(3, 1);
+    private final List<Double> sparsitylst = Arrays.asList(1.0);    // Only 
test for dense
+    */
+
+       private final List<Integer> Nlst = Arrays.asList(128, 64);
+       private final List<Integer> Clst = Arrays.asList(30, 3);
+       private final List<Integer> Hlst = Arrays.asList(256, 64);
+       private final List<Integer> Wlst = Arrays.asList(256, 64);
+       private final List<Integer> Klst = Arrays.asList(30, 20);
+       private final List<Integer> Rlst = Arrays.asList(128, 3, 1);
+       private final List<Integer> Slst = Arrays.asList(128, 3, 1);
+       private final List<Integer> strideHeightLst = Arrays.asList(9, 1);
+       private final List<Integer> strideWidthLst = Arrays.asList(9, 1);
+       private final List<Integer> padHeightLst = Arrays.asList(3, 1);
+       private final List<Integer> padWidthLst = Arrays.asList(3, 1);
+       private final List<Double> sparsitylst = Arrays.asList(1.0);    // Only 
test for dense
+
+       /**
+        * Clears recorded assertion information, then registers and loads the test
+        * configuration named {@code TEST_NAME}.
+        */
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       /**
+        * @return 1e-5; presumably the tolerance used when comparing CPU and GPU results - confirm in GPUTests
+        */
+       @Override
+       public double getTHRESHOLD() {
+               return 1e-5;
+       }
+
+       @Test
+       public void testConv2d() {
+               String scriptStr = "O = conv2d(image, filter, padding=[padH, 
padW], stride=[strideH, strideW], input_shape=[N,C,H,W], 
filter_shape=[K,C,R,S])";
+
+               for (long N : Nlst) {
+                       for (long C : Clst) {
+                               for (long H : Hlst) {
+                                       for (long W : Wlst) {
+                                               for (long K : Klst) {
+                                                       for (long R : Rlst) {
+                                                               for (long S : 
Slst) {
+                                                                       for 
(long strideH : strideHeightLst) {
+                                                                               
for (long strideW : strideWidthLst) {
+                                                                               
        for (long padH : padHeightLst) {
+                                                                               
                for (long padW : padWidthLst) {
+                                                                               
                        for (double sparsity : sparsitylst) {
+
+                                                                               
                                // Make sure ops fit in GPU memory and within 
constraints of cudnn
+                                                                               
                                long imageSize = N * C * H * W * 8l;
+                                                                               
                                if (imageSize > MAX_OP_SIZE)  // image size
+                                                                               
                                        continue;
+                                                                               
                                long filterSize = K * C * R * S * 8l;
+                                                                               
                                if (filterSize > MAX_OP_SIZE)  // filter size
+                                                                               
                                        continue;
+                                                                               
                                // filter is smaller than image + padding
+                                                                               
                                if (R > (H + padH) || S > (W + padW))
+                                                                               
                                        continue;
+
+                                                                               
                                int P = (int) ConvolutionUtils.getP(H, R, 
strideH, padH);
+                                                                               
                                int Q = (int) ConvolutionUtils.getQ(W, S, 
strideW, padW);
+
+                                                                               
                                long doutSize = N * K * P * Q * 8l;
+                                                                               
                                if (doutSize > MAX_OP_SIZE) // dout/output size
+                                                                               
                                        continue;
+
+                                                                               
                                double imageSizeInMB = imageSize / (1024.0 * 
1024.0);
+                                                                               
                                double filterSizeInMB = filterSize / (1024.0 * 
1024.0);
+                                                                               
                                double doutSizeInMB = doutSize / (1024.0 * 
1024.0);
+                                                                               
                                System.out
+                                                                               
                                                .format("conv2d, 
image[%d,%d,%d,%d](%.1fMB), filter[%d,%d,%d,%d](%.1f), 
dout[%d,%d,%d,%d](%.1fMB), stride[%d,%d], padding[%d,%d]",
+                                                                               
                                                                N, C, H, W, 
imageSizeInMB, N, C, R, S,
+                                                                               
                                                                filterSizeInMB, 
N, K, P, Q, doutSizeInMB,
+                                                                               
                                                                strideH, 
strideW, padH, padW);
+                                                                               
                                Matrix image = generateInputMatrix(spark, (int) 
N,
+                                                                               
                                                (int) (C * H * W), sparsity, 
seed);
+                                                                               
                                Matrix filter = generateInputMatrix(spark, 
(int) K,
+                                                                               
                                                (int) (C * R * S), sparsity, 
seed);
+                                                                               
                                HashMap<String, Object> inputs = new 
HashMap<>();
+                                                                               
                                inputs.put("N", N);
+                                                                               
                                inputs.put("C", C);
+                                                                               
                                inputs.put("H", H);
+                                                                               
                                inputs.put("W", W);
+                                                                               
                                inputs.put("K", K);
+                                                                               
                                inputs.put("R", R);
+                                                                               
                                inputs.put("S", S);
+                                                                               
                                inputs.put("strideH", strideH);
+                                                                               
                                inputs.put("strideW", strideW);
+                                                                               
                                inputs.put("padH", padH);
+                                                                               
                                inputs.put("padW", padW);
+                                                                               
                                inputs.put("image", image);
+                                                                               
                                inputs.put("filter", filter);
+                                                                               
                                List<Object> outCPU = runOnCPU(spark, 
scriptStr, inputs,
+                                                                               
                                                Arrays.asList("O"));
+                                                                               
                                List<Object> outGPU = runOnGPU(spark, 
scriptStr, inputs,
+                                                                               
                                                Arrays.asList("O"));
+                                                                               
                                assertHeavyHitterPresent("gpu_conv2d");
+                                                                               
                                assertEqualObjects(outCPU.get(0), 
outGPU.get(0));
+                                                                               
                                clearGPUMemory();
+                                                                               
                        }
+                                                                               
                }
+                                                                               
        }
+                                                                               
}
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Runs the DML builtin {@code conv2d_backward_filter} on both CPU and GPU
+        * for every combination of the configured batch, channel, image, filter,
+        * stride, padding and sparsity values, and asserts that the GPU result
+        * equals the CPU result.
+        *
+        * Combinations are skipped when the filter is larger than the image plus
+        * padding, or when the image, filter or dout size exceeds
+        * {@code MAX_OP_SIZE}. Sizes are computed with 8 bytes per cell.
+        *
+        * One progress record is printed per executed combination; the filter
+        * shape is reported as {@code [K,C,R,S]}, matching {@code filter_shape}
+        * in the script.
+        */
+       @Test
+       public void testConv2dBackwardFilter() {
+               String scriptStr = "O = conv2d_backward_filter(image, dout, padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], filter_shape=[K,C,R,S])";
+
+               for (long N : Nlst) {
+                       for (long C : Clst) {
+                               for (long H : Hlst) {
+                                       for (long W : Wlst) {
+                                               for (long K : Klst) {
+                                                       for (long R : Rlst) {
+                                                               for (long S : Slst) {
+                                                                       for (long strideH : strideHeightLst) {
+                                                                               for (long strideW : strideWidthLst) {
+                                                                                       for (long padH : padHeightLst) {
+                                                                                               for (long padW : padWidthLst) {
+                                                                                                       for (double sparsity : sparsitylst) {
+
+                                                                                                               // Skip combinations where the filter is larger than image + padding
+                                                                                                               if (R > (H + padH) || S > (W + padW))
+                                                                                                                       continue;
+
+                                                                                                               // Make sure ops fit in GPU memory and within constraints of cudnn
+                                                                                                               long imageSize = N * C * H * W * 8L;
+                                                                                                               if (imageSize > MAX_OP_SIZE) // image size
+                                                                                                                       continue;
+                                                                                                               long filterSize = K * C * R * S * 8L;
+                                                                                                               if (filterSize > MAX_OP_SIZE) // filter size
+                                                                                                                       continue;
+
+                                                                                                               // Output height/width for this stride/padding configuration
+                                                                                                               int P = (int) ConvolutionUtils.getP(H, R, strideH, padH);
+                                                                                                               int Q = (int) ConvolutionUtils.getQ(W, S, strideW, padW);
+
+                                                                                                               long doutSize = N * K * P * Q * 8L;
+                                                                                                               if (doutSize > MAX_OP_SIZE) // dout/output size
+                                                                                                                       continue;
+
+                                                                                                               double imageSizeInMB = imageSize / (1024.0 * 1024.0);
+                                                                                                               double filterSizeInMB = filterSize / (1024.0 * 1024.0);
+                                                                                                               double doutSizeInMB = doutSize / (1024.0 * 1024.0);
+                                                                                                               // One record per line; filter dimensions are [K,C,R,S]
+                                                                                                               System.out.format(
+                                                                                                                               "conv2d_backward_filter, image[%d,%d,%d,%d](%.1fMB), filter[%d,%d,%d,%d](%.1fMB), dout[%d,%d,%d,%d](%.1fMB), stride[%d,%d], padding[%d,%d]%n",
+                                                                                                                               N, C, H, W, imageSizeInMB,
+                                                                                                                               K, C, R, S, filterSizeInMB,
+                                                                                                                               N, K, P, Q, doutSizeInMB,
+                                                                                                                               strideH, strideW, padH, padW);
+
+                                                                                                               // image is N x (C*H*W), dout is N x (K*P*Q)
+                                                                                                               Matrix image = generateInputMatrix(spark, (int) N, (int) (C * H * W), sparsity, seed);
+                                                                                                               Matrix dout = generateInputMatrix(spark, (int) N, (int) (K * P * Q), sparsity, seed);
+
+                                                                                                               HashMap<String, Object> inputs = new HashMap<>();
+                                                                                                               inputs.put("N", N);
+                                                                                                               inputs.put("C", C);
+                                                                                                               inputs.put("H", H);
+                                                                                                               inputs.put("W", W);
+                                                                                                               inputs.put("K", K);
+                                                                                                               inputs.put("R", R);
+                                                                                                               inputs.put("S", S);
+                                                                                                               inputs.put("strideH", strideH);
+                                                                                                               inputs.put("strideW", strideW);
+                                                                                                               inputs.put("padH", padH);
+                                                                                                               inputs.put("padW", padW);
+                                                                                                               inputs.put("image", image);
+                                                                                                               inputs.put("dout", dout);
+
+                                                                                                               List<Object> outCPU = runOnCPU(spark, scriptStr, inputs, Arrays.asList("O"));
+                                                                                                               List<Object> outGPU = runOnGPU(spark, scriptStr, inputs, Arrays.asList("O"));
+                                                                                                               assertHeavyHitterPresent("gpu_conv2d_backward_filter");
+                                                                                                               assertEqualObjects(outCPU.get(0), outGPU.get(0));
+                                                                                                               clearGPUMemory();
+                                                                                                       }
+                                                                                               }
+                                                                                       }
+                                                                               }
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       @Test
+       public void testConv2dBackwardData() {
+               String scriptStr = "O = conv2d_backward_data(filter, dout, 
padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], 
filter_shape=[K,C,R,S])";
+
+               for (long N : Nlst) {
+                       for (long C : Clst) {
+                               for (long H : Hlst) {
+                                       for (long W : Wlst) {
+                                               for (long K : Klst) {
+                                                       for (long R : Rlst) {
+                                                               for (long S : 
Slst) {
+                                                                       for 
(long strideH : strideHeightLst) {
+                                                                               
for (long strideW : strideWidthLst) {
+                                                                               
        for (long padH : padHeightLst) {
+                                                                               
                for (long padW : padWidthLst) {
+                                                                               
                        for (double sparsity : sparsitylst) {
+
+                                                                               
                                // filter is smaller than image + padding
+                                                                               
                                if (R > (H + padH) || S > (W + padW))
+                                                                               
                                        continue;
+
+                                                                               
                                // Make sure ops fit in GPU memory and within 
constraints of cudnn
+                                                                               
                                long imageSize = N * C * H * W * 8l;
+                                                                               
                                if (imageSize > MAX_OP_SIZE)  // image size
+                                                                               
                                        continue;
+                                                                               
                                long filterSize = K * C * R * S * 8l;
+                                                                               
                                if (filterSize > MAX_OP_SIZE)  // filter size
+                                                                               
                                        continue;
+
+                                                                               
                                int P = (int) ConvolutionUtils.getP(H, R, 
strideH, padH);
+                                                                               
                                int Q = (int) ConvolutionUtils.getQ(W, S, 
strideW, padW);
+
+                                                                               
                                long doutSize = N * K * P * Q * 8l;
+                                                                               
                                if (doutSize > MAX_OP_SIZE) // dout/output size
+                                                                               
                                        continue;
+
+                                                                               
                                double imageSizeInMB = imageSize / (1024.0 * 
1024.0);
+                                                                               
                                double filterSizeInMB = filterSize / (1024.0 * 
1024.0);
+                                                                               
                                double doutSizeInMB = doutSize / (1024.0 * 
1024.0);
+                                                                               
                                System.out
+                                                                               
                                                .format("conv2d_backward_data, 
image[%d,%d,%d,%d](%.1fMB), filter[%d,%d,%d,%d](%.1f), 
dout[%d,%d,%d,%d](%.1fMB), stride[%d,%d], padding[%d,%d]",
+                                                                               
                                                                N, C, H, W, 
imageSizeInMB, N, C, R, S,
+                                                                               
                                                                filterSizeInMB, 
N, K, P, Q, doutSizeInMB,
+                                                                               
                                                                strideH, 
strideW, padH, padW);
+
+                                                                               
                                Matrix filter = generateInputMatrix(spark, 
(int) K,
+                                                                               
                                                (int) (C * R * S), sparsity, 
seed);
+                                                                               
                                Matrix dout = generateInputMatrix(spark, (int) 
N,
+                                                                               
                                                (int) (K * P * Q), sparsity, 
seed);
+                                                                               
                                HashMap<String, Object> inputs = new 
HashMap<>();
+                                                                               
                                inputs.put("N", N);
+                                                                               
                                inputs.put("C", C);
+                                                                               
                                inputs.put("H", H);
+                                                                               
                                inputs.put("W", W);
+                                                                               
                                inputs.put("K", K);
+                                                                               
                                inputs.put("R", R);
+                                                                               
                                inputs.put("S", S);
+                                                                               
                                inputs.put("strideH", strideH);
+                                                                               
                                inputs.put("strideW", strideW);
+                                                                               
                                inputs.put("padH", padH);
+                                                                               
                                inputs.put("padW", padW);
+                                                                               
                                inputs.put("filter", filter);
+                                                                               
                                inputs.put("dout", dout);
+                                                                               
                                List<Object> outCPU = runOnCPU(spark, 
scriptStr, inputs,
+                                                                               
                                                Arrays.asList("O"));
+                                                                               
                                List<Object> outGPU = runOnGPU(spark, 
scriptStr, inputs,
+                                                                               
                                                Arrays.asList("O"));
+                                                                               
                                
assertHeavyHitterPresent("gpu_conv2d_backward_data");
+                                                                               
                                assertEqualObjects(outCPU.get(0), 
outGPU.get(0));
+                                                                               
                                clearGPUMemory();
+                                                                               
                        }
+                                                                               
                }
+                                                                               
        }
+                                                                               
}
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Runs the DML builtin {@code max_pool} on both CPU and GPU for every
+        * combination of the configured batch, channel, image, pool window,
+        * stride, padding and sparsity values, and asserts that the GPU result
+        * equals the CPU result.
+        *
+        * Combinations are skipped when the pool window is larger than the image
+        * plus padding, or when the image, pool window or output size exceeds
+        * {@code MAX_OP_SIZE}. Sizes are computed with 8 bytes per cell.
+        *
+        * One progress record is printed per executed combination.
+        */
+       @Test
+       public void testMaxPool() {
+               String scriptStr = "O = max_pool(image, padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], pool_size=[R,S])";
+
+               for (long N : Nlst) {
+                       for (long C : Clst) {
+                               for (long H : Hlst) {
+                                       for (long W : Wlst) {
+                                               for (long R : Rlst) {
+                                                       for (long S : Slst) {
+                                                               for (long strideH : strideHeightLst) {
+                                                                       for (long strideW : strideWidthLst) {
+                                                                               for (long padH : padHeightLst) {
+                                                                                       for (long padW : padWidthLst) {
+                                                                                               for (double sparsity : sparsitylst) {
+
+                                                                                                       // Skip combinations where the pool window is larger than image + padding
+                                                                                                       if (R > (H + padH) || S > (W + padW))
+                                                                                                               continue;
+
+                                                                                                       // Make sure ops fit in GPU memory and within constraints of cudnn
+                                                                                                       long imageSize = N * C * H * W * 8L;
+                                                                                                       if (imageSize > MAX_OP_SIZE) // image size
+                                                                                                               continue;
+                                                                                                       long poolSize = R * S * 8L;
+                                                                                                       if (poolSize > MAX_OP_SIZE) // pool window size
+                                                                                                               continue;
+
+                                                                                                       // Output height/width for this stride/padding configuration
+                                                                                                       int P = (int) ConvolutionUtils.getP(H, R, strideH, padH);
+                                                                                                       int Q = (int) ConvolutionUtils.getQ(W, S, strideW, padW);
+
+                                                                                                       long doutSize = N * C * P * Q * 8L;
+                                                                                                       if (doutSize > MAX_OP_SIZE) // dout/output size
+                                                                                                               continue;
+
+                                                                                                       double imageSizeInMB = imageSize / (1024.0 * 1024.0);
+                                                                                                       double poolSizeInMB = poolSize / (1024.0 * 1024.0);
+                                                                                                       double doutSizeInMB = doutSize / (1024.0 * 1024.0);
+                                                                                                       // One record per line, every size labelled in MB
+                                                                                                       System.out.format(
+                                                                                                                       "max_pool, image[%d,%d,%d,%d](%.1fMB), pool[%d,%d](%.1fMB), dout[%d,%d,%d,%d](%.1fMB), stride[%d,%d], padding[%d,%d]%n",
+                                                                                                                       N, C, H, W, imageSizeInMB,
+                                                                                                                       R, S, poolSizeInMB,
+                                                                                                                       N, C, P, Q, doutSizeInMB,
+                                                                                                                       strideH, strideW, padH, padW);
+
+                                                                                                       // image is N x (C*H*W)
+                                                                                                       Matrix image = generateInputMatrix(spark, (int) N, (int) (C * H * W), sparsity, seed);
+
+                                                                                                       HashMap<String, Object> inputs = new HashMap<>();
+                                                                                                       inputs.put("N", N);
+                                                                                                       inputs.put("C", C);
+                                                                                                       inputs.put("H", H);
+                                                                                                       inputs.put("W", W);
+                                                                                                       inputs.put("R", R);
+                                                                                                       inputs.put("S", S);
+                                                                                                       inputs.put("strideH", strideH);
+                                                                                                       inputs.put("strideW", strideW);
+                                                                                                       inputs.put("padH", padH);
+                                                                                                       inputs.put("padW", padW);
+                                                                                                       inputs.put("image", image);
+
+                                                                                                       List<Object> outCPU = runOnCPU(spark, scriptStr, inputs, Arrays.asList("O"));
+                                                                                                       List<Object> outGPU = runOnGPU(spark, scriptStr, inputs, Arrays.asList("O"));
+                                                                                                       assertHeavyHitterPresent("gpu_maxpooling");
+                                                                                                       assertEqualObjects(outCPU.get(0), outGPU.get(0));
+                                                                                                       clearGPUMemory();
+                                                                                               }
+                                                                                       }
+                                                                               }
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Checks {@code max_pool_backward} on the GPU against the CPU result.
+        * <p>
+        * Iterates over every combination of the image dimension, pool size, stride,
+        * padding and sparsity lists of this class and hands each combination to
+        * {@link #runMaxPoolBackwardCase}, which filters out unsupported configurations.
+        */
+       @Test
+       public void testMaxPoolBackward() {
+               String scriptStr = "O = max_pool_backward(image, dout, padding=[padH, padW], stride=[strideH, strideW], input_shape=[N,C,H,W], pool_size=[R,S])";
+
+               for (long N : Nlst) {
+                       for (long C : Clst) {
+                               for (long H : Hlst) {
+                                       for (long W : Wlst) {
+                                               for (long R : Rlst) {
+                                                       for (long S : Slst) {
+                                                               for (long strideH : strideHeightLst) {
+                                                                       for (long strideW : strideWidthLst) {
+                                                                               for (long padH : padHeightLst) {
+                                                                                       for (long padW : padWidthLst) {
+                                                                                               for (double sparsity : sparsitylst) {
+                                                                                                       runMaxPoolBackwardCase(scriptStr, N, C, H, W, R, S,
+                                                                                                                       strideH, strideW, padH, padW, sparsity);
+                                                                                               }
+                                                                                       }
+                                                                               }
+                                                                       }
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Runs a single {@code max_pool_backward} configuration on CPU and GPU and asserts
+        * that both produce equal results. Returns without running anything when the
+        * configuration is filtered out by one of the checks below.
+        *
+        * @param scriptStr the DML script to execute
+        * @param N         first entry of {@code input_shape}; also the number of rows of both inputs
+        * @param C         second entry of {@code input_shape}
+        * @param H         third entry of {@code input_shape}
+        * @param W         fourth entry of {@code input_shape}
+        * @param R         first entry of {@code pool_size}
+        * @param S         second entry of {@code pool_size}
+        * @param strideH   first entry of {@code stride}
+        * @param strideW   second entry of {@code stride}
+        * @param padH      first entry of {@code padding}
+        * @param padW      second entry of {@code padding}
+        * @param sparsity  sparsity passed to the input matrix generator
+        */
+       private void runMaxPoolBackwardCase(String scriptStr, long N, long C, long H, long W, long R, long S,
+                       long strideH, long strideW, long padH, long padW, double sparsity) {
+               // pool is smaller than image + padding
+               if (R > (H + padH) || S > (W + padW))
+                       return;
+
+               // Make sure ops fit in GPU memory and within constraints of cudnn.
+               // Sizes are counted at 8 bytes per cell (presumably double precision - confirm).
+               long imageSize = N * C * H * W * 8L;
+               if (imageSize > MAX_OP_SIZE) // image size
+                       return;
+               long poolSize = R * S * 8L;
+               if (poolSize > MAX_OP_SIZE) // filter size
+                       return;
+
+               // P and Q give the per-channel dimensions of dout (it is generated with C * P * Q columns)
+               int P = (int) ConvolutionUtils.getP(H, R, strideH, padH);
+               int Q = (int) ConvolutionUtils.getQ(W, S, strideW, padW);
+
+               long doutSize = N * C * P * Q * 8L;
+               if (doutSize > MAX_OP_SIZE) // dout/output size
+                       return;
+
+               double imageSizeInMB = imageSize / (1024.0 * 1024.0);
+               double poolSizeInMB = poolSize / (1024.0 * 1024.0);
+               double doutSizeInMB = doutSize / (1024.0 * 1024.0);
+               // Every size carries its unit and the message ends with a line break, so that
+               // successive configurations do not run together on the console.
+               System.out.format(
+                               "max_pool_backward, image[%d,%d,%d,%d](%.1fMB), pool[%d,%d](%.1fMB), dout[%d,%d,%d,%d](%.1fMB), stride[%d,%d], padding[%d,%d]%n",
+                               N, C, H, W, imageSizeInMB, R, S, poolSizeInMB, N, C, P, Q, doutSizeInMB,
+                               strideH, strideW, padH, padW);
+
+               Matrix image = generateInputMatrix(spark, (int) N, (int) (C * H * W), sparsity, seed);
+               Matrix dout = generateInputMatrix(spark, (int) N, (int) (C * P * Q), sparsity, seed);
+               HashMap<String, Object> inputs = new HashMap<>();
+               inputs.put("N", N);
+               inputs.put("C", C);
+               inputs.put("H", H);
+               inputs.put("W", W);
+               inputs.put("R", R);
+               inputs.put("S", S);
+               inputs.put("strideH", strideH);
+               inputs.put("strideW", strideW);
+               inputs.put("padH", padH);
+               inputs.put("padW", padW);
+               inputs.put("image", image);
+               inputs.put("dout", dout);
+               List<Object> outCPU = runOnCPU(spark, scriptStr, inputs, Arrays.asList("O"));
+               List<Object> outGPU = runOnGPU(spark, scriptStr, inputs, Arrays.asList("O"));
+               assertHeavyHitterPresent("gpu_maxpooling_backward");
+               assertEqualObjects(outCPU.get(0), outGPU.get(0));
+               clearGPUMemory();
+       }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/ReorgOpTests.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/ReorgOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/ReorgOpTests.java
new file mode 100644
index 0000000..b5b71f8
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/gpu/ReorgOpTests.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+/**
+ * GPU test for the DML transpose operation {@code t(X)}.
+ * Every case is executed once on the CPU and once on the GPU and the two outputs are compared.
+ */
+public class ReorgOpTests extends GPUTests {
+
+       private static final String TEST_NAME = "ReorgOpTests";
+
+       // Shapes and sparsities that are combined exhaustively by the test below
+       private final int[] rowSizes = { 1, 64, 130, 1024, 2049 };
+       private final int[] columnSizes = { 1, 64, 130, 1024, 2049 };
+       private final double[] sparsities = { 0.0, 0.03, 0.3, 0.9 };
+       // Fixed seed handed to the input matrix generator
+       private final int seed = 42;
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       /**
+        * Transposes a generated matrix for every rows x columns x sparsity combination
+        * and asserts that CPU and GPU agree on the result.
+        */
+       @Test
+       public void transposeTest() {
+               String scriptStr = "out = t(in1)";
+
+               for (int numRows : rowSizes) {
+                       for (int numCols : columnSizes) {
+                               for (double sparsity : sparsities) {
+                                       Matrix input = generateInputMatrix(spark, numRows, numCols, sparsity, seed);
+                                       HashMap<String, Object> inputs = new HashMap<>();
+                                       inputs.put("in1", input);
+
+                                       List<Object> cpuOuts = runOnCPU(spark, scriptStr, inputs, Arrays.asList("out"));
+                                       List<Object> gpuOuts = runOnGPU(spark, scriptStr, inputs, Arrays.asList("out"));
+                                       // The heavy-hitter check is disabled here; the reason is not visible in this file
+                                       //assertHeavyHitterPresent("gpu_r'");
+                                       assertEqualObjects(cpuOuts.get(0), gpuOuts.get(0));
+                               }
+                       }
+               }
+       }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
new file mode 100644
index 0000000..65e6365
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/gpu/ScalarMatrixElementwiseOpTests.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Compares CPU and GPU results of element-wise operations between a matrix and a scalar.
+ */
+public class ScalarMatrixElementwiseOpTests extends GPUTests {
+
+       private static final String TEST_NAME = "ScalarMatrixElementwiseOpTests";
+
+       // Shapes and sparsities that are combined exhaustively for every operation
+       private final int[] rowSizes = { 1, 64, 130, 2049 };
+       private final int[] columnSizes = { 1, 64, 130, 2049 };
+       private final double[] sparsities = { 0.0, 0.03, 0.3, 0.9 };
+       // Fixed seed handed to the input matrix generator
+       private final int seed = 42;
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       @Test
+       public void testPlusRightScalar() {
+               double[] scalars = { 0.0, 0.5, 20.0 };
+               runScalarMatrixElementWiseTests("O = X + scalar", "X", "scalar", "O", scalars, "gpu_+");
+       }
+
+       @Test
+       public void testPlusLeftScalar() {
+               double[] scalars = { 0.0, 0.5, 20.0 };
+               runScalarMatrixElementWiseTests("O = scalar + X", "X", "scalar", "O", scalars, "gpu_+");
+       }
+
+       @Test
+       public void testMinusRightScalar() {
+               double[] scalars = { 0.0, 0.5, 1.0 };
+               runScalarMatrixElementWiseTests("O = X - scalar", "X", "scalar", "O", scalars, "gpu_-");
+       }
+
+       @Test
+       public void testMinusLeftScalar() {
+               double[] scalars = { 0.0, 0.5, 1.0 };
+               runScalarMatrixElementWiseTests("O = scalar - X", "X", "scalar", "O", scalars, "gpu_-");
+       }
+
+       @Test
+       public void testMultRightScalar() {
+               double[] scalars = { 0.0, 0.5, 2.0 };
+               runScalarMatrixElementWiseTests("O = X * scalar", "X", "scalar", "O", scalars, "gpu_*");
+       }
+
+       @Test
+       public void testMultLeftScalar() {
+               double[] scalars = { 0.0, 0.5, 2.0 };
+               runScalarMatrixElementWiseTests("O = scalar * X", "X", "scalar", "O", scalars, "gpu_*");
+       }
+
+       @Test
+       public void testDivide() {
+               double[] scalars = { 0.0, 0.5, 5.0 };
+               runScalarMatrixElementWiseTests("O = X / scalar", "X", "scalar", "O", scalars, "gpu_/");
+       }
+
+       // ****************************************************************
+       // ************************ IGNORED TEST **************************
+       // FIXME : There is a bug in CPU "^" when A ^ B is executed where A & B are all zeroes
+       @Ignore
+       @Test
+       public void testPow() {
+               double[] scalars = { 0.0, 2.0, 10.0 };
+               runScalarMatrixElementWiseTests("O = X ^ scalar", "X", "scalar", "O", scalars, "gpu_^");
+       }
+
+       /**
+        * Runs one scalar-matrix element-wise script for every combination of the configured
+        * row sizes, column sizes and sparsities with each of the given scalars, and asserts
+        * that the CPU and GPU outputs are equal.
+        *
+        * @param scriptStr         the script string
+        * @param inputMatrix       name of the matrix input in the script string
+        * @param inputScalar       name of the scalar input in the script string
+        * @param output            name of the output variable in the script string
+        * @param scalars           array of scalars for which to run this test
+        * @param heavyHitterOpCode heavy hitter op code expected when executed on gpu; currently
+        *                          unused because the corresponding assertion is commented out
+        */
+       private void runScalarMatrixElementWiseTests(String scriptStr, String inputMatrix, String inputScalar,
+                       String output, double[] scalars, String heavyHitterOpCode) {
+               for (int m : rowSizes) {
+                       for (int n : columnSizes) {
+                               for (double sparsity : sparsities) {
+                                       for (double scalar : scalars) {
+                                               System.out.println("Matrix is of size [" + m + ", " + n + "], sparsity = " + sparsity
+                                                               + ", scalar = " + scalar);
+
+                                               Matrix X = generateInputMatrix(spark, m, n, sparsity, seed);
+                                               HashMap<String, Object> inputs = new HashMap<>();
+                                               inputs.put(inputMatrix, X);
+                                               inputs.put(inputScalar, scalar);
+
+                                               List<Object> cpuOut = runOnCPU(spark, scriptStr, inputs, Arrays.asList(output));
+                                               List<Object> gpuOut = runOnGPU(spark, scriptStr, inputs, Arrays.asList(output));
+                                               //assertHeavyHitterPresent(heavyHitterOpCode);
+                                               assertEqualObjects(cpuOut.get(0), gpuOut.get(0));
+                                       }
+                               }
+                       }
+               }
+       }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/UnaryOpTests.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/UnaryOpTests.java 
b/src/test/java/org/apache/sysml/test/gpu/UnaryOpTests.java
new file mode 100644
index 0000000..84b1f73
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/gpu/UnaryOpTests.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+/**
+ * Unit tests for Unary ops on GPU.
+ * <p>
+ * Each test names a DML unary builtin (or, for {@link #testSelp()}, a full script) together
+ * with the GPU heavy-hitter op code associated with it, and delegates to the helpers
+ * inherited from {@link UnaryOpTestsBase}.
+ */
+public class UnaryOpTests extends UnaryOpTestsBase {
+
+       private final static String TEST_NAME = "UnaryOpTests";
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       @Test
+       public void testSin() throws Exception {
+               testSimpleUnaryOpMatrixOutput("sin", "gpu_sin");
+       }
+
+       @Test
+       public void testCos() throws Exception {
+               testSimpleUnaryOpMatrixOutput("cos", "gpu_cos");
+       }
+
+       @Test
+       public void testTan() throws Exception {
+               testSimpleUnaryOpMatrixOutput("tan", "gpu_tan");
+       }
+
+       @Test
+       public void testAsin() throws Exception {
+               testSimpleUnaryOpMatrixOutput("asin", "gpu_asin");
+       }
+
+       @Test
+       public void testAcos() throws Exception {
+               testSimpleUnaryOpMatrixOutput("acos", "gpu_acos");
+       }
+
+       @Test
+       public void testAtan() throws Exception {
+               testSimpleUnaryOpMatrixOutput("atan", "gpu_atan");
+       }
+
+       @Test
+       public void testExp() throws Exception {
+               testSimpleUnaryOpMatrixOutput("exp", "gpu_exp");
+       }
+
+       @Test
+       public void testLog() throws Exception {
+               testSimpleUnaryOpMatrixOutput("log", "gpu_log");
+       }
+
+       @Test
+       public void testSqrt() throws Exception {
+               testSimpleUnaryOpMatrixOutput("sqrt", "gpu_sqrt");
+       }
+
+       @Test
+       public void testAbs() throws Exception {
+               testSimpleUnaryOpMatrixOutput("abs", "gpu_abs");
+       }
+
+       @Test
+       public void testRound() throws Exception {
+               testSimpleUnaryOpMatrixOutput("round", "gpu_round");
+       }
+
+       @Test
+       public void testFloor() throws Exception {
+               // Must run "floor": this previously passed "sqrt", so floor was never exercised
+               testSimpleUnaryOpMatrixOutput("floor", "gpu_floor");
+       }
+
+       @Test
+       public void testCeil() throws Exception {
+               testSimpleUnaryOpMatrixOutput("ceil", "gpu_ceil");
+       }
+
+       @Test
+       public void testSign() throws Exception {
+               testSimpleUnaryOpMatrixOutput("sign", "gpu_sign");
+       }
+
+       @Test
+       public void testSelp() throws Exception {
+               // Not a simple builtin call: the whole script is supplied explicitly
+               testUnaryOpMatrixOutput("out = max(in1, 0)", "gpu_selp", "in1", "out");
+       }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test/java/org/apache/sysml/test/gpu/UnaryOpTestsBase.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/UnaryOpTestsBase.java 
b/src/test/java/org/apache/sysml/test/gpu/UnaryOpTestsBase.java
new file mode 100644
index 0000000..0051dd4
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/gpu/UnaryOpTestsBase.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+
+/**
+ * Shared base for the unary op tests: provides the matrix shapes and sparsities to
+ * iterate over and the helpers that run a script on CPU and GPU and compare the outputs.
+ */
+public abstract class UnaryOpTestsBase extends GPUTests {
+
+       // Set of rows and column sizes & sparsities to test unary ops
+       private final int[] rowSizes = { 2049, 1024, 140, 64, 1 };
+       private final int[] columnSizes = { 2049, 1024, 140, 64, 1 };
+       private final double[] sparsities = { 0.9, 0.3, 0.03, 0.0 };
+       private final int seed = 42;
+
+       /**
+        * Tests a DML builtin unary function over a variety of matrix shapes and sparsities
+        * by wrapping it into the script {@code out = function(in1)}.
+        * The 1x1 shape is skipped.
+        *
+        * @param function          name of the dml builtin unary op
+        * @param heavyHitterOpCode the string printed for the unary op heavy hitter when executed on gpu
+        */
+       protected void testSimpleUnaryOpMatrixOutput(String function, String heavyHitterOpCode) {
+               testUnaryOpMatrixOutput("out = " + function + "(in1)", heavyHitterOpCode, "in1", "out");
+       }
+
+       /**
+        * Tests an arbitrary single-input script over every combination of the configured
+        * row sizes, column sizes and sparsities.
+        * The 1x1 shape is skipped.
+        *
+        * @param scriptStr         script string
+        * @param heavyHitterOpCode the string printed for the unary op heavy hitter when executed on gpu
+        * @param inStr             name of input variable in provided script string
+        * @param outStr            name of output variable in script string
+        */
+       protected void testUnaryOpMatrixOutput(String scriptStr, String heavyHitterOpCode, String inStr, String outStr) {
+               for (int row : rowSizes) {
+                       for (int column : columnSizes) {
+                               // Skip the case of a scalar unary op
+                               if (row == 1 && column == 1)
+                                       continue;
+
+                               for (double sparsity : sparsities) {
+                                       testUnaryOpMatrixOutput(scriptStr, heavyHitterOpCode, inStr, outStr, seed, row, column,
+                                                       sparsity);
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Runs the script once on CPU and once on GPU with a generated input matrix of the
+        * given size and sparsity, and asserts that both outputs are equal.
+        *
+        * @param scriptStr         script string
+        * @param heavyHitterOpCode the string printed for the unary op heavy hitter when executed on gpu;
+        *                          currently unused because the corresponding assertion is commented out
+        * @param inStr             name of input variable in provided script string
+        * @param outStr            name of output variable in script string
+        * @param seed              seed for the random number generator for the random input matrix
+        * @param row               number of rows of input matrix
+        * @param column            number of columns of input matrix
+        * @param sparsity          sparsity of the input matrix
+        */
+       public void testUnaryOpMatrixOutput(String scriptStr, String heavyHitterOpCode, String inStr, String outStr,
+                       int seed, int row, int column, double sparsity) {
+               System.out.println("Matrix of size [" + row + ", " + column + "], sparsity = " + sparsity);
+
+               HashMap<String, Object> inputs = new HashMap<>();
+               inputs.put(inStr, generateInputMatrix(spark, row, column, sparsity, seed));
+
+               List<Object> outCPU = runOnCPU(spark, scriptStr, inputs, Arrays.asList(outStr));
+               List<Object> outGPU = runOnGPU(spark, scriptStr, inputs, Arrays.asList(outStr));
+               //assertHeavyHitterPresent(heavyHitterOpCode);
+               assertEqualObjects(outCPU.get(0), outGPU.get(0));
+       }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/772fb588/src/test_suites/java/org/apache/sysml/test/integration/gpu/ZPackageSuite.java
----------------------------------------------------------------------
diff --git 
a/src/test_suites/java/org/apache/sysml/test/integration/gpu/ZPackageSuite.java 
b/src/test_suites/java/org/apache/sysml/test/integration/gpu/ZPackageSuite.java
new file mode 100644
index 0000000..d5e3bc0
--- /dev/null
+++ 
b/src/test_suites/java/org/apache/sysml/test/integration/gpu/ZPackageSuite.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.gpu;
+
+import org.apache.sysml.test.gpu.AggregateUnaryOpTests;
+import org.apache.sysml.test.gpu.BinaryOpTests;
+import org.apache.sysml.test.gpu.MatrixMatrixElementWiseOpTests;
+import org.apache.sysml.test.gpu.MatrixMultiplicationOpTest;
+import org.apache.sysml.test.gpu.NeuralNetworkOpTests;
+import org.apache.sysml.test.gpu.ReorgOpTests;
+import org.apache.sysml.test.gpu.ScalarMatrixElementwiseOpTests;
+import org.apache.sysml.test.gpu.UnaryOpTests;
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+import org.junit.runners.Suite.SuiteClasses;
+
+/**
+ * JUnit suite that groups the GPU test classes listed in {@code @SuiteClasses}
+ * so that they can be run together.
+ */
+@RunWith(Suite.class)
+@SuiteClasses({
+       BinaryOpTests.class,
+       ScalarMatrixElementwiseOpTests.class,
+       MatrixMatrixElementWiseOpTests.class,
+       ReorgOpTests.class,
+       AggregateUnaryOpTests.class,
+       UnaryOpTests.class,
+       MatrixMultiplicationOpTest.class,
+       NeuralNetworkOpTests.class
+})
+public class ZPackageSuite {
+       // Intentionally empty: the annotations above carry the whole suite definition
+}

Reply via email to