Repository: systemml Updated Branches: refs/heads/master 881caa9ba -> abbffc55e
http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java index 21a2a35..d962027 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java @@ -23,13 +23,6 @@ import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_TRANSPOSE; import static jcuda.runtime.JCuda.cudaMemcpy; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; import jcuda.Pointer; -import jcuda.Sizeof; -import jcuda.jcublas.JCublas2; -import jcuda.jcublas.cublasHandle; -import jcuda.jcublas.cublasOperation; -import jcuda.jcusparse.JCusparse; -import jcuda.jcusparse.cusparseHandle; -import jcuda.runtime.JCuda; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -43,6 +36,11 @@ import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; import org.apache.sysml.utils.GPUStatistics; import org.apache.sysml.utils.Statistics; +import jcuda.jcusparse.cusparseHandle; +import jcuda.jcublas.cublasHandle; +import jcuda.jcublas.cublasOperation; +import jcuda.runtime.JCuda; + public class LibMatrixCuMatMult extends LibMatrixCUDA { private static final Log LOG = LogFactory.getLog(LibMatrixCuMatMult.class.getName()); @@ -175,7 +173,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { // Step 3: Invoke the kernel long t1 = GPUStatistics.DISPLAY_STATISTICS ? 
System.nanoTime() : 0; - JCusparse.cusparseDcsrgemm(getCusparseHandle(gCtx), transa, transb, params.m, params.n, params.k, A.descr, + cudaSupportFunctions.cusparsecsrgemm(getCusparseHandle(gCtx), transa, transb, params.m, params.n, params.k, A.descr, (int) A.nnz, A.val, A.rowPtr, A.colInd, B.descr, (int) B.nnz, B.val, B.rowPtr, B.colInd, C.descr, C.val, C.rowPtr, C.colInd); if (GPUStatistics.DISPLAY_STATISTICS) @@ -239,7 +237,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { * allocated in dense row-major format and A is sparse. * * Other than input and output, this method requires additional memory = - * outRLen * outCLen * Sizeof.DOUBLE + * outRLen * outCLen * sizeOfDataType * * @param gCtx * a valid {@link GPUContext} @@ -276,7 +274,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { // t(C) = t(B) %*% t(A) Pointer output = null; if (outRLen != 1 && outCLen != 1) { - output = gCtx.allocate(outRLen * outCLen * Sizeof.DOUBLE); + output = gCtx.allocate(outRLen * outCLen * sizeOfDataType); } else { // no transpose required for vector output output = C; @@ -287,7 +285,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { if (outRLen != 1 && outCLen != 1) { // Transpose: C = t(output) long t0 = GPUStatistics.DISPLAY_STATISTICS ? 
System.nanoTime() : 0; - JCublas2.cublasDgeam(gCtx.getCublasHandle(), cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T, + cudaSupportFunctions.cublasgeam(gCtx.getCublasHandle(), cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T, toInt(outCLen), toInt(outRLen), one(), output, toInt(outRLen), zero(), new Pointer(), toInt(outRLen), C, toInt(outCLen)); if (!DMLScript.EAGER_CUDA_FREE) @@ -331,7 +329,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { int m = toInt(param.rightNumRows); int n = toInt(param.rightNumCols); int transa = reverseCusparseOp(cusparseOp(param.isLeftTransposed)); - JCusparse.cusparseDcsrmv(handle, transa, m, n, toInt(B.nnz), one(), B.descr, B.val, B.rowPtr, B.colInd, A, + cudaSupportFunctions.cusparsecsrmv(handle, transa, m, n, toInt(B.nnz), one(), B.descr, B.val, B.rowPtr, B.colInd, A, zero(), C); kernel = GPUInstruction.MISC_TIMER_SPARSE_MATRIX_DENSE_VECTOR_LIB; } else { @@ -342,7 +340,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { int transa = reverseCusparseOp(cusparseOp(param.isLeftTransposed)); int transb = cusparseOp(param.isRightTransposed); LOG.debug(" GPU Sparse-Dense Matrix Multiply (rhs transpose) "); - JCusparse.cusparseDcsrmm2(handle, transa, transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val, + cudaSupportFunctions.cusparsecsrmm2(handle, transa, transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val, B.rowPtr, B.colInd, A, param.ldb, zero(), C, param.ldc); } if (GPUStatistics.DISPLAY_STATISTICS) @@ -383,7 +381,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { // Vector product LOG.debug(" GPU Dense-dense Vector Product"); double[] result = { 0 }; - JCublas2.cublasDdot(handle, param.k, A, 1, B, 1, Pointer.to(result)); + cudaSupportFunctions.cublasdot(handle, param.k, A, 1, B, 1, Pointer.to(result)); // By default in CuBlas V2, cublas pointer mode is set to // CUBLAS_POINTER_MODE_HOST. 
// This means that scalar values passed are on host (as opposed to @@ -391,7 +389,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { // The result is copied from the host back to the device so that the // rest of // infrastructure can treat it uniformly. - cudaMemcpy(C, Pointer.to(result), 1 * Sizeof.DOUBLE, cudaMemcpyHostToDevice); + cudaMemcpy(C, Pointer.to(result), 1 * sizeOfDataType, cudaMemcpyHostToDevice); kernel = GPUInstruction.MISC_TIMER_DENSE_DOT_LIB; } else if (param.m == 1) { // Vector-matrix multiply @@ -399,18 +397,18 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA { transb = reverseCublasOp(transb); int rightNumRows = (transb == CUSPARSE_OPERATION_TRANSPOSE) ? param.k : param.n; int rightNumCols = (transb == CUSPARSE_OPERATION_TRANSPOSE) ? param.n : param.k; - JCublas2.cublasDgemv(handle, transb, rightNumRows, rightNumCols, one(), B, param.ldb, A, 1, zero(), C, 1); + cudaSupportFunctions.cublasgemv(handle, transb, rightNumRows, rightNumCols, one(), B, param.ldb, A, 1, zero(), C, 1); kernel = GPUInstruction.MISC_TIMER_DENSE_VECTOR_DENSE_MATRIX_LIB; } else if (param.n == 1) { // Matrix-vector multiply LOG.debug(" GPU Dense Matrix-Vector Multiply"); int leftNumRows = (transa == CUSPARSE_OPERATION_NON_TRANSPOSE) ? param.m : param.k; int leftNumCols = (transa == CUSPARSE_OPERATION_NON_TRANSPOSE) ? 
param.k : param.m; - JCublas2.cublasDgemv(handle, transa, leftNumRows, leftNumCols, one(), A, param.lda, B, 1, zero(), C, 1); + cudaSupportFunctions.cublasgemv(handle, transa, leftNumRows, leftNumCols, one(), A, param.lda, B, 1, zero(), C, 1); kernel = GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_VECTOR_LIB; } else { LOG.debug(" GPU Dense-Dense Matrix Multiply "); - JCublas2.cublasDgemm(handle, transa, transb, param.m, param.n, param.k, one(), A, param.lda, B, param.ldb, + cudaSupportFunctions.cublasgemm(handle, transa, transb, param.m, param.n, param.k, one(), A, param.lda, B, param.ldb, zero(), C, param.ldc); kernel = GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_MATRIX_LIB; } http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 8ee6f8d..c023890 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -3852,8 +3852,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab * @param ru row upper * @param cl column lower * @param cu column upper - * @param ret ? 
- * @return matrix block + * @param deep should perform deep copy + * @param ret output matrix block + * @return output matrix block + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public MatrixBlock sliceOperations(int rl, int ru, int cl, int cu, boolean deep, CacheBlock ret) http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java new file mode 100644 index 0000000..128bb39 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sysml.runtime.matrix.data; + +import static jcuda.runtime.JCuda.cudaMemcpy; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContext; +import org.apache.sysml.utils.GPUStatistics; + +import jcuda.Pointer; +import jcuda.Sizeof; +import jcuda.jcublas.JCublas2; +import jcuda.jcublas.cublasHandle; +import jcuda.jcusolver.JCusolverDn; +import jcuda.jcusolver.cusolverDnHandle; +import jcuda.jcusparse.JCusparse; +import jcuda.jcusparse.cusparseHandle; +import jcuda.jcusparse.cusparseMatDescr; + +public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions { + + private static final Log LOG = LogFactory.getLog(SinglePrecisionCudaSupportFunctions.class.getName()); + + @Override + public int cusparsecsrgemm(cusparseHandle handle, int transA, int transB, int m, int n, int k, + cusparseMatDescr descrA, int nnzA, Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA, + cusparseMatDescr descrB, int nnzB, Pointer csrValB, Pointer csrRowPtrB, Pointer csrColIndB, + cusparseMatDescr descrC, Pointer csrValC, Pointer csrRowPtrC, Pointer csrColIndC) { + return JCusparse.cusparseScsrgemm(handle, transA, transB, m, n, k, + descrA, nnzA, csrValA, csrRowPtrA, csrColIndA, + descrB, nnzB, csrValB, csrRowPtrB, csrColIndB, + descrC, csrValC, csrRowPtrC, csrColIndC); + } + + @Override + public int cublasgeam(cublasHandle handle, int transa, int transb, int m, int n, Pointer alpha, Pointer A, int lda, + Pointer beta, Pointer B, int ldb, Pointer C, int ldc) { + return JCublas2.cublasSgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); + } + + @Override + public int 
cusparsecsrmv(cusparseHandle handle, int transA, int m, int n, int nnz, Pointer alpha, + cusparseMatDescr descrA, Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA, Pointer x, Pointer beta, + Pointer y) { + return JCusparse.cusparseScsrmv(handle, transA, m, n, nnz, alpha, + descrA, csrValA, csrRowPtrA, csrColIndA, x, beta, y); + } + + @Override + public int cusparsecsrmm2(cusparseHandle handle, int transa, int transb, int m, int n, int k, int nnz, jcuda.Pointer alpha, cusparseMatDescr descrA, + jcuda.Pointer csrValA, jcuda.Pointer csrRowPtrA, jcuda.Pointer csrColIndA, + jcuda.Pointer B, int ldb, jcuda.Pointer beta, jcuda.Pointer C, int ldc) { + return JCusparse.cusparseScsrmm2(handle, transa, transb, m, n, k, nnz, alpha, descrA, csrValA, + csrRowPtrA, csrColIndA, B, ldb, beta, C, ldc); + } + + @Override + public int cublasdot(cublasHandle handle, int n, Pointer x, int incx, Pointer y, int incy, Pointer result) { + return JCublas2.cublasSdot(handle, n, x, incx, y, incy, result); + } + + @Override + public int cublasgemv(cublasHandle handle, int trans, int m, int n, Pointer alpha, Pointer A, int lda, Pointer x, + int incx, Pointer beta, Pointer y, int incy) { + return JCublas2.cublasSgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); + } + + @Override + public int cublasgemm(cublasHandle handle, int transa, int transb, int m, int n, int k, Pointer alpha, Pointer A, + int lda, Pointer B, int ldb, Pointer beta, Pointer C, int ldc) { + return JCublas2.cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); + } + + @Override + public int cusparsecsr2csc(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal, Pointer csrRowPtr, + Pointer csrColInd, Pointer cscVal, Pointer cscRowInd, Pointer cscColPtr, int copyValues, int idxBase) { + return JCusparse.cusparseScsr2csc(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscRowInd, cscColPtr, copyValues, idxBase); + } + + @Override + public int 
cublassyrk(cublasHandle handle, int uplo, int trans, int n, int k, Pointer alpha, Pointer A, int lda, + Pointer beta, Pointer C, int ldc) { + return JCublas2.cublasSsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); + } + + @Override + public int cublasaxpy(cublasHandle handle, int n, Pointer alpha, Pointer x, int incx, Pointer y, int incy) { + return JCublas2.cublasSaxpy(handle, n, alpha, x, incx, y, incy); + } + + @Override + public int cublastrsm(cublasHandle handle, int side, int uplo, int trans, int diag, int m, int n, Pointer alpha, + Pointer A, int lda, Pointer B, int ldb) { + return JCublas2.cublasStrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb); + } + + @Override + public int cusolverDngeqrf_bufferSize(cusolverDnHandle handle, int m, int n, Pointer A, int lda, int[] Lwork) { + return JCusolverDn.cusolverDnSgeqrf_bufferSize(handle, m, n, A, lda, Lwork); + } + + @Override + public int cusolverDngeqrf(cusolverDnHandle handle, int m, int n, Pointer A, int lda, Pointer TAU, + Pointer Workspace, int Lwork, Pointer devInfo) { + return JCusolverDn.cusolverDnSgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo); + } + + @Override + public int cusolverDnormqr(cusolverDnHandle handle, int side, int trans, int m, int n, int k, Pointer A, int lda, + Pointer tau, Pointer C, int ldc, Pointer work, int lwork, Pointer devInfo) { + return JCusolverDn.cusolverDnSormqr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, devInfo); + } + + @Override + public int cusparsecsrgeam(cusparseHandle handle, int m, int n, Pointer alpha, cusparseMatDescr descrA, int nnzA, + Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA, Pointer beta, cusparseMatDescr descrB, int nnzB, + Pointer csrValB, Pointer csrRowPtrB, Pointer csrColIndB, cusparseMatDescr descrC, Pointer csrValC, + Pointer csrRowPtrC, Pointer csrColIndC) { + return JCusparse.cusparseScsrgeam(handle, m, n, alpha, descrA, nnzA, + csrValA, csrRowPtrA, csrColIndA, beta, descrB, 
nnzB, + csrValB, csrRowPtrB, csrColIndB, descrC, csrValC, csrRowPtrC, csrColIndC); + } + + @Override + public int cusparsecsr2dense(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, Pointer csrValA, + Pointer csrRowPtrA, Pointer csrColIndA, Pointer A, int lda) { + return JCusparse.cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda); + } + + @Override + public int cusparsedense2csr(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, Pointer A, int lda, + Pointer nnzPerRow, Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA) { + return JCusparse.cusparseSdense2csr(handle, m, n, descrA, A, lda, nnzPerRow, csrValA, csrRowPtrA, csrColIndA); + } + + @Override + public int cusparsennz(cusparseHandle handle, int dirA, int m, int n, cusparseMatDescr descrA, Pointer A, int lda, + Pointer nnzPerRowCol, Pointer nnzTotalDevHostPtr) { + return JCusparse.cusparseSnnz(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, nnzTotalDevHostPtr); + } + + @Override + public void deviceToHost(GPUContext gCtx, Pointer src, double[] dest, String instName) throws DMLRuntimeException { + long t1 = GPUStatistics.DISPLAY_STATISTICS && instName != null? 
System.nanoTime() : 0; + LOG.debug("Potential OOM: Allocated additional space in deviceToHost"); + if(PERFORM_CONVERSION_ON_DEVICE) { + Pointer deviceDoubleData = gCtx.allocate(((long)dest.length)*Sizeof.DOUBLE); + LibMatrixCUDA.float2double(gCtx, src, deviceDoubleData, dest.length); + cudaMemcpy(Pointer.to(dest), deviceDoubleData, ((long)dest.length)*Sizeof.DOUBLE, cudaMemcpyDeviceToHost); + gCtx.cudaFreeHelper(deviceDoubleData); + } + else { + // TODO: Perform conversion on GPU using double2float and float2double kernels + float [] floatData = new float[dest.length]; + cudaMemcpy(Pointer.to(floatData), src, ((long)dest.length)*Sizeof.FLOAT, cudaMemcpyDeviceToHost); + for(int i = 0; i < dest.length; i++) { + dest[i] = floatData[i]; + } + } + if(GPUStatistics.DISPLAY_STATISTICS && instName != null) + GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_HOST, System.nanoTime() - t1); + } + + @Override + public void hostToDevice(GPUContext gCtx, double[] src, Pointer dest, String instName) throws DMLRuntimeException { + LOG.debug("Potential OOM: Allocated additional space in hostToDevice"); + // TODO: Perform conversion on GPU using double2float and float2double kernels + long t1 = GPUStatistics.DISPLAY_STATISTICS && instName != null? 
System.nanoTime() : 0; + if(PERFORM_CONVERSION_ON_DEVICE) { + Pointer deviceDoubleData = gCtx.allocate(((long)src.length)*Sizeof.DOUBLE); + cudaMemcpy(deviceDoubleData, Pointer.to(src), ((long)src.length)*Sizeof.DOUBLE, cudaMemcpyHostToDevice); + LibMatrixCUDA.double2float(gCtx, deviceDoubleData, dest, src.length); + gCtx.cudaFreeHelper(deviceDoubleData); + } + else { + float [] floatData = new float[src.length]; + for(int i = 0; i < src.length; i++) { + floatData[i] = (float) src[i]; + } + cudaMemcpy(dest, Pointer.to(floatData), ((long)src.length)*Sizeof.FLOAT, cudaMemcpyHostToDevice); + } + + if(GPUStatistics.DISPLAY_STATISTICS && instName != null) + GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, System.nanoTime() - t1); + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/test/java/org/apache/sysml/test/gpu/GPUTests.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/gpu/GPUTests.java b/src/test/java/org/apache/sysml/test/gpu/GPUTests.java index b4e4b62..d7d1ad5 100644 --- a/src/test/java/org/apache/sysml/test/gpu/GPUTests.java +++ b/src/test/java/org/apache/sysml/test/gpu/GPUTests.java @@ -51,9 +51,14 @@ public abstract class GPUTests extends AutomatedTestBase { protected final static String TEST_DIR = "org/apache/sysml/api/mlcontext"; protected static SparkSession spark; - protected final double THRESHOLD = 1e-9; // for relative error + protected final double DOUBLE_PRECISION_THRESHOLD = 1e-9; // for relative error private static final boolean PRINT_MAT_ERROR = false; + // We will use this flag until lower precision is supported on CP. 
+ private final static String DATA_TYPE = "double"; + protected final double SINGLE_PRECISION_THRESHOLD = 1e-3; // for relative error + + @BeforeClass public static void beforeClass() { spark = createSystemMLSparkSession("GPUTests", "local"); } @@ -70,7 +75,9 @@ public abstract class GPUTests extends AutomatedTestBase { * @return a valid threshold */ protected double getTHRESHOLD() { - return THRESHOLD; + if(DATA_TYPE.equals("double")) return DOUBLE_PRECISION_THRESHOLD; + else if(DATA_TYPE.equals("float")) return SINGLE_PRECISION_THRESHOLD; + else throw new RuntimeException("Unsupported datatype:" + DATA_TYPE); } @After @@ -228,7 +235,7 @@ public abstract class GPUTests extends AutomatedTestBase { } /** - * Asserts that the values in two matrices are in {@link UnaryOpTests#THRESHOLD} of each other + * Asserts that the values in two matrices are within {@link GPUTests#DOUBLE_PRECISION_THRESHOLD} of each other * * @param expected expected matrix * @param actual actual matrix @@ -251,11 +258,15 @@ public abstract class GPUTests extends AutomatedTestBase { double actualDouble = actualMB.quickGetValue(i, j); if (expectedDouble != 0.0 && !Double.isNaN(expectedDouble) && Double.isFinite(expectedDouble)) { double relativeError = Math.abs((expectedDouble - actualDouble) / expectedDouble); + double absoluteError = Math.abs(expectedDouble - actualDouble); Formatter format = new Formatter(); format.format( "Relative error(%f) is more than threshold (%f). 
Expected = %f, Actual = %f, differed at [%d, %d]", relativeError, getTHRESHOLD(), expectedDouble, actualDouble, i, j); - Assert.assertTrue(format.toString(), relativeError < getTHRESHOLD()); + if(DATA_TYPE.equals("double")) + Assert.assertTrue(format.toString(), relativeError < getTHRESHOLD()); + else + Assert.assertTrue(format.toString(), relativeError < getTHRESHOLD() || absoluteError < getTHRESHOLD()); format.close(); } else { Assert.assertEquals(expectedDouble, actualDouble, getTHRESHOLD()); @@ -313,6 +324,7 @@ public abstract class GPUTests extends AutomatedTestBase { protected List<Object> runOnGPU(SparkSession spark, String scriptStr, Map<String, Object> inputs, List<String> outStrs) { MLContext gpuMLC = new MLContext(spark); + gpuMLC.setConfigProperty("sysml.gpu.dataType", DATA_TYPE); gpuMLC.setGPU(true); gpuMLC.setForceGPU(true); gpuMLC.setStatistics(true); http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java b/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java index d983716..cbc3563 100644 --- a/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java +++ b/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java @@ -50,9 +50,9 @@ public class MatrixMultiplicationOpTest extends GPUTests { public void matrixMatrixTest1() { String scriptStr = "O = X %*% Y"; - int[] X1 = { 1, 128, 1024 }; - int[] X2 = { 1, 128, 1024 }; - int[] Y2 = { 1, 128, 1024 }; + int[] X1 = { 1, 121 }; + int[] X2 = { 1, 123 }; + int[] Y2 = { 1, 122 }; double[] SX = { 0.0, 0.03, 0.3, 0.9 }; double[] SY = { 0.0, 0.03, 0.3, 0.9 }; @@ -74,8 +74,8 @@ public class MatrixMultiplicationOpTest extends GPUTests { public void commonCaseMLMatrixMatrixTest1() { String scriptStr = "O = X %*% Y"; - int[] X1 = { 1000000 
}; - int[] X2 = { 1000 }; + int[] X1 = { 5000 }; + int[] X2 = { 50 }; int[] Y2 = { 1, 20 }; double[] SX = { 0.0, 0.03, 0.3 }; double[] SY = { 0.0, 0.03, 0.3, 0.9 }; @@ -98,9 +98,9 @@ public class MatrixMultiplicationOpTest extends GPUTests { public void commonCaseDLMatrixMatrixTest1() { String scriptStr = "O = X %*% Y"; - int[] X1 = { 100 }; - int[] X2 = { 600, 900 }; - int[] Y2 = { 205800 }; + int[] X1 = { 32 }; + int[] X2 = { 60, 90 }; + int[] Y2 = { 2058 }; double[] SX = { 0.0, 0.03, 0.3 }; double[] SY = { 0.0, 0.03, 0.3, 0.9 }; @@ -122,9 +122,9 @@ public class MatrixMultiplicationOpTest extends GPUTests { public void commonCaseDLMatrixMatrixTest2() { String scriptStr = "O = X %*% Y"; - int[] X1 = { 64 }; - int[] X2 = { 196608 }; - int[] Y2 = { 512 }; + int[] X1 = { 32 }; + int[] X2 = { 1966 }; + int[] Y2 = { 256 }; double[] SX = { 0.0, 0.03, 0.3, 0.9 }; double[] SY = { 0.0, 0.03, 0.3, 0.9 };
