Repository: systemml
Updated Branches:
  refs/heads/master abbffc55e -> d3917effd
[SYSTEMML-446] Bugfix for GPU sparse right indexing with empty output

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d3917eff
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d3917eff
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d3917eff

Branch: refs/heads/master
Commit: d3917effd988de0e0977a310c73c4f232214632e
Parents: abbffc5
Author: Niketan Pansare <[email protected]>
Authored: Wed Oct 25 19:57:28 2017 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Wed Oct 25 19:57:28 2017 -0700

----------------------------------------------------------------------
 .../gpu/context/ExecutionConfig.java | 29 ++------------------
 .../runtime/matrix/data/LibMatrixCUDA.java | 8 ++++--
 2 files changed, 7 insertions(+), 30 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/systemml/blob/d3917eff/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
index 7f8eb9e..cae0660 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
@@ -69,6 +69,8 @@ public class ExecutionConfig {
      * @throws DMLRuntimeException if DMLRuntimeException occurs
      */
     public static ExecutionConfig getConfigForSimpleVectorOperations(int numCells) throws DMLRuntimeException {
+        if(numCells == 0)
+            throw new DMLRuntimeException("Attempting to invoke a kernel with 0 threads");
         int deviceNumber = 0;
         int blockDimX = getMaxBlockDim(deviceNumber);
         int gridDimX = (int) Math.ceil((double) numCells / blockDimX);
@@ -76,32 +78,6 @@ public class ExecutionConfig {
     }
 
     /**
-     * Use this for simple matrix operations and use following in the kernel
-     * <code>
-     * int ix = blockIdx.x * blockDim.x + threadIdx.x;
-     * int iy = blockIdx.y * blockDim.y + threadIdx.y;
-     * </code>
-     * <p>
-     * This tries to schedule as minimum grids as possible.
-     *
-     * @param rlen number of rows
-     * @param clen number of columns
-     * @return execution configuration
-     * @throws DMLRuntimeException if DMLRuntimeException occurs
-     */
-    public static ExecutionConfig getConfigForMatrixOperations(int rlen, int clen) throws DMLRuntimeException {
-        int deviceNumber = 0;
-        int maxBlockDim = getMaxBlockDim(deviceNumber);
-        int blockDimX = (int) Math.min(maxBlockDim, rlen);
-        int gridDimX = (int) Math.ceil((double) rlen / blockDimX);
-        int blockDimY = (int) Math.min(Math.floor(((double) maxBlockDim) / blockDimX), clen);
-        int gridDimY = (int) Math.ceil((double) clen / blockDimY);
-        if (gridDimY > 65535)
-            throw new DMLRuntimeException("Internal Error: gridDimY must be less than 65535 for all supported CUDA compute capabilites!");
-        return new ExecutionConfig(gridDimX, gridDimY, blockDimX, blockDimY);
-    }
-
-    /**
      * Use this for simple vector operations and use following in the kernel
      * <code>
      * int index = blockIdx.x * blockDim.x + threadIdx.x
@@ -116,7 +92,6 @@ public class ExecutionConfig {
         return getConfigForSimpleVectorOperations(rlen * clen);
     }
 
-
     public ExecutionConfig(int gridDimX, int blockDimX) {
         this.gridDimX = gridDimX;
         this.blockDimX = blockDimX;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d3917eff/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index eb17e69..2cccde0 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -1821,17 +1821,19 @@ public class LibMatrixCUDA {
      */
     protected static void sliceSparseDense(GPUContext gCtx, String instName, CSRPointer inPointer, Pointer outPointer, int rl, int ru, int cl, int cu, int inClen) throws DMLRuntimeException {
+        int size = getNnz(inPointer, rl, ru);
+        // Return since nnz of the output is 0 as outPointer is expected to be zeroed out.
+        if(size == 0) return;
+
         int retRlen = ru - rl + 1;
         long t0 = GPUStatistics.DISPLAY_STATISTICS ? System.nanoTime() : 0;
         int retClen = cu - cl + 1;
-        int size = -1; String kernel = null; String timer = null;
-
+        String kernel = null; String timer = null;
         // Note: row-wise parallelization scheme iterates over input rows in single thread
         // whereas nnz parallelization scheme iterates over number of output rows in single thread.
         if(inClen > 10 && retClen > 2*retRlen) {
             // Perform nnz parallelization for wide and short matrices
-            size = getNnz(inPointer, rl, ru);
             timer = GPUInstruction.MISC_TIMER_RIX_SPARSE_DENSE_OP_NNZ;
             kernel = "slice_sparse_dense_nnz";
         }
