[SYSTEMML-1793] Support matrix range indexing on GPU

- This commit supports matrix range indexing (i.e. right indexing) without
  requiring sparse to dense conversion of inputs. Note: this PR only supports
  dense output.
- Also added RightIndexingTests in the gpu package.

Closes #637.

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/628ffad1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/628ffad1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/628ffad1

Branch: refs/heads/master
Commit: 628ffad1b26a056edd3782787f89b8bf7711f0e5
Parents: 8fb74b1
Author: Niketan Pansare <[email protected]>
Authored: Thu Aug 24 14:38:52 2017 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Thu Aug 24 14:40:34 2017 -0700

----------------------------------------------------------------------
 src/main/cpp/kernels/SystemML.cu                |   30 +
 src/main/cpp/kernels/SystemML.ptx               | 2817 +++++++++---------
 .../java/org/apache/sysml/hops/IndexingOp.java  |   11 +-
 .../instructions/GPUInstructionParser.java      |    8 +
 .../instructions/gpu/GPUInstruction.java        |    6 +-
 .../gpu/MatrixIndexingGPUInstruction.java       |  148 +
 .../instructions/gpu/context/CSRPointer.java    |   20 +-
 .../instructions/gpu/context/GPUContext.java    |   66 +-
 .../instructions/gpu/context/GPUObject.java     |   48 +-
 .../runtime/matrix/data/LibMatrixCUDA.java      |  930 +++---
 .../org/apache/sysml/utils/GPUStatistics.java   |  366 ++-
 .../sysml/test/gpu/RightIndexingTests.java      |   74 +
 12 files changed, 2472 insertions(+), 2052 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/main/cpp/kernels/SystemML.cu
----------------------------------------------------------------------
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index dcd64b2..d64d8aa 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -28,6 +28,36 @@ nvcc -ptx -arch=sm_30 SystemML.cu
 /**
+ * Performs a slice operation where the input matrix is sparse and the output matrix is dense.
+ * This function avoids unnecessary sparse to dense conversion of the input matrix.
+ *
+ * @params inVal input val pointer
+ * @params inRowPtr input row pointer
+ * @params colInd input col index pointer
+ * @params ret dense output pointer
+ * @param rl row lower
+ * @param ru row upper
+ * @param cl column lower
+ * @param cu column upper
+ */
+extern "C"
+__global__ void slice_sparse_dense(double* inVal, int* inRowPtr, int* colInd, double* ret, int rl, int ru, int cl, int cu) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int rowIndex = index + rl;
+    if (rowIndex <= ru){
+        int retClen = cu - cl + 1;
+        // Iterate over elements of the row 'rowIndex'.
+        for(int i = inRowPtr[rowIndex]; i < inRowPtr[rowIndex+1]; i++) {
+            // Only slice if the index falls into the given range
+            if(cl <= colInd[i] && colInd[i] <= cu) {
+                ret[ index*retClen + (colInd[i] - cl) ] = inVal[i];
+            }
+        }
+    }
+}
+
+
+/**
  * Does a copy of upper to lower triangle of the given matrix
  * @param ret the input and output array allocated on the GPU
  * @param dim the number of rows of the square matrix ret
