[SYSTEMML-2237] Improved spark reshape of ultra-sparse matrices For ultra-sparse matrices, the separate output block index creation per row (begin and end) can dominate performance, if the number of input non-zeros is very small and empty blocks do not need to be output. Hence, this patch adds the handling of these cases by creating the output block indexes only for existing non-zeros.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a0b0e80e Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a0b0e80e Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a0b0e80e Branch: refs/heads/master Commit: a0b0e80e9d1beb28e385052141caed6e19220116 Parents: 4152680 Author: Matthias Boehm <[email protected]> Authored: Fri Apr 6 22:38:15 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Apr 6 22:38:15 2018 -0700 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixReorg.java | 32 +++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/a0b0e80e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java index 42c56c4..8de6f13 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java @@ -26,6 +26,7 @@ import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map.Entry; import java.util.concurrent.Callable; @@ -471,7 +472,7 @@ public class LibMatrixReorg MatrixBlock mbIn = (MatrixBlock) in.getValue(); //prepare result blocks (no reuse in order to guarantee mem constraints) - Collection<MatrixIndexes> rix = computeAllResultBlockIndexes(ixIn, mcIn, mcOut, rowwise); + Collection<MatrixIndexes> rix = computeAllResultBlockIndexes(ixIn, mcIn, mcOut, mbIn, rowwise, outputEmptyBlocks); HashMap<MatrixIndexes, MatrixBlock> rblk = createAllResultBlocks(rix, mbIn.nonZeros, mcIn, mcOut, rowwise, out); //basic algorithm @@ -1450,7 +1451,7 @@ public class LibMatrixReorg /////////////////////////////// private static Collection<MatrixIndexes> computeAllResultBlockIndexes( MatrixIndexes ixin, - MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise ) + MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, MatrixBlock in, boolean rowwise, boolean outputEmpty ) { HashSet<MatrixIndexes> ret = new HashSet<>(); @@ -1464,12 +1465,16 @@ public class LibMatrixReorg MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), row_offset, 0, mcIn, mcOut, rowwise); MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), max_row_offset, 0, mcIn, mcOut, rowwise); createRowwiseIndexes(first, last, mcOut.getNumColBlocks(), ret); - } - for( long i=row_offset; i<max_row_offset+1; i++ ) { - MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), i, col_offset, mcIn, mcOut, rowwise); - MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), i, max_col_offset, mcIn, mcOut, rowwise); - createRowwiseIndexes(first, last, mcOut.getNumColBlocks(), ret); + else if( in.getNonZeros()<in.getNumRows() && !outputEmpty ) { + createNonZeroIndexes(mcIn, mcOut, in, row_offset, col_offset, rowwise, ret); + } + else { + for( long i=row_offset; i<max_row_offset+1; i++ ) { + MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), i, col_offset, mcIn, mcOut, rowwise); + MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), i, max_col_offset, mcIn, mcOut, rowwise); + createRowwiseIndexes(first, last, mcOut.getNumColBlocks(), ret); + } } } else{ //colwise @@ -1478,6 +1483,9 @@ public class LibMatrixReorg MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), 0, max_col_offset, mcIn, mcOut, rowwise); createColwiseIndexes(first, last, mcOut.getNumRowBlocks(), ret); } + else if( in.getNonZeros()<in.getNumColumns() && !outputEmpty ) { + createNonZeroIndexes(mcIn, mcOut, in, row_offset, col_offset, rowwise, ret); + } else { for( long j=col_offset; j<max_col_offset+1; j++ ) { MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), row_offset, j, mcIn, mcOut, rowwise); @@ -1534,6 +1542,16 @@ public class LibMatrixReorg ret.add(last); } } + + private static void createNonZeroIndexes(MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, + MatrixBlock in, long row_offset, long col_offset, boolean rowwise, HashSet<MatrixIndexes> ret) { + Iterator<IJV> iter = in.getSparseBlockIterator(); + while( iter.hasNext() ) { + IJV cell = iter.next(); + ret.add(computeResultBlockIndex(new MatrixIndexes(), + row_offset+cell.getI(), col_offset+cell.getJ(), mcIn, mcOut, rowwise)); + } + } @SuppressWarnings("unused") private static HashMap<MatrixIndexes, MatrixBlock> createAllResultBlocks( Collection<MatrixIndexes> rix,
