Repository: systemml
Updated Branches:
  refs/heads/master aa537dad4 -> b7fd340fe
[SYSTEMML-2136] Fix int overflow in spark/mr text to binary conversion

This patch fixes an issue of integer overflows when converting matrices
with a large number of rows or columns (>2 billion) from textcell to
binary block representation. The issue was a misplaced cast in the
computation of block sizes.

Furthermore, this also includes a minor javadoc fix of the xor codegen
support (which currently breaks the distribution build).

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b7fd340f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b7fd340f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b7fd340f

Branch: refs/heads/master
Commit: b7fd340fe40b1c73f915beb9a770e78009d44db6
Parents: aa537da
Author: Matthias Boehm <[email protected]>
Authored: Wed Feb 7 18:38:01 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Wed Feb 7 18:38:01 2018 -0800

----------------------------------------------------------------------
 .../sysml/runtime/codegen/LibSpoofPrimitives.java |  3 +--
 .../apache/sysml/runtime/matrix/data/MatrixBlock.java |  4 ++++
 .../sysml/runtime/matrix/mapred/ReblockBuffer.java | 14 ++++++++------
 3 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/systemml/blob/b7fd340f/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index 8418377..8d76e14 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -566,12 +566,11 @@ public class LibSpoofPrimitives
 	 * Computes c = xor(A,B)
 	 * 
 	 * @param a dense input vector A
-	 * @param ai start position in A
 	 * @param bval scalar value
 	 * @param c resultant vector
+	 * @param ai start position in A
 	 * @param ci index of c
 	 * @param len number of processed elements
-	 * @return resultant value
 	 */
 	public static void vectXorAdd(double[] a, double bval, double[] c, int ai, int ci, int len) {
 		for( int j = ai; j < ai+len; j++, ci++)

http://git-wip-us.apache.org/repos/asf/systemml/blob/b7fd340f/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 654cf53..d7811bf 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -237,6 +237,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	 * @param val initialization value
 	 */
 	private void reset(int rl, int cl, boolean sp, long estnnz, double val) {
+		//check for valid dimensions
+		if( rl < 0 || cl < 0 )
+			throw new RuntimeException("Invalid block dimensions: "+rl+" "+cl);
+		
 		//reset basic meta data
 		rlen = rl;
 		clen = cl;

http://git-wip-us.apache.org/repos/asf/systemml/blob/b7fd340f/src/main/java/org/apache/sysml/runtime/matrix/mapred/ReblockBuffer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/ReblockBuffer.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/ReblockBuffer.java
index 17a8618..7f273fb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/ReblockBuffer.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/ReblockBuffer.java
@@ -187,10 +187,11 @@ public class ReblockBuffer
 			if( bi != cbi || bj != cbj ) {
 				outputBlock(out, tmpIx, outTVal, tmpBlock);
 				cbi = bi;
-				cbj = bj;
+				cbj = bj;
 				tmpIx.setIndexes(bi, bj);
-				tmpBlock.reset(Math.min(_brlen, (int)(_rlen-(bi-1)*_brlen)),
-						Math.min(_bclen, (int)(_clen-(bj-1)*_bclen)), sparse);
+				tmpBlock.reset(
+					UtilFunctions.computeBlockSize(_rlen, bi, _brlen),
+					UtilFunctions.computeBlockSize(_clen, bj, _bclen), sparse);
 			}
 			
 			int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
@@ -263,10 +264,11 @@ public class ReblockBuffer
 			if( bi != cbi || bj != cbj ) {
 				outputBlock(outList, tmpIx, tmpBlock);
 				cbi = bi;
-				cbj = bj;
+				cbj = bj;
 				tmpIx = new MatrixIndexes(bi, bj);
-				tmpBlock = new MatrixBlock(Math.min(_brlen, (int)(_rlen-(bi-1)*_brlen)),
-						Math.min(_bclen, (int)(_clen-(bj-1)*_bclen)), sparse);
+				tmpBlock = new MatrixBlock(
+					UtilFunctions.computeBlockSize(_rlen, bi, _brlen),
+					UtilFunctions.computeBlockSize(_clen, bj, _bclen), sparse);
 			}
 			
 			int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
