[SYSTEMML-2438] Fix correctness of matrix compression column group partitioning. The recently improved bin-packing-based column group partitioning for matrix compression returned padded (untrimmed) bins, which led to duplicated entries for column 0 and thus incorrect results.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c13a1b04 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c13a1b04 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c13a1b04 Branch: refs/heads/master Commit: c13a1b04c90198ce934888daddb0c54324c46d72 Parents: 2d01028 Author: Matthias Boehm <[email protected]> Authored: Fri Jul 13 12:55:41 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Jul 13 13:02:23 2018 -0700 ---------------------------------------------------------------------- .../apache/sysml/runtime/compress/CompressedMatrixBlock.java | 2 +- .../compress/cocode/ColumnGroupPartitionerBinPacking.java | 2 +- .../org/apache/sysml/runtime/compress/utils/IntArrayList.java | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/c13a1b04/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java index 40a6674..fc0d185 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java @@ -121,7 +121,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable if( LDEBUG ) { Logger.getLogger("org.apache.sysml.runtime.compress") .setLevel((Level) LDEBUG_LEVEL); - } + } } protected ArrayList<ColGroup> _colGroups = null; http://git-wip-us.apache.org/repos/asf/systemml/blob/c13a1b04/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java index b6dccf6..2e88f5b 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java +++ b/src/main/java/org/apache/sysml/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java @@ -77,7 +77,7 @@ public class ColumnGroupPartitionerBinPacking extends ColumnGroupPartitioner } //extract native int arrays for individual bins - return bins.stream().map(b -> b.extractValues()) + return bins.stream().map(b -> b.extractValues(true)) .collect(Collectors.toList()); } http://git-wip-us.apache.org/repos/asf/systemml/blob/c13a1b04/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java b/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java index e51f538..4025559 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java +++ b/src/main/java/org/apache/sysml/runtime/compress/utils/IntArrayList.java @@ -85,6 +85,12 @@ public class IntArrayList else return _data; } + + public int[] extractValues(boolean trim) { + int[] ret = extractValues(); + return (trim && _size < ret.length) ? + Arrays.copyOfRange(ret, 0, _size) : ret; + } private void resize() { // check for integer overflow on resize
