This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 07dd88aba39547b2ba00ae44f86d9a0f83444b0e Author: baunsgaard <[email protected]> AuthorDate: Sat Oct 15 16:09:12 2022 +0200 [SYSTEMDS-3446] DDC Append --- .../sysds/runtime/compress/colgroup/AColGroup.java | 8 +++++ .../compress/colgroup/AColGroupCompressed.java | 7 +++++ .../runtime/compress/colgroup/ColGroupDDC.java | 8 +++++ .../compress/colgroup/ColGroupUncompressed.java | 5 ++++ .../compress/colgroup/dictionary/Dictionary.java | 1 - .../compress/colgroup/mapping/AMapToData.java | 3 ++ .../compress/colgroup/mapping/MapToBit.java | 19 ++++++++++++ .../compress/colgroup/mapping/MapToByte.java | 26 +++++++++++++++- .../compress/colgroup/mapping/MapToChar.java | 35 +++++++++++++++++----- .../compress/colgroup/mapping/MapToCharPByte.java | 24 +++++++++++++++ .../compress/colgroup/mapping/MapToInt.java | 20 +++++++++++++ .../compress/colgroup/mapping/MapToZero.java | 9 ++++++ .../component/compress/combine/CombineTest.java | 28 ++++++++++++++++- 13 files changed, 182 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 785a0d3087..efaef22cde 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -516,6 +516,14 @@ public abstract class AColGroup implements Serializable { */ public abstract double getMax(); + /** + * Short hand method for getting the sum of this column group + * + * @param nRows The number of rows in the column group + * @return The sum of this column group + */ + public abstract double getSum(int nRows); + /** * Detect if the column group contains a specific value. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java index cd0fa77c6d..83cdafe595 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java @@ -115,6 +115,13 @@ public abstract class AColGroupCompressed extends AColGroup { return computeMxx(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MAX)); } + @Override + public double getSum(int nRows) { + double[] ret = new double[1]; + computeSum(ret, nRows); + return ret[0]; + } + @Override public final void unaryAggregateOperations(AggregateUnaryOperator op, double[] c, int nRows, int rl, int ru) { unaryAggregateOperations(op, c, nRows, rl, ru, diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index 6f5346ad4d..008d66ce1b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -22,6 +22,7 @@ package org.apache.sysds.runtime.compress.colgroup; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Arrays; import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; @@ -485,6 +486,13 @@ public class ColGroupDDC extends APreAgg { @Override public AColGroup append(AColGroup g) { + if(g instanceof ColGroupDDC && Arrays.equals(g.getColIndices(), _colIndexes)) { + ColGroupDDC gDDC = (ColGroupDDC) g; + if(gDDC._dict.eq(_dict)){ + AMapToData nd = _data.append(gDDC._data); + return create(_colIndexes, _dict, nd, null); + } + } return null; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java index 8cccf580c2..fa888261f1 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java @@ -477,6 +477,11 @@ public class ColGroupUncompressed extends AColGroup { return _data.max(); } + @Override + public double getSum(int nRows) { + return _data.sum(); + } + @Override public final void tsmm(MatrixBlock ret, int nRows) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java index 530c622f26..7cfc49e32b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java @@ -1077,7 +1077,6 @@ public class Dictionary extends ADictionary { final double[] dv = mb.getDenseBlockValues(); return Arrays.equals(_values, dv); } - return false; } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java index 943e8c7027..b9bec0b742 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java @@ -815,6 +815,9 @@ public abstract class AMapToData implements Serializable { */ public abstract AMapToData slice(int l, int u); + + public abstract AMapToData append(AMapToData t); + @Override public String toString() { final int sz = size(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java index 7bcb462fb6..d2f9f49cae 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java @@ -24,6 +24,7 @@ import java.io.DataOutput; import java.io.IOException; import java.util.BitSet; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.utils.MemoryEstimates; @@ -324,4 +325,22 @@ public class MapToBit extends AMapToData { public AMapToData slice(int l, int u) { return new MapToBit(getUnique(), _data.get(l,u), u - l); } + + @Override + public AMapToData append(AMapToData t) { + if(t instanceof MapToBit){ + MapToBit tb = (MapToBit) t; + BitSet tbb = tb._data; + final int newSize = _size + t.size(); + BitSet ret = new BitSet(newSize); + ret.xor(_data); + + tbb.stream().forEach(x -> ret.set(x + _size, true)); + return new MapToBit(2, ret, newSize); + } + else{ + throw new NotImplementedException("Not implemented append on Bit map different type"); + + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java index 7ae3e9448d..e0e13bf1e7 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.BitSet; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.utils.MemoryEstimates; @@ -147,7 +148,7 @@ public class MapToByte extends AMapToData { public int[] getCounts(int[] ret) { for(int i = 0; i < _data.length; i++) ret[_data[i] & 0xFF]++; - return ret; + return ret; } @Override @@ -209,4 +210,27 @@ public class MapToByte extends AMapToData { public AMapToData slice(int l, int u) { return new MapToByte(getUnique(), Arrays.copyOfRange(_data, l, u)); } + + @Override + public AMapToData append(AMapToData t) { + if(t instanceof MapToByte) { + MapToByte tb = (MapToByte) t; + byte[] tbb = tb._data; + final int newSize = _data.length + t.size(); + final int newDistinct = Math.max(getUnique(), t.getUnique()); + + // copy + byte[] ret = Arrays.copyOf(_data, newSize); + System.arraycopy(tbb, 0, ret, _data.length, t.size()); + + // return + if(newDistinct < 127) + return new MapToUByte(newDistinct, ret); + else + return new MapToByte(newDistinct, ret); + } + else { + throw new NotImplementedException("Not implemented append on Bit map different type"); + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java index 693cee52ec..b309861ee2 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.BitSet; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.utils.MemoryEstimates; @@ -171,16 +172,15 @@ public class MapToChar extends AMapToData { } } - @Override public int getUpperBoundValue() { return Character.MAX_VALUE; } @Override - public void copyInt(int[] d){ + public void copyInt(int[] d) { for(int i = 0; i < _data.length; i++) - _data[i] = (char)d[i]; + _data[i] = (char) d[i]; } @Override @@ -193,23 +193,23 @@ public class MapToChar extends AMapToData { @Override public int[] getCounts(int[] ret) { for(int i = 0; i < _data.length; i++) - ret[_data[i]]++; + ret[_data[i]]++; return ret; } @Override - public AMapToData resize(int unique){ + public AMapToData resize(int unique) { final int size = _data.length; AMapToData ret; if(unique <= 1) return new MapToZero(size); else if(unique == 2 && size > 32) ret = new MapToBit(unique, size); - else if (unique <= 127) + else if(unique <= 127) ret = new MapToUByte(unique, size); else if(unique < 256) ret = new MapToByte(unique, size); - else{ + else { setUnique(unique); return this; } @@ -221,7 +221,7 @@ public class MapToChar extends AMapToData { public int countRuns() { int c = 1; char prev = _data[0]; - for(int i = 1; i <_data.length; i++){ + for(int i = 1; i < _data.length; i++) { c += prev == _data[i] ? 0 : 1; prev = _data[i]; } @@ -232,4 +232,23 @@ public class MapToChar extends AMapToData { public AMapToData slice(int l, int u) { return new MapToChar(getUnique(), Arrays.copyOfRange(_data, l, u)); } + + @Override + public AMapToData append(AMapToData t) { + if(t instanceof MapToChar) { + MapToChar tb = (MapToChar) t; + char[] tbb = tb._data; + final int newSize = _data.length + t.size(); + final int newDistinct = Math.max(getUnique(), t.getUnique()); + + // copy + char[] ret = Arrays.copyOf(_data, newSize); + System.arraycopy(tbb, 0, ret, _data.length, t.size()); + + return new MapToChar(newDistinct, ret); + } + else { + throw new NotImplementedException("Not implemented append on Bit map different type"); + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java index 6109024bc4..88b2e435e1 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.BitSet; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.utils.MemoryEstimates; @@ -217,4 +218,27 @@ public class MapToCharPByte extends AMapToData { public AMapToData slice(int l, int u) { return new MapToCharPByte(getUnique(), Arrays.copyOfRange(_data_c, l, u), Arrays.copyOfRange(_data_b, l, u)); } + + @Override + public AMapToData append(AMapToData t) { + if(t instanceof MapToCharPByte) { + MapToCharPByte tb = (MapToCharPByte) t; + char[] tbb = tb._data_c; + byte[] tbbb = tb._data_b; + final int newSize = _data_c.length + t.size(); + final int newDistinct = Math.max(getUnique(), t.getUnique()); + + // copy + char[] ret_c = Arrays.copyOf(_data_c, newSize); + System.arraycopy(tbb, 0, ret_c, _data_c.length, t.size()); + byte[] ret_b = Arrays.copyOf(_data_b, newSize); + System.arraycopy(tbbb, 0, ret_b, _data_b.length, t.size()); + + + return new MapToCharPByte(newDistinct, ret_c, ret_b); + } + else { + throw new NotImplementedException("Not implemented append on Bit map different type"); + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java index d40a7a5ccb..b4117cd5b8 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.BitSet; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; import org.apache.sysds.utils.MemoryEstimates; @@ -232,4 +233,23 @@ public class MapToInt extends AMapToData { public AMapToData slice(int l, int u) { return new MapToInt(getUnique(), Arrays.copyOfRange(_data, l, u)); } + + @Override + public AMapToData append(AMapToData t) { + if(t instanceof MapToInt) { + MapToInt tb = (MapToInt) t; + int[] tbb = tb._data; + final int newSize = _data.length + t.size(); + final int newDistinct = Math.max(getUnique(), t.getUnique()); + + // copy + int[] ret = Arrays.copyOf(_data, newSize); + System.arraycopy(tbb, 0, ret, _data.length, t.size()); + + return new MapToInt(newDistinct, ret); + } + else { + throw new NotImplementedException("Not implemented append on Bit map different type"); + } + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java index 5fc2c51178..c3cd14afc4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java @@ -24,6 +24,7 @@ import java.io.DataOutput; import java.io.IOException; import java.util.BitSet; +import org.apache.commons.lang.NotImplementedException; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE; @@ -149,4 +150,12 @@ public class MapToZero extends AMapToData { public AMapToData slice(int l, int u) { return new MapToZero(u - l); } + + @Override + public AMapToData append(AMapToData t) { + if(t instanceof MapToZero) + return new MapToZero(_size + t.size()); + else + throw new NotImplementedException("Not implemented append on Bit map different type"); + } } diff --git a/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java b/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java index 86736b99f3..8db991ef9f 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java @@ -26,15 +26,23 @@ import static org.junit.Assert.fail; import java.util.HashMap; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory; +import org.apache.sysds.runtime.compress.CompressionSettingsBuilder; +import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.compress.lib.CLALibCombine; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.data.MatrixIndexes; +import org.apache.sysds.test.TestUtils; import org.junit.Test; public class CombineTest { + protected static final Log LOG = LogFactory.getLog(CombineTest.class.getName()); + @Test public void combineEmpty() { CompressedMatrixBlock m1 = CompressedMatrixBlockFactory.createConstant(100, 10, 0.0); @@ -56,7 +64,6 @@ public class CombineTest { } - @Test public void combineConst() { CompressedMatrixBlock m1 = CompressedMatrixBlockFactory.createConstant(100, 10, 1.0); @@ -78,4 +85,23 @@ public class CombineTest { } + @Test + public void combineDDC() { + MatrixBlock mb = TestUtils.ceil(TestUtils.generateTestMatrixBlock(165, 2, 1, 3, 1.0, 2514)); + CompressedMatrixBlock csb = (CompressedMatrixBlock) CompressedMatrixBlockFactory + .compress(mb, + new CompressionSettingsBuilder().clearValidCompression().addValidCompression(CompressionType.DDC)) + .getLeft(); + + AColGroup g = csb.getColGroups().get(0); + double sum = g.getSum(165); + AColGroup ret = g.append(g); + double sum2 = ret.getSum(165 * 2); + assertEquals(sum * 2, sum2, 0.001); + AColGroup ret2 = ret.append(g); + double sum3 = ret2.getSum(165 * 3); + assertEquals(sum * 3, sum3, 0.001); + + } + }
