This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 07dd88aba39547b2ba00ae44f86d9a0f83444b0e
Author: baunsgaard <[email protected]>
AuthorDate: Sat Oct 15 16:09:12 2022 +0200

    [SYSTEMDS-3446] DDC Append
---
 .../sysds/runtime/compress/colgroup/AColGroup.java |  8 +++++
 .../compress/colgroup/AColGroupCompressed.java     |  7 +++++
 .../runtime/compress/colgroup/ColGroupDDC.java     |  8 +++++
 .../compress/colgroup/ColGroupUncompressed.java    |  5 ++++
 .../compress/colgroup/dictionary/Dictionary.java   |  1 -
 .../compress/colgroup/mapping/AMapToData.java      |  3 ++
 .../compress/colgroup/mapping/MapToBit.java        | 19 ++++++++++++
 .../compress/colgroup/mapping/MapToByte.java       | 26 +++++++++++++++-
 .../compress/colgroup/mapping/MapToChar.java       | 35 +++++++++++++++++-----
 .../compress/colgroup/mapping/MapToCharPByte.java  | 24 +++++++++++++++
 .../compress/colgroup/mapping/MapToInt.java        | 20 +++++++++++++
 .../compress/colgroup/mapping/MapToZero.java       |  9 ++++++
 .../component/compress/combine/CombineTest.java    | 28 ++++++++++++++++-
 13 files changed, 182 insertions(+), 11 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
index 785a0d3087..efaef22cde 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
@@ -516,6 +516,14 @@ public abstract class AColGroup implements Serializable {
         */
        public abstract double getMax();
 
+       /**
+        * Shorthand method for getting the sum of this column group.
+        * 
+        * @param nRows The number of rows in the column group
+        * @return The sum of this column group
+        */
+       public abstract double getSum(int nRows);
+
        /**
         * Detect if the column group contains a specific value.
         * 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java
index cd0fa77c6d..83cdafe595 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupCompressed.java
@@ -115,6 +115,13 @@ public abstract class AColGroupCompressed extends 
AColGroup {
                return computeMxx(Double.NEGATIVE_INFINITY, 
Builtin.getBuiltinFnObject(BuiltinCode.MAX));
        }
 
+       /**
+        * Get the sum of this column group by delegating to computeSum with a one-element buffer.
+        *
+        * @param nRows The number of rows in the column group, forwarded to computeSum
+        * @return The value held in the buffer after computeSum returns
+        */
+       @Override
+       public double getSum(int nRows) {
+               // computeSum reports its result through the array argument, so pass a single cell and unwrap it.
+               double[] ret = new double[1];
+               computeSum(ret, nRows);
+               return ret[0];
+       }
+
        @Override
        public final void unaryAggregateOperations(AggregateUnaryOperator op, 
double[] c, int nRows, int rl, int ru) {
                unaryAggregateOperations(op, c, nRows, rl, ru,
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
index 6f5346ad4d..008d66ce1b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
@@ -22,6 +22,7 @@ package org.apache.sysds.runtime.compress.colgroup;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary;
@@ -485,6 +486,13 @@ public class ColGroupDDC extends APreAgg {
 
+       /**
+        * Append another column group to this one, if it is compatible.
+        * <p>
+        * Only handled when g is a ColGroupDDC over the same column indexes whose dictionary is equal to the
+        * dictionary of this group. In that case the two mappings are concatenated and this dictionary is reused.
+        *
+        * @param g The column group to append
+        * @return A new column group covering both groups, or null if g can not be appended this way
+        */
        @Override
        public AColGroup append(AColGroup g) {
+               if(g instanceof ColGroupDDC && Arrays.equals(g.getColIndices(), 
_colIndexes)) {
+                       ColGroupDDC gDDC = (ColGroupDDC) g;
+                       if(gDDC._dict.eq(_dict)){
+                               AMapToData nd = _data.append(gDDC._data);
+                               // NOTE(review): the last argument to create is null; presumably cached state that no longer matches the longer mapping -- confirm.
+                               return create(_colIndexes, _dict, nd, null);
+                       }
+               }
                return null;
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
index 8cccf580c2..fa888261f1 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
@@ -477,6 +477,11 @@ public class ColGroupUncompressed extends AColGroup {
                return _data.max();
        }
 
+       /**
+        * Get the sum of the data held by this uncompressed column group.
+        *
+        * @param nRows Not used by this implementation, the sum is taken directly from the data
+        * @return The result of calling sum() on the data
+        */
+       @Override
+       public double getSum(int nRows) {
+               return _data.sum();
+       }
+
        @Override
        public final void tsmm(MatrixBlock ret, int nRows) {
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
index 530c622f26..7cfc49e32b 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
@@ -1077,7 +1077,6 @@ public class Dictionary extends ADictionary {
                        final double[] dv = mb.getDenseBlockValues();
                        return Arrays.equals(_values, dv);
                }
-               
                return false;
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
index 943e8c7027..b9bec0b742 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
@@ -815,6 +815,9 @@ public abstract class AMapToData implements Serializable {
         */
        public abstract AMapToData slice(int l, int u);
 
+
+       /**
+        * Append the given mapping after this mapping and return the result as a new mapping.
+        * <p>
+        * The implementations added together with this method only accept a mapping of their own type and throw a
+        * NotImplementedException otherwise.
+        *
+        * @param t The mapping to append
+        * @return A new mapping containing the entries of this mapping followed by the entries of t
+        */
+       public abstract AMapToData append(AMapToData t);
+
        @Override
        public String toString() {
                final int sz = size();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
index 7bcb462fb6..d2f9f49cae 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToBit.java
@@ -24,6 +24,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
 import org.apache.sysds.utils.MemoryEstimates;
@@ -324,4 +325,22 @@ public class MapToBit extends AMapToData {
        public AMapToData slice(int l, int u) {
                return new MapToBit(getUnique(), _data.get(l,u), u - l);
        }
+
+       /**
+        * Append another bit mapping after this one.
+        *
+        * @param t The mapping to append, has to be a MapToBit
+        * @return A new MapToBit containing the bits of this mapping followed by the bits of t
+        * @throws NotImplementedException If t is not a MapToBit
+        */
+       @Override
+       public AMapToData append(AMapToData t) {
+               if(t instanceof MapToBit) {
+                       final BitSet tBits = ((MapToBit) t)._data;
+                       final int newSize = _size + t.size();
+                       final BitSet ret = new BitSet(newSize);
+                       // ret starts out empty, so or-ing copies the bits of this mapping
+                       ret.or(_data);
+                       // every set bit of t lands _size positions further back
+                       for(int i = tBits.nextSetBit(0); i >= 0; i = tBits.nextSetBit(i + 1))
+                               ret.set(i + _size);
+                       return new MapToBit(2, ret, newSize);
+               }
+               else {
+                       throw new NotImplementedException("Not implemented append on Bit map different type");
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
index 7ae3e9448d..e0e13bf1e7 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToByte.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
 import org.apache.sysds.utils.MemoryEstimates;
 
@@ -147,7 +148,7 @@ public class MapToByte extends AMapToData {
        public int[] getCounts(int[] ret) {
                for(int i = 0; i < _data.length; i++)
                        ret[_data[i] & 0xFF]++;
-                       return ret;
+               return ret;
        }
 
        @Override
@@ -209,4 +210,27 @@ public class MapToByte extends AMapToData {
        public AMapToData slice(int l, int u) {
                return new MapToByte(getUnique(), Arrays.copyOfRange(_data, l, 
u));
        }
+
+       /**
+        * Append another byte mapping after this one.
+        *
+        * @param t The mapping to append, has to be a MapToByte
+        * @return A new mapping containing the entries of this mapping followed by the entries of t. The number of
+        *         distinct values is the maximum of the two inputs, and a MapToUByte is returned when that fits.
+        * @throws NotImplementedException If t is not a MapToByte
+        */
+       @Override
+       public AMapToData append(AMapToData t) {
+               if(t instanceof MapToByte) {
+                       final byte[] tData = ((MapToByte) t)._data;
+                       final int newSize = _data.length + t.size();
+                       final int newDistinct = Math.max(getUnique(), t.getUnique());
+
+                       // copy this mapping, then place t behind it
+                       final byte[] ret = Arrays.copyOf(_data, newSize);
+                       System.arraycopy(tData, 0, ret, _data.length, t.size());
+
+                       // use the same "unique <= 127" cut-off for MapToUByte as the resize methods do
+                       if(newDistinct <= 127)
+                               return new MapToUByte(newDistinct, ret);
+                       else
+                               return new MapToByte(newDistinct, ret);
+               }
+               else {
+                       throw new NotImplementedException("Not implemented append on Byte map different type");
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java
index 693cee52ec..b309861ee2 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToChar.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
 import org.apache.sysds.utils.MemoryEstimates;
 
@@ -171,16 +172,15 @@ public class MapToChar extends AMapToData {
                }
        }
 
-
        @Override
        public int getUpperBoundValue() {
                return Character.MAX_VALUE;
        }
 
        @Override
-       public void copyInt(int[] d){
+       public void copyInt(int[] d) {
                for(int i = 0; i < _data.length; i++)
-                       _data[i] = (char)d[i];
+                       _data[i] = (char) d[i];
        }
 
        @Override
@@ -193,23 +193,23 @@ public class MapToChar extends AMapToData {
        @Override
        public int[] getCounts(int[] ret) {
                for(int i = 0; i < _data.length; i++)
-                       ret[_data[i]]++; 
+                       ret[_data[i]]++;
                return ret;
        }
 
        @Override
-       public AMapToData resize(int unique){
+       public AMapToData resize(int unique) {
                final int size = _data.length;
                AMapToData ret;
                if(unique <= 1)
                        return new MapToZero(size);
                else if(unique == 2 && size > 32)
                        ret = new MapToBit(unique, size);
-               else if (unique <= 127)
+               else if(unique <= 127)
                        ret = new MapToUByte(unique, size);
                else if(unique < 256)
                        ret = new MapToByte(unique, size);
-               else{
+               else {
                        setUnique(unique);
                        return this;
                }
@@ -221,7 +221,7 @@ public class MapToChar extends AMapToData {
        public int countRuns() {
                int c = 1;
                char prev = _data[0];
-               for(int i = 1; i <_data.length; i++){
+               for(int i = 1; i < _data.length; i++) {
                        c += prev == _data[i] ? 0 : 1;
                        prev = _data[i];
                }
@@ -232,4 +232,23 @@ public class MapToChar extends AMapToData {
        public AMapToData slice(int l, int u) {
                return new MapToChar(getUnique(), Arrays.copyOfRange(_data, l, 
u));
        }
+
+       /**
+        * Append another char mapping after this one.
+        *
+        * @param t The mapping to append, has to be a MapToChar
+        * @return A new MapToChar containing the entries of this mapping followed by the entries of t, with the
+        *         number of distinct values set to the maximum of the two inputs
+        * @throws NotImplementedException If t is not a MapToChar
+        */
+       @Override
+       public AMapToData append(AMapToData t) {
+               if(t instanceof MapToChar) {
+                       final char[] tData = ((MapToChar) t)._data;
+                       final int newSize = _data.length + t.size();
+                       final int newDistinct = Math.max(getUnique(), t.getUnique());
+
+                       // copy this mapping, then place t behind it
+                       final char[] ret = Arrays.copyOf(_data, newSize);
+                       System.arraycopy(tData, 0, ret, _data.length, t.size());
+
+                       return new MapToChar(newDistinct, ret);
+               }
+               else {
+                       throw new NotImplementedException("Not implemented append on Char map different type");
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java
index 6109024bc4..88b2e435e1 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToCharPByte.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
 import org.apache.sysds.utils.MemoryEstimates;
 
@@ -217,4 +218,27 @@ public class MapToCharPByte extends AMapToData {
        public AMapToData slice(int l, int u) {
                return new MapToCharPByte(getUnique(), 
Arrays.copyOfRange(_data_c, l, u), Arrays.copyOfRange(_data_b, l, u));
        }
+
+       /**
+        * Append another char-plus-byte mapping after this one.
+        *
+        * @param t The mapping to append, has to be a MapToCharPByte
+        * @return A new MapToCharPByte containing the entries of this mapping followed by the entries of t, with the
+        *         number of distinct values set to the maximum of the two inputs
+        * @throws NotImplementedException If t is not a MapToCharPByte
+        */
+       @Override
+       public AMapToData append(AMapToData t) {
+               if(t instanceof MapToCharPByte) {
+                       final MapToCharPByte other = (MapToCharPByte) t;
+                       final char[] tChars = other._data_c;
+                       final byte[] tBytes = other._data_b;
+                       final int newSize = _data_c.length + t.size();
+                       final int newDistinct = Math.max(getUnique(), t.getUnique());
+
+                       // both backing arrays are extended the same way: this mapping first, then t
+                       final char[] ret_c = Arrays.copyOf(_data_c, newSize);
+                       System.arraycopy(tChars, 0, ret_c, _data_c.length, t.size());
+                       final byte[] ret_b = Arrays.copyOf(_data_b, newSize);
+                       System.arraycopy(tBytes, 0, ret_b, _data_b.length, t.size());
+
+                       return new MapToCharPByte(newDistinct, ret_c, ret_b);
+               }
+               else {
+                       throw new NotImplementedException("Not implemented append on CharPByte map different type");
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java
index d40a7a5ccb..b4117cd5b8 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToInt.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
 import org.apache.sysds.utils.MemoryEstimates;
 
@@ -232,4 +233,23 @@ public class MapToInt extends AMapToData {
        public AMapToData slice(int l, int u) {
                return new MapToInt(getUnique(), Arrays.copyOfRange(_data, l, 
u));
        }
+
+       /**
+        * Append another int mapping after this one.
+        *
+        * @param t The mapping to append, has to be a MapToInt
+        * @return A new MapToInt containing the entries of this mapping followed by the entries of t, with the
+        *         number of distinct values set to the maximum of the two inputs
+        * @throws NotImplementedException If t is not a MapToInt
+        */
+       @Override
+       public AMapToData append(AMapToData t) {
+               if(t instanceof MapToInt) {
+                       final int[] tData = ((MapToInt) t)._data;
+                       final int newSize = _data.length + t.size();
+                       final int newDistinct = Math.max(getUnique(), t.getUnique());
+
+                       // copy this mapping, then place t behind it
+                       final int[] ret = Arrays.copyOf(_data, newSize);
+                       System.arraycopy(tData, 0, ret, _data.length, t.size());
+
+                       return new MapToInt(newDistinct, ret);
+               }
+               else {
+                       throw new NotImplementedException("Not implemented append on Int map different type");
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java
index 5fc2c51178..c3cd14afc4 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/MapToZero.java
@@ -24,6 +24,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.util.BitSet;
 
+import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
 
@@ -149,4 +150,12 @@ public class MapToZero extends AMapToData {
        public AMapToData slice(int l, int u) {
                return new MapToZero(u - l);
        }
+
+       /**
+        * Append another all-zero mapping after this one.
+        *
+        * @param t The mapping to append, has to be a MapToZero
+        * @return A new MapToZero whose size is the size of this mapping plus the size of t
+        * @throws NotImplementedException If t is not a MapToZero
+        */
+       @Override
+       public AMapToData append(AMapToData t) {
+               if(t instanceof MapToZero)
+                       return new MapToZero(_size + t.size());
+               else
+                       throw new NotImplementedException("Not implemented append on Zero map different type");
+       }
 }
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java
index 86736b99f3..8db991ef9f 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/combine/CombineTest.java
@@ -26,15 +26,23 @@ import static org.junit.Assert.fail;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory;
+import org.apache.sysds.runtime.compress.CompressionSettingsBuilder;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.lib.CLALibCombine;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysds.test.TestUtils;
 import org.junit.Test;
 
 public class CombineTest {
 
+       protected static final Log LOG = 
LogFactory.getLog(CombineTest.class.getName());
+
        @Test
        public void combineEmpty() {
                CompressedMatrixBlock m1 = 
CompressedMatrixBlockFactory.createConstant(100, 10, 0.0);
@@ -56,7 +64,6 @@ public class CombineTest {
 
        }
 
-
        @Test
        public void combineConst() {
                CompressedMatrixBlock m1 = 
CompressedMatrixBlockFactory.createConstant(100, 10, 1.0);
@@ -78,4 +85,23 @@ public class CombineTest {
 
        }
 
+       /**
+        * Compress a generated matrix with DDC as the only valid compression type, append the first column group to
+        * itself and verify that the sum of the result scales with the number of appended copies.
+        */
+       @Test
+       public void combineDDC() {
+               MatrixBlock mb = 
TestUtils.ceil(TestUtils.generateTestMatrixBlock(165, 2, 1, 3, 1.0, 2514));
+               CompressedMatrixBlock csb = (CompressedMatrixBlock) 
CompressedMatrixBlockFactory
+                       .compress(mb,
+                               new 
CompressionSettingsBuilder().clearValidCompression().addValidCompression(CompressionType.DDC))
+                       .getLeft();
+
+               AColGroup g = csb.getColGroups().get(0);
+               double sum = g.getSum(165);
+               // two copies of the group: twice the rows, so twice the sum is expected
+               AColGroup ret = g.append(g);
+               double sum2 = ret.getSum(165 * 2);
+               assertEquals(sum * 2, sum2, 0.001);
+               // appending onto an already appended group: three copies in total
+               AColGroup ret2 = ret.append(g);
+               double sum3 = ret2.getSum(165 * 3);
+               assertEquals(sum * 3, sum3, 0.001);
+
+       }
+
 }

Reply via email to