This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new c398e8ec5e [SYSTEMDS-3644] Compressed-Compressed Transform Encode (PassThrough)
c398e8ec5e is described below

commit c398e8ec5e163647706ac309b8c854a62b594c97
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Mon Oct 30 13:55:26 2023 +0100

    [SYSTEMDS-3644] Compressed-Compressed Transform Encode (PassThrough)
    
    Initial support for transform-encoding a compressed frame directly into a
    compressed matrix, starting with the PassThrough case.
---
 .../sysds/runtime/frame/data/columns/DDCArray.java    |  6 +++++-
 .../runtime/transform/encode/CompressedEncode.java    | 19 +++++++++++++++++++
 .../runtime/transform/encode/MultiColumnEncoder.java  |  5 +++--
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java 
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
index b634cfe6ff..8f3dcd9dcb 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
@@ -55,10 +55,14 @@ public class DDCArray<T> extends ACompressedArray<T> {
                }
        }
 
-       protected Array<T> getDict(){
+       public Array<T> getDict(){
                return dict;
        }
 
+       public AMapToData getMap(){
+               return map;
+       }
+
        /**
         * Try to compress array into DDC format.
         * 
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java 
b/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
index 8ca8b6d9fc..7fbdb1ea3c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
+++ 
b/src/main/java/org/apache/sysds/runtime/transform/encode/CompressedEncode.java
@@ -49,7 +49,9 @@ import 
org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.frame.data.FrameBlock;
+import org.apache.sysds.runtime.frame.data.columns.ACompressedArray;
 import org.apache.sysds.runtime.frame.data.columns.Array;
+import org.apache.sysds.runtime.frame.data.columns.DDCArray;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.util.CommonThreadPool;
 import org.apache.sysds.runtime.util.UtilFunctions;
@@ -164,6 +166,7 @@ public class CompressedEncode {
                IColIndex colIndexes = ColIndexFactory.create(0, domain);
                if(domain == 1 && !containsNull)
                        return ColGroupConst.create(colIndexes, new double[] 
{1});
+
                ADictionary d = new IdentityDictionary(colIndexes.size(), 
containsNull);
                AMapToData m = createMappingAMapToData(a, map, containsNull);
                return ColGroupDDC.create(colIndexes, d, m, null);
@@ -288,6 +291,22 @@ public class CompressedEncode {
                IColIndex colIndexes = ColIndexFactory.create(1);
                int colId = c._colID;
                Array<?> a = in.getColumn(colId - 1);
+               if(a instanceof ACompressedArray){
+                       switch(a.getFrameArrayType()) {
+                               case DDC:
+                                       DDCArray<?> aDDC = (DDCArray<?>) a;
+                                       Array<?> dict = aDDC.getDict();
+                                       double[] vals = new double[dict.size()];
+                                       for(int i = 0; i < dict.size(); i++) {
+                                               vals[i] = dict.getAsDouble(i);
+                                       }
+                                       ADictionary d = Dictionary.create(vals);
+
+                                       return ColGroupDDC.create(colIndexes, 
d, aDDC.getMap(), null);
+                               default:
+                                       throw new NotImplementedException();
+                       }
+               }
                boolean containsNull = a.containsNull();
                HashMap<Object, Long> map = (HashMap<Object, Long>) 
a.getRecodeMap();
                final int blockSz = 
ConfigurationManager.getDMLConfig().getIntValue(DMLConfig.DEFAULT_BLOCK_SIZE);
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
 
b/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
index f1813e29a7..bd9e2ba79f 100644
--- 
a/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
+++ 
b/src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
@@ -102,11 +102,12 @@ public class MultiColumnEncoder implements Encoder {
        }
 
        public MatrixBlock encode(CacheBlock<?> in, int k, boolean 
compressedOut){
-               deriveNumRowPartitions(in, k);
                try {
                        if(isCompressedTransformEncode(in, compressedOut))
                                return CompressedEncode.encode(this, 
(FrameBlock ) in, k);
-                       else if(k > 1 && !MULTI_THREADED_STAGES && 
!hasLegacyEncoder()) {
+
+                       deriveNumRowPartitions(in, k);
+                       if(k > 1 && !MULTI_THREADED_STAGES && 
!hasLegacyEncoder()) {
                                MatrixBlock out = new MatrixBlock();
                                DependencyThreadPool pool = new 
DependencyThreadPool(k);
                                LOG.debug("Encoding with full DAG on " + k + " 
Threads");

Reply via email to