This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 8c532be3 chore: Removing copying data from dictionary values into CometDictionary (#490)
8c532be3 is described below

commit 8c532be3a49eb6bcb37736190dd28da828ff1788
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Wed May 29 21:05:02 2024 -0700

    chore: Removing copying data from dictionary values into CometDictionary (#490)
    
    * chore: Removing copying data from dictionary values into CometDictionary
    
    * For review
---
 .../org/apache/comet/vector/CometDictionary.java   | 102 +++++----------------
 1 file changed, 22 insertions(+), 80 deletions(-)

diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionary.java b/common/src/main/java/org/apache/comet/vector/CometDictionary.java
index b213b75d..24a6d6d8 100644
--- a/common/src/main/java/org/apache/comet/vector/CometDictionary.java
+++ b/common/src/main/java/org/apache/comet/vector/CometDictionary.java
@@ -29,17 +29,8 @@ public class CometDictionary implements AutoCloseable {
   private CometPlainVector values;
   private final int numValues;
 
-  /** Decoded dictionary values. Only one of the following is set. */
-  private byte[] bytes;
-
-  private short[] shorts;
-  private int[] ints;
-  private long[] longs;
-  private float[] floats;
-  private double[] doubles;
-  private boolean[] booleans;
+  /** Decoded dictionary values. We only need to copy values for decimal type. */
   private ByteArrayWrapper[] binaries;
-  private UTF8String[] strings;
 
   public CometDictionary(CometPlainVector values) {
     this.values = values;
@@ -59,39 +50,48 @@ public class CometDictionary implements AutoCloseable {
   }
 
   public boolean decodeToBoolean(int index) {
-    return booleans[index];
+    return values.getBoolean(index);
   }
 
   public byte decodeToByte(int index) {
-    return bytes[index];
+    return values.getByte(index);
   }
 
   public short decodeToShort(int index) {
-    return shorts[index];
+    return values.getShort(index);
   }
 
   public int decodeToInt(int index) {
-    return ints[index];
+    return values.getInt(index);
   }
 
   public long decodeToLong(int index) {
-    return longs[index];
+    return values.getLong(index);
   }
 
   public float decodeToFloat(int index) {
-    return floats[index];
+    return values.getFloat(index);
   }
 
   public double decodeToDouble(int index) {
-    return doubles[index];
+    return values.getDouble(index);
   }
 
   public byte[] decodeToBinary(int index) {
-    return binaries[index].bytes;
+    switch (values.getValueVector().getMinorType()) {
+      case VARBINARY:
+      case FIXEDSIZEBINARY:
+        return values.getBinary(index);
+      case DECIMAL:
+        return binaries[index].bytes;
+      default:
+        throw new IllegalArgumentException(
+            "Invalid Arrow minor type: " + values.getValueVector().getMinorType());
+    }
   }
 
   public UTF8String decodeToUTF8String(int index) {
-    return strings[index];
+    return values.getUTF8String(index);
   }
 
   @Override
@@ -101,65 +101,10 @@ public class CometDictionary implements AutoCloseable {
 
   private void initialize() {
     switch (values.getValueVector().getMinorType()) {
-      case BIT:
-        booleans = new boolean[numValues];
-        for (int i = 0; i < numValues; i++) {
-          booleans[i] = values.getBoolean(i);
-        }
-        break;
-      case TINYINT:
-        bytes = new byte[numValues];
-        for (int i = 0; i < numValues; i++) {
-          bytes[i] = values.getByte(i);
-        }
-        break;
-      case SMALLINT:
-        shorts = new short[numValues];
-        for (int i = 0; i < numValues; i++) {
-          shorts[i] = values.getShort(i);
-        }
-        break;
-      case INT:
-      case DATEDAY:
-        ints = new int[numValues];
-        for (int i = 0; i < numValues; i++) {
-          ints[i] = values.getInt(i);
-        }
-        break;
-      case BIGINT:
-      case TIMESTAMPMICRO:
-      case TIMESTAMPMICROTZ:
-        longs = new long[numValues];
-        for (int i = 0; i < numValues; i++) {
-          longs[i] = values.getLong(i);
-        }
-        break;
-      case FLOAT4:
-        floats = new float[numValues];
-        for (int i = 0; i < numValues; i++) {
-          floats[i] = values.getFloat(i);
-        }
-        break;
-      case FLOAT8:
-        doubles = new double[numValues];
-        for (int i = 0; i < numValues; i++) {
-          doubles[i] = values.getDouble(i);
-        }
-        break;
-      case VARBINARY:
-      case FIXEDSIZEBINARY:
-        binaries = new ByteArrayWrapper[numValues];
-        for (int i = 0; i < numValues; i++) {
-          binaries[i] = new ByteArrayWrapper(values.getBinary(i));
-        }
-        break;
-      case VARCHAR:
-        strings = new UTF8String[numValues];
-        for (int i = 0; i < numValues; i++) {
-          strings[i] = values.getUTF8String(i);
-        }
-        break;
       case DECIMAL:
+        // We only need to copy values for decimal type as random access
+        // to the dictionary is not efficient for decimal (it needs to copy
+        // the value to a new byte array everytime).
         binaries = new ByteArrayWrapper[numValues];
         for (int i = 0; i < numValues; i++) {
           // Need copying here since we re-use byte array for decimal
@@ -168,9 +113,6 @@ public class CometDictionary implements AutoCloseable {
           binaries[i] = new ByteArrayWrapper(bytes);
         }
         break;
-      default:
-        throw new IllegalArgumentException(
-            "Invalid Arrow minor type: " + values.getValueVector().getMinorType());
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to