This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 7b0a7e07 feat: Supports UUID column (#395)
7b0a7e07 is described below

commit 7b0a7e07ad36b5f4e0b19bc8231deaa637ac03c7
Author: Huaxin Gao <[email protected]>
AuthorDate: Tue May 21 09:11:21 2024 -0700

    feat: Supports UUID column (#395)
    
    * fix uuid
    
    * address comments
    
    ---------
    
    Co-authored-by: Huaxin Gao <[email protected]>
---
 .../main/java/org/apache/comet/parquet/ColumnReader.java   | 14 +++++++++++---
 .../java/org/apache/comet/vector/CometDecodedVector.java   |  7 +++++++
 .../org/apache/comet/vector/CometDictionaryVector.java     |  9 +++++----
 .../java/org/apache/comet/vector/CometPlainVector.java     | 13 +++++++++++--
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
index 7e45f4f9..46fd87f6 100644
--- a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
+++ b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
@@ -41,6 +41,7 @@ import org.apache.parquet.column.page.DataPageV1;
 import org.apache.parquet.column.page.DataPageV2;
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.spark.sql.types.DataType;
 
 import org.apache.comet.CometConf;
@@ -199,6 +200,11 @@ public class ColumnReader extends AbstractColumnReader {
       currentVector.close();
     }
 
+    LogicalTypeAnnotation logicalTypeAnnotation =
+        descriptor.getPrimitiveType().getLogicalTypeAnnotation();
+    boolean isUuid =
+        logicalTypeAnnotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
+
     long[] addresses = Native.currentBatch(nativeHandle);
 
     try (ArrowArray array = ArrowArray.wrap(addresses[0]);
@@ -206,7 +212,7 @@ public class ColumnReader extends AbstractColumnReader {
       FieldVector vector = Data.importVector(ALLOCATOR, array, schema, dictionaryProvider);
       DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary();
 
-      CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128);
+      CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128, isUuid);
 
       // Update whether the current vector contains any null values. This is used in the following
       // batch(s) to determine whether we can skip loading the native vector.
@@ -229,12 +235,14 @@ public class ColumnReader extends AbstractColumnReader {
         // initialized yet.
         Dictionary arrowDictionary = dictionaryProvider.lookup(dictionaryEncoding.getId());
         CometPlainVector dictionaryVector =
-            new CometPlainVector(arrowDictionary.getVector(), useDecimal128);
+            new CometPlainVector(arrowDictionary.getVector(), useDecimal128, isUuid);
         dictionary = new CometDictionary(dictionaryVector);
       }
 
       currentVector =
-          new CometDictionaryVector(cometVector, dictionary, dictionaryProvider, useDecimal128);
+          new CometDictionaryVector(
+              cometVector, dictionary, dictionaryProvider, useDecimal128, false, isUuid);
+
       return currentVector;
     }
   }
diff --git a/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java b/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
index 5ebe6923..f699134f 100644
--- a/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
+++ b/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
@@ -38,13 +38,20 @@ public abstract class CometDecodedVector extends CometVector {
   private int numValues;
   private int validityByteCacheIndex = -1;
   private byte validityByteCache;
+  protected boolean isUuid;
 
   protected CometDecodedVector(ValueVector vector, Field valueField, boolean useDecimal128) {
+    this(vector, valueField, useDecimal128, false);
+  }
+
+  protected CometDecodedVector(
+      ValueVector vector, Field valueField, boolean useDecimal128, boolean isUuid) {
     super(Utils.fromArrowField(valueField), useDecimal128);
     this.valueVector = vector;
     this.numNulls = valueVector.getNullCount();
     this.numValues = valueVector.getValueCount();
     this.hasNull = numNulls != 0;
+    this.isUuid = isUuid;
   }
 
   @Override
diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java b/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
index 2cd9c5d1..a74f4ff6 100644
--- a/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
+++ b/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
@@ -39,7 +39,7 @@ public class CometDictionaryVector extends CometDecodedVector {
       CometDictionary values,
       DictionaryProvider provider,
       boolean useDecimal128) {
-    this(indices, values, provider, useDecimal128, false);
+    this(indices, values, provider, useDecimal128, false, false);
   }
 
   public CometDictionaryVector(
@@ -47,8 +47,9 @@ public class CometDictionaryVector extends CometDecodedVector {
       CometDictionary values,
       DictionaryProvider provider,
       boolean useDecimal128,
-      boolean isAlias) {
-    super(indices.valueVector, values.getValueVector().getField(), useDecimal128);
+      boolean isAlias,
+      boolean isUuid) {
+    super(indices.valueVector, values.getValueVector().getField(), useDecimal128, isUuid);
     Preconditions.checkArgument(
         indices.valueVector instanceof IntVector, "'indices' should be a IntVector");
     this.values = values;
@@ -130,6 +131,6 @@ public class CometDictionaryVector extends CometDecodedVector {
     // Set the alias flag to true so that the sliced vector will not close the dictionary vector.
     // Otherwise, if the dictionary is closed, the sliced vector will not be able to access the
     // dictionary.
-    return new CometDictionaryVector(sliced, values, provider, useDecimal128, true);
+    return new CometDictionaryVector(sliced, values, provider, useDecimal128, true, isUuid);
   }
 }
diff --git a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
index 521f1047..e2a625f0 100644
--- a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
+++ b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
@@ -39,7 +39,11 @@ public class CometPlainVector extends CometDecodedVector {
   private int booleanByteCacheIndex = -1;
 
   public CometPlainVector(ValueVector vector, boolean useDecimal128) {
-    super(vector, vector.getField(), useDecimal128);
+    this(vector, useDecimal128, false);
+  }
+
+  public CometPlainVector(ValueVector vector, boolean useDecimal128, boolean isUuid) {
+    super(vector, vector.getField(), useDecimal128, isUuid);
     // NullType doesn't have data buffer.
     if (vector instanceof NullVector) {
       this.valueBufferAddress = -1;
@@ -111,7 +115,12 @@ public class CometPlainVector extends CometDecodedVector {
       byte[] result = new byte[length];
       Platform.copyMemory(
           null, valueBufferAddress + offset, result, Platform.BYTE_ARRAY_OFFSET, length);
-      return UTF8String.fromBytes(result);
+
+      if (!isUuid) {
+        return UTF8String.fromBytes(result);
+      } else {
+        return UTF8String.fromString(convertToUuid(result).toString());
+      }
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to