This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 7b0a7e07 feat: Supports UUID column (#395)
7b0a7e07 is described below
commit 7b0a7e07ad36b5f4e0b19bc8231deaa637ac03c7
Author: Huaxin Gao <[email protected]>
AuthorDate: Tue May 21 09:11:21 2024 -0700
feat: Supports UUID column (#395)
* fix uuid
* address comments
---------
Co-authored-by: Huaxin Gao <[email protected]>
---
.../main/java/org/apache/comet/parquet/ColumnReader.java | 14 +++++++++++---
.../java/org/apache/comet/vector/CometDecodedVector.java | 7 +++++++
.../org/apache/comet/vector/CometDictionaryVector.java | 9 +++++----
.../java/org/apache/comet/vector/CometPlainVector.java | 13 +++++++++++--
4 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
index 7e45f4f9..46fd87f6 100644
--- a/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
+++ b/common/src/main/java/org/apache/comet/parquet/ColumnReader.java
@@ -41,6 +41,7 @@ import org.apache.parquet.column.page.DataPageV1;
import org.apache.parquet.column.page.DataPageV2;
import org.apache.parquet.column.page.DictionaryPage;
import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.spark.sql.types.DataType;
import org.apache.comet.CometConf;
@@ -199,6 +200,11 @@ public class ColumnReader extends AbstractColumnReader {
currentVector.close();
}
+ LogicalTypeAnnotation logicalTypeAnnotation =
+ descriptor.getPrimitiveType().getLogicalTypeAnnotation();
+ boolean isUuid =
+ logicalTypeAnnotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
+
long[] addresses = Native.currentBatch(nativeHandle);
try (ArrowArray array = ArrowArray.wrap(addresses[0]);
@@ -206,7 +212,7 @@ public class ColumnReader extends AbstractColumnReader {
FieldVector vector = Data.importVector(ALLOCATOR, array, schema,
dictionaryProvider);
DictionaryEncoding dictionaryEncoding =
vector.getField().getDictionary();
- CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128);
+ CometPlainVector cometVector = new CometPlainVector(vector, useDecimal128, isUuid);
// Update whether the current vector contains any null values. This is
used in the following
// batch(s) to determine whether we can skip loading the native vector.
@@ -229,12 +235,14 @@ public class ColumnReader extends AbstractColumnReader {
// initialized yet.
Dictionary arrowDictionary =
dictionaryProvider.lookup(dictionaryEncoding.getId());
CometPlainVector dictionaryVector =
- new CometPlainVector(arrowDictionary.getVector(), useDecimal128);
+ new CometPlainVector(arrowDictionary.getVector(), useDecimal128, isUuid);
dictionary = new CometDictionary(dictionaryVector);
}
currentVector =
- new CometDictionaryVector(cometVector, dictionary, dictionaryProvider, useDecimal128);
+ new CometDictionaryVector(
+ cometVector, dictionary, dictionaryProvider, useDecimal128, false, isUuid);
+
return currentVector;
}
}
diff --git a/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java b/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
index 5ebe6923..f699134f 100644
--- a/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
+++ b/common/src/main/java/org/apache/comet/vector/CometDecodedVector.java
@@ -38,13 +38,20 @@ public abstract class CometDecodedVector extends CometVector {
private int numValues;
private int validityByteCacheIndex = -1;
private byte validityByteCache;
+ protected boolean isUuid;
protected CometDecodedVector(ValueVector vector, Field valueField, boolean
useDecimal128) {
+ this(vector, valueField, useDecimal128, false);
+ }
+
+ protected CometDecodedVector(
+ ValueVector vector, Field valueField, boolean useDecimal128, boolean isUuid) {
super(Utils.fromArrowField(valueField), useDecimal128);
this.valueVector = vector;
this.numNulls = valueVector.getNullCount();
this.numValues = valueVector.getValueCount();
this.hasNull = numNulls != 0;
+ this.isUuid = isUuid;
}
@Override
diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java b/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
index 2cd9c5d1..a74f4ff6 100644
--- a/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
+++ b/common/src/main/java/org/apache/comet/vector/CometDictionaryVector.java
@@ -39,7 +39,7 @@ public class CometDictionaryVector extends CometDecodedVector {
CometDictionary values,
DictionaryProvider provider,
boolean useDecimal128) {
- this(indices, values, provider, useDecimal128, false);
+ this(indices, values, provider, useDecimal128, false, false);
}
public CometDictionaryVector(
@@ -47,8 +47,9 @@ public class CometDictionaryVector extends CometDecodedVector {
CometDictionary values,
DictionaryProvider provider,
boolean useDecimal128,
- boolean isAlias) {
- super(indices.valueVector, values.getValueVector().getField(), useDecimal128);
+ boolean isAlias,
+ boolean isUuid) {
+ super(indices.valueVector, values.getValueVector().getField(), useDecimal128, isUuid);
Preconditions.checkArgument(
indices.valueVector instanceof IntVector, "'indices' should be a
IntVector");
this.values = values;
@@ -130,6 +131,6 @@ public class CometDictionaryVector extends CometDecodedVector {
// Set the alias flag to true so that the sliced vector will not close the
dictionary vector.
// Otherwise, if the dictionary is closed, the sliced vector will not be
able to access the
// dictionary.
- return new CometDictionaryVector(sliced, values, provider, useDecimal128, true);
+ return new CometDictionaryVector(sliced, values, provider, useDecimal128, true, isUuid);
}
}
diff --git a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
index 521f1047..e2a625f0 100644
--- a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
+++ b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
@@ -39,7 +39,11 @@ public class CometPlainVector extends CometDecodedVector {
private int booleanByteCacheIndex = -1;
public CometPlainVector(ValueVector vector, boolean useDecimal128) {
- super(vector, vector.getField(), useDecimal128);
+ this(vector, useDecimal128, false);
+ }
+
+ public CometPlainVector(ValueVector vector, boolean useDecimal128, boolean isUuid) {
+ super(vector, vector.getField(), useDecimal128, isUuid);
// NullType doesn't have data buffer.
if (vector instanceof NullVector) {
this.valueBufferAddress = -1;
@@ -111,7 +115,12 @@ public class CometPlainVector extends CometDecodedVector {
byte[] result = new byte[length];
Platform.copyMemory(
null, valueBufferAddress + offset, result,
Platform.BYTE_ARRAY_OFFSET, length);
- return UTF8String.fromBytes(result);
+
+ if (!isUuid) {
+ return UTF8String.fromBytes(result);
+ } else {
+ return UTF8String.fromString(convertToUuid(result).toString());
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]