This is an automated email from the ASF dual-hosted git repository.

alsuliman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new ecc0031b99 [ASTERIXDB-3392] Add UUID type in COPY TO parquet
ecc0031b99 is described below

commit ecc0031b999ad6ce8cf701385ef421edec2e3f8f
Author: preetham0202 <[email protected]>
AuthorDate: Thu Feb 6 14:54:54 2025 +0530

    [ASTERIXDB-3392] Add UUID type in COPY TO parquet
    
    Details:
    Support the UUID type in COPY TO parquet, and error out when a type is not
supported.
    
    Ext-ref: MB-65168
    
    Change-Id: Ib65eaf9c1a16fb9f97bb38ddf9c03bceec1bec46
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19388
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Ali Alsuliman <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
---
 .../parquet-cover-data-types.02.update.sqlpp            |  2 +-
 .../parquet-cover-data-types.03.update.sqlpp            |  2 +-
 .../parquet-error-checks.17.update.sqlpp}               | 17 +++++++++++------
 .../parquet-cover-data-types.02.update.sqlpp            |  2 +-
 .../parquet-cover-data-types.03.update.sqlpp            |  2 +-
 .../parquet-cover-data-types.05.adm                     |  2 +-
 .../parquet-cover-data-types.05.adm                     |  2 +-
 .../runtimets/testsuite_external_dataset_s3.xml         |  1 +
 .../test/resources/runtimets/testsuite_sqlpp_hdfs.xml   |  1 +
 .../writer/printer/parquet/AsterixParquetTypeMap.java   |  4 +++-
 .../printer/parquet/ParquetSchemaLazyVisitor.java       |  5 +++++
 .../writer/printer/parquet/ParquetValueWriter.java      |  5 +++++
 12 files changed, 32 insertions(+), 13 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
index d2a376c924..8e0b9e8bb4 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
@@ -19,5 +19,5 @@
 
 use test;
 
-insert into TestCollection({"id":18, "name": "Virat" , 
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : 
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") 
});
+insert into TestCollection({"id":18, "name": "Virat" , 
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : 
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") , 
 "uuidType" : uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344")  });
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
index a95146f50e..3650f61ad8 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
@@ -24,7 +24,7 @@ COPY (
 ) toWriter
 TO hdfs
 PATH ("copy-to-result", "parquet-cover-data-types")
-TYPE ( {   name : string,  id : int,  dateType : date,   timeType : time,  
boolType : boolean,   doubleType : double,  datetimeType : datetime   } )
+TYPE ( {   name : string,  id : int,  dateType : date,   timeType : time,  
boolType : boolean,   doubleType : double,  datetimeType : datetime, uuidType: 
uuid   } )
 WITH {
     "hdfs":"hdfs://127.0.0.1:31888/",
     "format":"parquet"
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
similarity index 75%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
index a85e188908..e956812458 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
@@ -19,17 +19,22 @@
 
 USE test;
 
+
+
+
 COPY (
-   select c.* from TestCollection c
-) toWriter
+select id,name from TestCollection c
+    ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-cover-data-types")
-TYPE ( {   name : string,  id : int,  dateType : date,   timeType : time,  
boolType : boolean,   doubleType : double,  datetimeType : datetime   } )
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
+TYPE ( { id:int, rect: rectangle })
 WITH {
     %template_colons%,
     %additionalProperties%
-    "format":"parquet"
-};
+    "format":"parquet",
+    "max-schemas" : "2"
+    }
+
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
index ec1ac0c708..4cd2ec7518 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
@@ -29,5 +29,5 @@ insert into 
TestCollection({"id":`year-month-duration`("P16Y"), "name": "John"})
 insert into TestCollection({"id":`day-time-duration`("-P3829H849.392S"), 
"name": "Alex"});
 */
 
-insert into TestCollection({"id":18, "name": "Virat" , 
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : 
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") 
});
+insert into TestCollection({"id":18, "name": "Virat" , 
"dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : 
false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") , 
"uuidType" : uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344")  });
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
index a85e188908..042fa99c58 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
@@ -24,7 +24,7 @@ COPY (
 ) toWriter
 TO %adapter%
 PATH (%pathprefix% "copy-to-result", "parquet-cover-data-types")
-TYPE ( {   name : string,  id : int,  dateType : date,   timeType : time,  
boolType : boolean,   doubleType : double,  datetimeType : datetime   } )
+TYPE ( {   name : string,  id : int,  dateType : date,   timeType : time,  
boolType : boolean,   doubleType : double,  datetimeType : datetime , uuidType 
: uuid  } )
 WITH {
     %template_colons%,
     %additionalProperties%
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
index 8fc863ef6c..dd54be4122 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-cover-data-types/parquet-cover-data-types.05.adm
@@ -1 +1 @@
-{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": 
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": 
datetime("1900-02-01T00:00:00.000") }
\ No newline at end of file
+{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": 
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": 
datetime("1900-02-01T00:00:00.000"), "uuidType": 
uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
index 8fc863ef6c..dd54be4122 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
@@ -1 +1 @@
-{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": 
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": 
datetime("1900-02-01T00:00:00.000") }
\ No newline at end of file
+{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": 
time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": 
datetime("1900-02-01T00:00:00.000"), "uuidType": 
uuid("95ca22dd-ef64-46f2-9c2a-a38005e23344") }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index da33b2e4d8..1a05334bbc 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -210,6 +210,7 @@
         <expected-error>ASX1209: Maximum value allowed for 'max-schemas' is 
10. Found 15</expected-error>
         <expected-error>HYR0133: Schema could not be inferred, empty types 
found in the result</expected-error>
         <expected-error>HYR0134: Schema Limit exceeded, maximum number of 
heterogeneous schemas allowed : '2'</expected-error>
+        <expected-error>ASX1204: 'rectangle' type not supported in parquet 
format</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to/negative">
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index b178efd99f..f2ec232456 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -258,6 +258,7 @@
         <expected-error>ASX1209: Maximum value allowed for 'max-schemas' is 
10. Found 15</expected-error>
         <expected-error>HYR0133: Schema could not be inferred, empty types 
found in the result</expected-error>
         <expected-error>HYR0134: Schema Limit exceeded, maximum number of 
heterogeneous schemas allowed : '2'</expected-error>
+        <expected-error>ASX1204: 'rectangle' type not supported in parquet 
format</expected-error>
         <source-location>false</source-location>
       </compilation-unit>
     </test-case>
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
index 0dcdb3a335..1eb8e844fe 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetTypeMap.java
@@ -37,10 +37,12 @@ public class AsterixParquetTypeMap {
                     Map.entry(ATypeTag.DOUBLE, 
PrimitiveType.PrimitiveTypeName.DOUBLE),
                     Map.entry(ATypeTag.DATE, 
PrimitiveType.PrimitiveTypeName.INT32),
                     Map.entry(ATypeTag.TIME, 
PrimitiveType.PrimitiveTypeName.INT32),
-                    Map.entry(ATypeTag.DATETIME, 
PrimitiveType.PrimitiveTypeName.INT64));
+                    Map.entry(ATypeTag.DATETIME, 
PrimitiveType.PrimitiveTypeName.INT64),
+                    Map.entry(ATypeTag.UUID, 
PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY));
 
     public static final Map<ATypeTag, LogicalTypeAnnotation> 
LOGICAL_TYPE_ANNOTATION_MAP =
             Map.ofEntries(Map.entry(ATypeTag.STRING, 
LogicalTypeAnnotation.stringType()),
+                    Map.entry(ATypeTag.UUID, LogicalTypeAnnotation.uuidType()),
                     Map.entry(ATypeTag.DATE, LogicalTypeAnnotation.dateType()),
                     Map.entry(ATypeTag.TIME,
                             LogicalTypeAnnotation.timeType(true, 
LogicalTypeAnnotation.TimeUnit.MILLIS)),
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index 055c635617..b59117571b 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -18,10 +18,12 @@
  */
 package org.apache.asterix.external.writer.printer.parquet;
 
+import static 
org.apache.asterix.common.exceptions.ErrorCode.TYPE_UNSUPPORTED_PARQUET_WRITE;
 import static 
org.apache.asterix.external.writer.printer.parquet.ParquetSchemaTree.buildParquetSchema;
 
 import java.util.Map;
 
+import org.apache.asterix.common.exceptions.RuntimeDataException;
 import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
 import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
 import org.apache.asterix.om.lazy.FlatLazyVisitablePointable;
@@ -104,6 +106,9 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
     public Void visit(FlatLazyVisitablePointable pointable, 
ParquetSchemaTree.SchemaNode schemaNode)
             throws HyracksDataException {
         if (schemaNode.getType() == null) {
+            if 
(!AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.containsKey(pointable.getTypeTag()))
 {
+                throw 
RuntimeDataException.create(TYPE_UNSUPPORTED_PARQUET_WRITE, 
pointable.getTypeTag());
+            }
             schemaNode.setType(new ParquetSchemaTree.FlatType(
                     
AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.get(pointable.getTypeTag()),
                     
AsterixParquetTypeMap.LOGICAL_TYPE_ANNOTATION_MAP.get(pointable.getTypeTag())));
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 0390315457..04e11f7e21 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -19,6 +19,7 @@
 package org.apache.asterix.external.writer.printer.parquet;
 
 import java.io.IOException;
+import java.util.UUID;
 
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.exceptions.RuntimeDataException;
@@ -186,6 +187,10 @@ public class ParquetValueWriter {
             case DATETIME:
                 long dateTimeValue = 
ADateTimeSerializerDeserializer.getChronon(b, s);
                 addIntegerType(dateTimeValue, primitiveTypeName, typeTag, 
recordConsumer);
+                break;
+            case UUID:
+                recordConsumer.addBinary(Binary.fromReusedByteArray(b, s, l));
+                break;
             case NULL:
             case MISSING:
                 break;

Reply via email to