This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit f8e7c85cb2cbcf706eba43d6beb86d41387fb01e
Author: preetham0202 <[email protected]>
AuthorDate: Mon Mar 24 11:36:45 2025 +0530

    [ASTERIXDB-3392] Error out on mixed-type arrays in Parquet
    
    Details:
    Parquet does not support arrays with mixed data types. This patch
    introduces a check to detect such cases and explicitly error out when a
    user attempts to write an array containing different types.
    Ext-ref: MB-65899
    
    Change-Id: I07f382f802fd61e55c23c3ce23ccfcc634ede13d
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19545
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Hussain Towaileb <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
---
 .../parquet-error-checks.17.update.sqlpp               |  2 +-
 ....update.sqlpp => parquet-error-checks.18.ddl.sqlpp} | 18 ++++--------------
 ...date.sqlpp => parquet-error-checks.19.update.sqlpp} | 11 ++++++-----
 ...date.sqlpp => parquet-error-checks.20.update.sqlpp} | 11 ++++++-----
 ...date.sqlpp => parquet-error-checks.21.update.sqlpp} | 11 ++++++-----
 ...date.sqlpp => parquet-error-checks.22.update.sqlpp} | 11 ++++++-----
 .../runtimets/testsuite_external_dataset_s3.xml        |  4 ++++
 .../apache/asterix/common/exceptions/ErrorCode.java    |  1 +
 .../src/main/resources/asx_errormsg/en.properties      |  1 +
 .../printer/parquet/ParquetSchemaLazyVisitor.java      | 10 +++++-----
 10 files changed, 40 insertions(+), 40 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
index e956812458..9ebd4730e3 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
@@ -26,7 +26,7 @@ COPY (
 select id,name from TestCollection c
     ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks17")
 TYPE ( { id:int, rect: rectangle })
 WITH {
     %template_colons%,
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
similarity index 74%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
index e956812458..8e13397a57 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
@@ -21,20 +21,10 @@ USE test;
 
 
 
-
-COPY (
-select id,name from TestCollection c
-    ) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
-TYPE ( { id:int, rect: rectangle })
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet",
-    "max-schemas" : "2"
-    }
-
+CREATE COLLECTION TestCollection2(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection3(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection4(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection5(ColumnType1) PRIMARY KEY id;
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
similarity index 83%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
index e956812458..7add0a4459 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
@@ -22,17 +22,18 @@ USE test;
 
 
 
+insert into TestCollection2({"id":10, "names": [ "Virat" , 18 ] });
+
+
 COPY (
-select id,name from TestCollection c
+select * from TestCollection2 c
     ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
-TYPE ( { id:int, rect: rectangle })
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks19")
 WITH {
     %template_colons%,
     %additionalProperties%
-    "format":"parquet",
-    "max-schemas" : "2"
+    "format":"parquet"
     }
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
similarity index 82%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
index e956812458..c0a8308929 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
@@ -22,17 +22,18 @@ USE test;
 
 
 
+insert into TestCollection3({"id":10, "names": [ { "first":"Virat"} , 18 ] });
+
+
 COPY (
-select id,name from TestCollection c
+select * from TestCollection3 c
     ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
-TYPE ( { id:int, rect: rectangle })
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks20")
 WITH {
     %template_colons%,
     %additionalProperties%
-    "format":"parquet",
-    "max-schemas" : "2"
+    "format":"parquet"
     }
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
similarity index 82%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
index e956812458..8fc87b47c6 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
@@ -22,17 +22,18 @@ USE test;
 
 
 
+insert into TestCollection4({"id":10, "names": [  "Virat" , [18]] });
+
+
 COPY (
-select id,name from TestCollection c
+select * from TestCollection4 c
     ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
-TYPE ( { id:int, rect: rectangle })
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks21")
 WITH {
     %template_colons%,
     %additionalProperties%
-    "format":"parquet",
-    "max-schemas" : "2"
+    "format":"parquet"
     }
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
similarity index 82%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
index e956812458..ba501a26c9 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
@@ -22,17 +22,18 @@ USE test;
 
 
 
+insert into TestCollection5({"id":10, "names": [ 18,  { "first":"Virat"} ] });
+
+
 COPY (
-select id,name from TestCollection c
+select * from TestCollection5 c
     ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
-TYPE ( { id:int, rect: rectangle })
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks22")
 WITH {
     %template_colons%,
     %additionalProperties%
-    "format":"parquet",
-    "max-schemas" : "2"
+    "format":"parquet"
     }
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index db72e10aba..72d2575647 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -221,6 +221,10 @@
         <expected-error>HYR0133: Schema could not be inferred, empty types 
found in the result</expected-error>
         <expected-error>HYR0134: Schema Limit exceeded, maximum number of 
heterogeneous schemas allowed : '2'</expected-error>
         <expected-error>ASX1204: 'rectangle' type not supported in parquet 
format</expected-error>
+        <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to/negative">
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 81b71ed9eb..46d63bb072 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -100,6 +100,7 @@ public enum ErrorCode implements IError {
     FAILED_TO_READ_KEY(69),
     AVRO_SUPPORTED_TYPE_WITH_OPTION(70),
     CLOUD_IO_FAILURE(71),
+    PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY(72),
     UNSUPPORTED_JRE(100),
 
     EXTERNAL_UDF_RESULT_TYPE_ERROR(200),
diff --git 
a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties 
b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index c9b87265ca..d6a171ff78 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -106,6 +106,7 @@
 69 = Failed to read key. Reason: %1$s.
 70 = Avro type '%1$s' is not supported by default. To enable type conversion, 
recreate the external dataset with the option '%2$s' enabled
 71 = Cloud I/O '%1$s' operation failed for file '%2$s' while operating on 
files '%3$s'.
+72 = Parquet does not support arrays containing mixed data types
 
 100 = Unsupported JRE: %1$s
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index 70872bb2ec..9ea6d77b0a 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.external.writer.printer.parquet;
 
+import static 
org.apache.asterix.common.exceptions.ErrorCode.PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY;
 import static 
org.apache.asterix.common.exceptions.ErrorCode.TYPE_UNSUPPORTED_PARQUET_WRITE;
 import static 
org.apache.asterix.external.writer.printer.parquet.ParquetSchemaTree.buildParquetSchema;
 
@@ -33,7 +34,6 @@ import 
org.apache.asterix.om.lazy.TypedRecordLazyVisitablePointable;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.IAType;
-import org.apache.hyracks.api.exceptions.ErrorCode;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.api.IValueReference;
 import org.apache.parquet.schema.MessageType;
@@ -63,7 +63,7 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
             schemaNode.setType(new ParquetSchemaTree.RecordType());
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         ParquetSchemaTree.RecordType recordType = 
(ParquetSchemaTree.RecordType) schemaNode.getType();
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
@@ -89,7 +89,7 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
             schemaNode.setType(new ParquetSchemaTree.ListType());
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.ListType)) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         ParquetSchemaTree.ListType listType = (ParquetSchemaTree.ListType) 
schemaNode.getType();
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
@@ -116,14 +116,14 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
             return null;
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType) 
schemaNode.getType();
         if (!(flatType.getPrimitiveTypeName() == 
AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP
                 .get(pointable.getTypeTag()))
                 || !(flatType.getLogicalTypeAnnotation() == 
AsterixParquetTypeMap.LOGICAL_TYPE_ANNOTATION_MAP
                         .get(pointable.getTypeTag()))) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         return null;
     }

Reply via email to