From <[email protected]>:

[email protected] has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19545 )


Change subject: [ASTERIXDB-3392] Error out on mixed-type arrays in Parquet
......................................................................

[ASTERIXDB-3392] Error out on mixed-type arrays in Parquet

Details:
Parquet does not support arrays with mixed data types. This patch introduces a 
check that detects such cases and explicitly errors out when a user attempts to 
write an array whose elements have different types.
Ext-ref: MB-65899

Change-Id: I07f382f802fd61e55c23c3ce23ccfcc634ede13d
---
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
M 
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
M 
hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M 
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
10 files changed, 220 insertions(+), 5 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/45/19545/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
index e956812..9ebd473 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
@@ -26,7 +26,7 @@
 select id,name from TestCollection c
     ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks17")
 TYPE ( { id:int, rect: rectangle })
 WITH {
     %template_colons%,
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
new file mode 100644
index 0000000..8e13397
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+CREATE COLLECTION TestCollection2(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection3(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection4(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection5(ColumnType1) PRIMARY KEY id;
+
+
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
new file mode 100644
index 0000000..7add0a4
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection2({"id":10, "names": [ "Virat" , 18 ] });
+
+
+COPY (
+select * from TestCollection2 c
+    ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks19")
+WITH {
+    %template_colons%,
+    %additionalProperties%
+    "format":"parquet"
+    }
+
+
+
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
new file mode 100644
index 0000000..c0a8308
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection3({"id":10, "names": [ { "first":"Virat"} , 18 ] });
+
+
+COPY (
+select * from TestCollection3 c
+    ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks20")
+WITH {
+    %template_colons%,
+    %additionalProperties%
+    "format":"parquet"
+    }
+
+
+
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
new file mode 100644
index 0000000..8fc87b4
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection4({"id":10, "names": [  "Virat" , [18]] });
+
+
+COPY (
+select * from TestCollection4 c
+    ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks21")
+WITH {
+    %template_colons%,
+    %additionalProperties%
+    "format":"parquet"
+    }
+
+
+
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
new file mode 100644
index 0000000..ba501a2
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection5({"id":10, "names": [ 18,  { "first":"Virat"} ] });
+
+
+COPY (
+select * from TestCollection5 c
+    ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks22")
+WITH {
+    %template_colons%,
+    %additionalProperties%
+    "format":"parquet"
+    }
+
+
+
+
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index e8e89de..d993484 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -211,6 +211,10 @@
         <expected-error>HYR0133: Schema could not be inferred, empty types 
found in the result</expected-error>
         <expected-error>HYR0134: Schema Limit exceeded, maximum number of 
heterogeneous schemas allowed : '2'</expected-error>
         <expected-error>ASX1204: 'rectangle' type not supported in parquet 
format</expected-error>
+        <expected-error>HYR0136: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>HYR0136: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>HYR0136: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>HYR0136: Parquet does not support arrays containing 
mixed data types</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to/negative">
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index b591175..9da389c 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -62,7 +62,7 @@
             schemaNode.setType(new ParquetSchemaTree.RecordType());
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw new 
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         ParquetSchemaTree.RecordType recordType = 
(ParquetSchemaTree.RecordType) schemaNode.getType();
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
@@ -88,7 +88,7 @@
             schemaNode.setType(new ParquetSchemaTree.ListType());
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.ListType)) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw new 
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         ParquetSchemaTree.ListType listType = (ParquetSchemaTree.ListType) 
schemaNode.getType();
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
@@ -115,14 +115,14 @@
             return null;
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw new 
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType) 
schemaNode.getType();
         if (!(flatType.getPrimitiveTypeName() == 
AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP
                 .get(pointable.getTypeTag()))
                 || !(flatType.getLogicalTypeAnnotation() == 
AsterixParquetTypeMap.LOGICAL_TYPE_ANNOTATION_MAP
                         .get(pointable.getTypeTag()))) {
-            throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+            throw new 
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         return null;
     }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 7cb107d..6b8c823 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -162,6 +162,8 @@
     EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA(132),
     EMPTY_TYPE_INFERRED(133),
     SCHEMA_LIMIT_EXCEEDED(134),
+    FAILED_IO_OPERATION(135),
+    UNSUPPORTED_MIXED_TYPE_ARRAY(136),

     // Compilation error codes.
     RULECOLLECTION_NOT_INSTANCE_OF_LIST(10000),
diff --git 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index e1fbe30..89cf6c8f 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ 
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -152,6 +152,8 @@
 132 = Extra field in the result, field '%1$s' does not exist at '%2$s' in the 
schema
 133 = Schema could not be inferred, empty types found in the result
 134 = Schema Limit exceeded, maximum number of heterogeneous schemas allowed : 
'%1$s'
+135 = An IO Operation has failed
+136 = Parquet does not support arrays containing mixed data types

 10000 = The given rule collection %1$s is not an instance of the List class.
 10001 = Cannot compose partition constraint %1$s with %2$s

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19545
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: ionic
Gerrit-Change-Id: I07f382f802fd61e55c23c3ce23ccfcc634ede13d
Gerrit-Change-Number: 19545
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange

Reply via email to