From <[email protected]>:
[email protected] has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19545 )
Change subject: [ASTERIXDB-3392] Error out on mixed-type arrays in Parquet
......................................................................
[ASTERIXDB-3392] Error out on mixed-type arrays in Parquet
Details:
Parquet does not support arrays with mixed data types. This patch introduces a
check to detect such cases and explicitly error out when a user attempts to
write an array containing different types.
Ext-ref: MB-65899
Change-Id: I07f382f802fd61e55c23c3ce23ccfcc634ede13d
---
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
M
hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
M
hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
10 files changed, 220 insertions(+), 5 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/45/19545/1
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
index e956812..9ebd473 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.17.update.sqlpp
@@ -26,7 +26,7 @@
select id,name from TestCollection c
) toWriter
TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-error-checks16")
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks17")
TYPE ( { id:int, rect: rectangle })
WITH {
%template_colons%,
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
new file mode 100644
index 0000000..8e13397
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.18.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+CREATE COLLECTION TestCollection2(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection3(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection4(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection5(ColumnType1) PRIMARY KEY id;
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
new file mode 100644
index 0000000..7add0a4
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.19.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection2({"id":10, "names": [ "Virat" , 18 ] });
+
+
+COPY (
+select * from TestCollection2 c
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks19")
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+ }
+
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
new file mode 100644
index 0000000..c0a8308
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.20.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection3({"id":10, "names": [ { "first":"Virat"} , 18 ] });
+
+
+COPY (
+select * from TestCollection3 c
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks20")
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+ }
+
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
new file mode 100644
index 0000000..8fc87b4
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.21.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection4({"id":10, "names": [ "Virat" , [18]] });
+
+
+COPY (
+select * from TestCollection4 c
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks21")
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+ }
+
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
new file mode 100644
index 0000000..ba501a2
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.22.update.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+insert into TestCollection5({"id":10, "names": [ 18, { "first":"Virat"} ] });
+
+
+COPY (
+select * from TestCollection5 c
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks22")
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+ }
+
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index e8e89de..d993484 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -211,6 +211,10 @@
<expected-error>HYR0133: Schema could not be inferred, empty types
found in the result</expected-error>
<expected-error>HYR0134: Schema Limit exceeded, maximum number of
heterogeneous schemas allowed : '2'</expected-error>
<expected-error>ASX1204: 'rectangle' type not supported in parquet
format</expected-error>
+ <expected-error>HYR0136: Parquet does not support arrays containing
mixed data types</expected-error>
+ <expected-error>HYR0136: Parquet does not support arrays containing
mixed data types</expected-error>
+ <expected-error>HYR0136: Parquet does not support arrays containing
mixed data types</expected-error>
+ <expected-error>HYR0136: Parquet does not support arrays containing
mixed data types</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="copy-to/negative">
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index b591175..9da389c 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -62,7 +62,7 @@
schemaNode.setType(new ParquetSchemaTree.RecordType());
}
if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) {
- throw new
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+ throw new
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
}
ParquetSchemaTree.RecordType recordType =
(ParquetSchemaTree.RecordType) schemaNode.getType();
for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
@@ -88,7 +88,7 @@
schemaNode.setType(new ParquetSchemaTree.ListType());
}
if (!(schemaNode.getType() instanceof ParquetSchemaTree.ListType)) {
- throw new
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+ throw new
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
}
ParquetSchemaTree.ListType listType = (ParquetSchemaTree.ListType)
schemaNode.getType();
for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
@@ -115,14 +115,14 @@
return null;
}
if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) {
- throw new
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+ throw new
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
}
ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType)
schemaNode.getType();
if (!(flatType.getPrimitiveTypeName() ==
AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP
.get(pointable.getTypeTag()))
|| !(flatType.getLogicalTypeAnnotation() ==
AsterixParquetTypeMap.LOGICAL_TYPE_ANNOTATION_MAP
.get(pointable.getTypeTag()))) {
- throw new
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA);
+ throw new
HyracksDataException(ErrorCode.UNSUPPORTED_MIXED_TYPE_ARRAY);
}
return null;
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 7cb107d..6b8c823 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -162,6 +162,8 @@
EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA(132),
EMPTY_TYPE_INFERRED(133),
SCHEMA_LIMIT_EXCEEDED(134),
+ FAILED_IO_OPERATION(135),
+ UNSUPPORTED_MIXED_TYPE_ARRAY(136),
// Compilation error codes.
RULECOLLECTION_NOT_INSTANCE_OF_LIST(10000),
diff --git
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index e1fbe30..89cf6c8f 100644
---
a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++
b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -152,6 +152,8 @@
132 = Extra field in the result, field '%1$s' does not exist at '%2$s' in the
schema
133 = Schema could not be inferred, empty types found in the result
134 = Schema Limit exceeded, maximum number of heterogeneous schemas allowed :
'%1$s'
+135 = An IO Operation has failed
+136 = Parquet does not support arrays containing mixed data types
10000 = The given rule collection %1$s is not an instance of the List class.
10001 = Cannot compose partition constraint %1$s with %2$s
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19545
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: ionic
Gerrit-Change-Id: I07f382f802fd61e55c23c3ce23ccfcc634ede13d
Gerrit-Change-Number: 19545
Gerrit-PatchSet: 1
Gerrit-Owner: [email protected]
Gerrit-MessageType: newchange