This is an automated email from the ASF dual-hosted git repository.
htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new e57ee2d0c2 [ASTERIXDB-3392] Fix empty array writing
e57ee2d0c2 is described below
commit e57ee2d0c209cee78309209649043794d5169769
Author: preetham0202 <[email protected]>
AuthorDate: Tue Sep 3 17:16:14 2024 +0530
[ASTERIXDB-3392] Fix empty array writing
Ext-ref: MB-61498
Change-Id: I5407a3a8241160a3087caefc5a6952807f172d71
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18784
Reviewed-by: Hussain Towaileb <[email protected]>
Tested-by: Hussain Towaileb <[email protected]>
Integration-Tests: Hussain Towaileb <[email protected]>
---
.../parquet-empty-array.01.ddl.sqlpp | 29 +++++++++++++++++
.../parquet-empty-array.02.update.sqlpp | 22 +++++++++++++
.../parquet-empty-array.03.update.sqlpp | 38 ++++++++++++++++++++++
.../parquet-empty-array.04.ddl.sqlpp | 37 +++++++++++++++++++++
.../parquet-empty-array.05.query.sqlpp | 27 +++++++++++++++
.../parquet-empty-array/parquet-empty-array.05.adm | 2 ++
.../runtimets/testsuite_external_dataset_s3.xml | 5 +++
.../printer/parquet/ParquetRecordLazyVisitor.java | 22 +++++++------
8 files changed, 172 insertions(+), 10 deletions(-)
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.01.ddl.sqlpp
new file mode 100644
index 0000000000..6be9489092
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.01.ddl.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+
+CREATE TYPE ColumnType1 AS {
+ id: integer
+};
+
+CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id;
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.02.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.02.update.sqlpp
new file mode 100644
index 0000000000..d569ee50f9
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.02.update.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+INSERT INTO TestCollection {"id":2,"name":{"first":["power","star"]}};
+INSERT INTO TestCollection {"id":5,"name":{"first":[]}};
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.03.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.03.update.sqlpp
new file mode 100644
index 0000000000..9a652d25ec
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.03.update.sqlpp
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+COPY (
+select c.* from TestCollection c
+ ) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-empty-array")
+TYPE ( { id : int, name : { first : [ string ] } } )
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet"
+ };
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.04.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.04.ddl.sqlpp
new file mode 100644
index 0000000000..f17c4d4494
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.04.ddl.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE TYPE ColumnType2 AS {
+ };
+
+
+
+CREATE EXTERNAL DATASET TestDataset(ColumnType2) USING S3
+(
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("region"="us-west-2"),
+ ("container"="playground"),
+ ("definition"="copy-to-result/parquet-empty-array/"),
+ ("include"="*.parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.05.query.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.05.query.sqlpp
new file mode 100644
index 0000000000..b03fc5e726
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-empty-array/parquet-empty-array.05.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT c.*
+FROM TestDataset c
+ORDER BY c.id;
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-empty-array/parquet-empty-array.05.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-empty-array/parquet-empty-array.05.adm
new file mode 100644
index 0000000000..97de7e9ab6
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-empty-array/parquet-empty-array.05.adm
@@ -0,0 +1,2 @@
+{ "id": 2, "name": { "first": [ "power", "star" ] } }
+{ "id": 5, "name": { "first": [ ] } }
\ No newline at end of file
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index d02d551102..5501aec68c 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -59,6 +59,11 @@
<output-dir compare="Text">parquet-cover-data-types</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="copy-to">
+ <compilation-unit name="parquet-empty-array">
+ <output-dir compare="Text">parquet-empty-array</output-dir>
+ </compilation-unit>
+ </test-case>
<test-case FilePath="copy-to">
<compilation-unit name="empty-path">
<output-dir compare="Text">empty-path</output-dir>
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index cf46cc61b6..f6e85ef432 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -122,23 +122,25 @@ public class ParquetRecordLazyVisitor implements
ILazyVisitablePointableVisitor<
}
recordConsumer.startGroup();
- recordConsumer.startField(LIST_FIELD,
groupType.getFieldIndex(LIST_FIELD));
- for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
- pointable.nextChild();
- AbstractLazyVisitablePointable child =
pointable.getChildVisitablePointable();
+ if (pointable.getNumberOfChildren() > 0) {
+ recordConsumer.startField(LIST_FIELD,
groupType.getFieldIndex(LIST_FIELD));
- recordConsumer.startGroup();
- recordConsumer.startField(ELEMENT_FIELD,
listType.getFieldIndex(ELEMENT_FIELD));
+ for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
+ pointable.nextChild();
+ AbstractLazyVisitablePointable child =
pointable.getChildVisitablePointable();
- child.accept(this, listType.getType(ELEMENT_FIELD));
+ recordConsumer.startGroup();
+ recordConsumer.startField(ELEMENT_FIELD,
listType.getFieldIndex(ELEMENT_FIELD));
+ child.accept(this, listType.getType(ELEMENT_FIELD));
+ recordConsumer.endField(ELEMENT_FIELD,
listType.getFieldIndex(ELEMENT_FIELD));
+ recordConsumer.endGroup();
- recordConsumer.endField(ELEMENT_FIELD,
listType.getFieldIndex(ELEMENT_FIELD));
- recordConsumer.endGroup();
+ }
+ recordConsumer.endField(LIST_FIELD,
groupType.getFieldIndex(LIST_FIELD));
}
- recordConsumer.endField(LIST_FIELD,
groupType.getFieldIndex(LIST_FIELD));
recordConsumer.endGroup();
return null;
}