This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit e865c48b9582b4b41097f0e1fad97dbf10f9ee46 Author: preetham0202 <preetham.polupar...@couchbase.com> AuthorDate: Wed May 14 14:44:45 2025 +0530 [ASTERIXDB-3392] Handle NULL/Empty types in Copy to parquet - user model changes: no - storage format changes: no - interface changes: no Details : Fix to handle null types especially when inside array types in copy to parquet. Ext-ref: MB-66710 Change-Id: I79af2b66200063b6d09fe0cd2dfda3d9e7939925 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20029 Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Ali Alsuliman <ali.al.solai...@gmail.com> Reviewed-by: Preetham Poluparthi <preetha...@apache.org> Tested-by: Preetham Poluparthi <preetha...@apache.org> --- .../parquet-error-checks.23.ddl.sqlpp} | 15 +- .../parquet-error-checks.24.update.sqlpp} | 17 +- .../parquet-error-checks.25.update.sqlpp} | 16 +- .../parquet-heterogeneous.02.update.sqlpp | 20 +-- .../parquet-null-type.01.ddl.sqlpp} | 19 +-- .../parquet-null-type.02.update.sqlpp | 29 ++++ .../parquet-null-type.03.update.sqlpp} | 30 +++- .../parquet-null-type.04.ddl.sqlpp} | 23 +-- .../parquet-null-type.05.query.sqlpp} | 14 +- .../parquet-null-type.06.update.sqlpp} | 9 +- .../parquet-null-type.07.ddl.sqlpp} | 20 +-- .../parquet-null-type.08.query.sqlpp} | 14 +- .../parquet-tweet/parquet-tweet.03.update.sqlpp | 181 --------------------- .../parquet-type-hierarchy.02.update.sqlpp | 2 +- .../parquet-utf8/parquet-utf8.03.update.sqlpp | 1 - .../parquet-tweet/parquet-tweet.05.adm | 4 +- .../parquet-null-type/parquet-null-type.05.adm | 5 + .../parquet-null-type/parquet-null-type.08.adm | 5 + .../copy-to/parquet-tweet/parquet-tweet.05.adm | 4 +- .../parquet-type-hierarchy.05.adm | 8 +- .../copy-to/parquet-utf8/parquet-utf8.05.adm | 16 +- .../runtimets/testsuite_external_dataset_s3.xml | 12 ++ .../parquet/ParquetSchemaInferPoolWriter.java | 11 +- .../writer/printer/ParquetExternalFilePrinter.java | 5 +- 
.../printer/parquet/ParquetRecordLazyVisitor.java | 146 ++++++++++++++++- .../printer/parquet/ParquetSchemaLazyVisitor.java | 62 ++++++- .../writer/printer/parquet/ParquetSchemaTree.java | 20 +++ .../printer/parquet/SchemaCheckerLazyVisitor.java | 47 ++++-- 28 files changed, 421 insertions(+), 334 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp similarity index 76% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp index ad94373b1d..a2dbe7f51d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp @@ -19,17 +19,10 @@ USE test; -COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" -}; + + +CREATE COLLECTION TestCollection6(ColumnType1) PRIMARY KEY id; +CREATE COLLECTION TestCollection7(ColumnType1) PRIMARY KEY id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.24.update.sqlpp similarity index 79% copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.24.update.sqlpp index ad94373b1d..b9bd7124af 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.24.update.sqlpp @@ -19,17 +19,24 @@ USE test; + + + +insert into TestCollection6({"id":10, "ranks": [ 1 , 18 ] }); + + COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter +select * from TestCollection6 c + ) toWriter TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) +PATH (%pathprefix% "copy-to-result", "parquet-error-checks24") +TYPE ( { id : int, name : string , ranks : [int] } ) WITH { %template_colons%, %additionalProperties% "format":"parquet" -}; + } + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.25.update.sqlpp similarity index 82% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.25.update.sqlpp index ad94373b1d..acc640430d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.25.update.sqlpp @@ -19,17 +19,23 @@ USE test; + + + +insert 
into TestCollection7({"id":10, "ranks": [ 1 , missing, 2 ] }); + + COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter +select * from TestCollection7 c + ) toWriter TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) +PATH (%pathprefix% "copy-to-result", "parquet-error-checks25") WITH { %template_colons%, %additionalProperties% "format":"parquet" -}; + } + diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp index 308d3b99aa..3483cf66eb 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp @@ -35,16 +35,16 @@ use test; insert into TestCollection({"id":2}); -insert into TestCollection({"id":5,"name":"virat"}); -insert into TestCollection({"id":8,"name":{"first":"virat"}}); -insert into TestCollection({"id":10,"name":{"first":"virat"},"age":18}); -insert into TestCollection({"id":12,"name":123}); -insert into TestCollection({"id":15,"name":[123,456]}); -insert into TestCollection({"id":17,"name":765}); -insert into TestCollection({"id":20,"name":[789]}); -insert into TestCollection({"id":21,"name":[{"first":"virat"}]}); -insert into TestCollection({"id":27,"name":[{"first":"virat","second":"kohli"}]}); -insert into TestCollection({"id":28,"name":{"first":"virat"}}); +insert into TestCollection({"id":5, "name":"virat"}); +insert into TestCollection({"id":8, "name":{"first":"virat"}}); +insert into TestCollection({"id":10, "name":{"first":"virat"},"age":18}); +insert into TestCollection({"id":12, "name":123}); +insert into 
TestCollection({"id":15, "name":[123,456]}); +insert into TestCollection({"id":17, "name":765}); +insert into TestCollection({"id":20, "name":[789]}); +insert into TestCollection({"id":21, "name":[{"first":"virat"}]}); +insert into TestCollection({"id":27, "name":[{"first":"virat","second":"kohli"}]}); +insert into TestCollection({"id":28, "name":{"first":"virat"}}); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.01.ddl.sqlpp similarity index 75% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.01.ddl.sqlpp index ad94373b1d..f4a6aaed14 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.01.ddl.sqlpp @@ -17,19 +17,14 @@ * under the License. */ +DROP DATAVERSE test if exists; +CREATE DATAVERSE test; USE test; -COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" -}; - +CREATE TYPE ColumnType1 AS { + id: integer + ,a : {b: int?, missing_test: int?}? 
+ }; +CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.02.update.sqlpp new file mode 100644 index 0000000000..c6d33bb452 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.02.update.sqlpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +use test; + + + +insert into TestCollection( {"id":8, "name":null, "nested" : { "List":[100,null,300], "A" : null , "randomK" : null } , "obj_array" : [{"first":"first"},{"first":null},{"first":"second"}] , "a" : { "b" : null } , "c" : null , "f" : null , "arr" : [[1,null,2], null] }); +insert into TestCollection( {"id":10,"name":"Virat", "nested" : { "List":[] , "A" : null , "randomK" : null } , "obj_array": [{"first":"first"},{"first":"second"}] , "a" : { "b" : null } , "c" : { "d" : null , "e" : null } , "f" : [] ,"arr" : [ [1,2], [] ] } ); +insert into TestCollection({"id":28,"name":"Virat", "nested" : { "List":[null] , "A" : "a" , "randomK" : null } , "obj_array": [{"first":"first"},{"first":"second"}] , "a" : {"b" : 1} , "c" : { "d" : 1 , "e" : null } , "f" : [1,null] , "arr" : [ [1,2], [null] ] } ); +insert into TestCollection({"id":34, "name":null ,"nested" : { "List":null , "A" : null , "randomK" : "randomV" } , "obj_array": [{"first":"first"},{"first":"second"}] , "a" : null , "c" : { "d" : null , "e" : 1 } , "f" : [2,null,3.0] , "arr" : [[]] }); +insert into TestCollection({"id":37 , "name" : "Kohli", "nested" : { "List":[1,2,3] , "A" : "a" , "randomK" : null } , "obj_array": [{"first":"first"},{"first":"second"}] , "a" : { "b" : 1 } , "c" : { "d" : 1 , "e" : 1 }, "f" : [3.6,4.0] , "arr" : [[1,2,3]] }); +-- insert into TestCollection({"id":41 }); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.03.update.sqlpp similarity index 66% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.03.update.sqlpp index ad94373b1d..6c5bf3d4bb 100644 --- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.03.update.sqlpp @@ -20,16 +20,36 @@ USE test; COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter +select c.* from TestCollection c + ) toWriter TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) +PATH (%pathprefix% "copy-to-result", "parquet-null-type") +TYPE ( + { + id : int, + name : string, + nested : { + List : [ int ], + A : string, + randomK : string + }, + obj_array : [ { first : string } ], + a : { + b : int + }, + c : { + d : int, + e : int + }, + f : [ float ], + arr : [[int]] + } +) WITH { %template_colons%, %additionalProperties% "format":"parquet" -}; + }; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.04.ddl.sqlpp similarity index 72% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.04.ddl.sqlpp index ad94373b1d..077374510a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.04.ddl.sqlpp @@ -19,17 +19,18 @@ USE test; -COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" -}; +CREATE TYPE 
ColumnType2 AS { + }; + +CREATE EXTERNAL DATASET TestDataset(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-null-type/"), + ("include"="*.parquet"), + ("requireVersionChangeDetection"="false"), + ("format" = "parquet") +); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.05.query.sqlpp similarity index 76% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.05.query.sqlpp index ad94373b1d..b03fc5e726 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.05.query.sqlpp @@ -19,17 +19,9 @@ USE test; -COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" -}; +SELECT c.* +FROM TestDataset c +ORDER BY c.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.06.update.sqlpp similarity index 85% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.06.update.sqlpp index 
ad94373b1d..9e508ad87e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.06.update.sqlpp @@ -20,16 +20,15 @@ USE test; COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter +select c.* from TestCollection c + ) toWriter TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) +PATH (%pathprefix% "copy-to-result", "parquet-null-type-schemaless") WITH { %template_colons%, %additionalProperties% "format":"parquet" -}; + }; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.07.ddl.sqlpp similarity index 73% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.07.ddl.sqlpp index ad94373b1d..1922853ffd 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.07.ddl.sqlpp @@ -19,17 +19,15 @@ USE test; -COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" -}; +CREATE EXTERNAL DATASET TestDatasetSchemaless(ColumnType2) USING %adapter% +( + %template%, + %additional_Properties%, + ("definition"="%path_prefix%copy-to-result/parquet-null-type-schemaless/"), + 
("include"="*.parquet"), + ("requireVersionChangeDetection"="false"), + ("format" = "parquet") +); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.08.query.sqlpp similarity index 76% copy from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.08.query.sqlpp index ad94373b1d..670cba6b04 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.08.query.sqlpp @@ -19,17 +19,9 @@ USE test; -COPY ( - SELECT c.* FROM NameCommentDataset c -) toWriter -TO %adapter% -PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) -WITH { - %template_colons%, - %additionalProperties% - "format":"parquet" -}; +SELECT c.* +FROM TestDatasetSchemaless c +ORDER BY c.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp index 28872dda0c..1a93794d24 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp @@ -24,187 +24,6 @@ COPY ( ) toWriter TO %adapter% PATH (%pathprefix% "copy-to-result", "parquet-tweet") -TYPE ( { - coordinates: { - coordinates: [ - double - ], - `type` : string - }, - created_at: 
string, - entities: { - urls: [ - { - display_url: string, - expanded_url: string, - indices: [ - int - ], - url: string - } - ], - user_mentions: [ - { - id: int, - id_str: string, - indices: [ - int - ], - name: string, - screen_name: string - } - ] - }, - favorite_count: int, - favorited: boolean, - filter_level: string, - geo: { - coordinates: [ - double - ], - `type`: string - }, - id: string, - id_str: string, - in_reply_to_screen_name: string, - in_reply_to_status_id: int, - in_reply_to_status_id_str: string, - in_reply_to_user_id: int, - in_reply_to_user_id_str: string, - is_quote_status: boolean, - lang: string, - place: { - bounding_box: { - coordinates: [ - [ - [ - double - ] - ] - ], - `type`: string - }, - country: string, - country_code: string, - full_name: string, - id: string, - name: string, - place_type: string, - url: string - }, - possibly_sensitive: boolean, - quoted_status: { - created_at: string, - entities: { - user_mentions: [ - { - id: int, - id_str: string, - indices: [ - int - ], - name: string, - screen_name: string - } - ] - }, - favorite_count: int, - favorited: boolean, - filter_level: string, - id: int, - id_str: string, - in_reply_to_screen_name: string, - in_reply_to_status_id: int, - in_reply_to_status_id_str: string, - in_reply_to_user_id: int, - in_reply_to_user_id_str: string, - is_quote_status: boolean, - lang: string, - retweet_count: int, - retweeted: boolean, - source: string, - text: string, - truncated: boolean, - user: { - contributors_enabled: boolean, - created_at: string, - default_profile: boolean, - default_profile_image: boolean, - description: string, - favourites_count: int, - followers_count: int, - friends_count: int, - geo_enabled: boolean, - id: int, - id_str: string, - is_translator: boolean, - lang: string, - listed_count: int, - name: string, - profile_background_color: string, - profile_background_image_url: string, - profile_background_image_url_https: string, - profile_background_tile: boolean, - 
profile_banner_url: string, - profile_image_url: string, - profile_image_url_https: string, - profile_link_color: string, - profile_sidebar_border_color: string, - profile_sidebar_fill_color: string, - profile_text_color: string, - profile_use_background_image: boolean, - protected: boolean, - screen_name: string, - statuses_count: int, - verified: boolean - } - }, - quoted_status_id: int, - quoted_status_id_str: string, - retweet_count: int, - retweeted: boolean, - source: string, - text: string, - timestamp_ms: string, - truncated: boolean, - user: { - contributors_enabled: boolean, - created_at: string, - default_profile: boolean, - default_profile_image: boolean, - description: string, - favourites_count: int, - followers_count: int, - friends_count: int, - geo_enabled: boolean, - id: int, - id_str: string, - is_translator: boolean, - lang: string, - listed_count: int, - location: string, - name: string, - profile_background_color: string, - profile_background_image_url: string, - profile_background_image_url_https: string, - profile_background_tile: boolean, - profile_banner_url: string, - profile_image_url: string, - profile_image_url_https: string, - profile_link_color: string, - profile_sidebar_border_color: string, - profile_sidebar_fill_color: string, - profile_text_color: string, - profile_use_background_image: boolean, - protected: boolean, - screen_name: string, - statuses_count: int, - time_zone: string, - url: string, - utc_offset: int, - verified: boolean - } - } ) WITH { %template_colons%, %additionalProperties% diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp index 9a6f3c408d..b0f7cb402c 100644 --- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp @@ -20,7 +20,7 @@ use test; -insert into TestCollection({"id":2}); +insert into TestCollection({"id":2, "rating" : null, "ratings" : [null]}); insert into TestCollection({"id":5,"rating" :1 , "ratings" : [] }); insert into TestCollection({"id":8,"rating" :2 , "ratings" : [ 1 ] }); insert into TestCollection({"id":10,"rating" :3.0 , "ratings" : [ 1, 2, 3] }); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp index ad94373b1d..a1283b6b14 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp @@ -24,7 +24,6 @@ COPY ( ) toWriter TO %adapter% PATH (%pathprefix% "copy-to-result", "parquet-utf8") -TYPE ( { comment:string, id:bigint, name:string } ) WITH { %template_colons%, %additionalProperties% diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm index 5e0df967f3..aec4326c48 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm @@ -1,2 +1,2 @@ -{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { 
"display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_scr [...] -{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": [...] \ No newline at end of file +{ "quoted_status": { "in_reply_to_status_id_str": "string", "in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": "string", "truncated": true, "source": "string", "retweet_count": 1, "retweeted": true, "filter_level": "string", "in_reply_to_screen_name": "string", "is_quote_status": true, "entities": { "user_mentions": [ { "indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": "string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1, [...] +{ "quoted_status": { "in_reply_to_status_id_str": "string", "in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": "string", "truncated": true, "source": "string", "retweet_count": 1, "retweeted": true, "filter_level": "string", "in_reply_to_screen_name": "string", "is_quote_status": true, "entities": { "user_mentions": [ { "indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": "string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1, [...] 
\ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm new file mode 100644 index 0000000000..763b652306 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm @@ -0,0 +1,5 @@ +{ "id": 8, "nested": { "List": [ 100, 300 ] }, "obj_array": [ { "first": "first" }, { }, { "first": "second" } ], "a": { }, "arr": [ [ 1, 2 ] ] } +{ "id": 10, "name": "Virat", "nested": { "List": [ ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { }, "c": { }, "f": [ ], "arr": [ [ 1, 2 ], [ ] ] } +{ "id": 28, "name": "Virat", "nested": { "List": [ ], "A": "a" }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, "c": { "d": 1 }, "f": [ 1.0 ], "arr": [ [ 1, 2 ], [ ] ] } +{ "id": 34, "nested": { "randomK": "randomV" }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "arr": [ [ ] ] } +{ "id": 37, "name": "Kohli", "nested": { "List": [ 1, 2, 3 ], "A": "a" }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.5999999046325684, 4.0 ], "arr": [ [ 1, 2, 3 ] ] } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm new file mode 100644 index 0000000000..628b82f725 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm @@ -0,0 +1,5 @@ +{ "arr": [ [ 1, 2 ] ], "a": { }, "id": 8, "nested": { "List": [ 100, 300 ] }, "obj_array": [ { "first": "first" }, { }, { "first": "second" } ] } +{ "arr": [ [ 1, 2 ], [ ] ], "a": 
{ }, "c": { }, "f": [ ], "name": "Virat", "id": 10, "nested": { "List": [ ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } +{ "arr": [ [ 1, 2 ], [ ] ], "a": { "b": 1 }, "c": { "d": 1 }, "f": [ 1.0 ], "name": "Virat", "id": 28, "nested": { "A": "a", "List": [ ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } +{ "arr": [ [ ] ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "id": 34, "nested": { "randomK": "randomV" }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } +{ "arr": [ [ 1, 2, 3 ] ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.6, 4.0 ], "name": "Kohli", "id": 37, "nested": { "A": "a", "List": [ 1, 2, 3 ] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm index 5e0df967f3..aec4326c48 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm @@ -1,2 +1,2 @@ -{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { "display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_scr [...] 
-{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": [...] \ No newline at end of file +{ "quoted_status": { "in_reply_to_status_id_str": "string", "in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": "string", "truncated": true, "source": "string", "retweet_count": 1, "retweeted": true, "filter_level": "string", "in_reply_to_screen_name": "string", "is_quote_status": true, "entities": { "user_mentions": [ { "indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": "string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1, [...] +{ "quoted_status": { "in_reply_to_status_id_str": "string", "in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": "string", "truncated": true, "source": "string", "retweet_count": 1, "retweeted": true, "filter_level": "string", "in_reply_to_screen_name": "string", "is_quote_status": true, "entities": { "user_mentions": [ { "indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": "string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1, [...] 
\ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm index 5c4c334aca..4fd973e9bc 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm @@ -1,11 +1,11 @@ -{ "id": 2 } +{ "ratings": [ ], "id": 2 } { "ratings": [ ], "rating": 1.0, "id": 5 } -{ "ratings": [ 1 ], "rating": 2, "id": 8 } -{ "ratings": [ 1, 2, 3 ], "rating": 3, "id": 10 } +{ "ratings": [ 1 ], "rating": 2.0, "id": 8 } +{ "ratings": [ 1, 2, 3 ], "rating": 3.0, "id": 10 } { "ratings": [ 1.0, 2.0, 3.0, 4.0, 5.0 ], "rating": 4.3, "id": 12 } { "ratings": [ 1.0, 2.0, 3.0, 4.0, 5.0 ], "rating": 4.7, "id": 15 } { "ratings": [ 1.1111, 2.222222, 3.3333, 4.44444, 5.555555 ], "rating": 4.22222, "id": 17 } { "ratings": [ 1.0, 2.0, 3.0, 4.0, 5.0 ], "rating": 5.455555555, "id": 20 } { "ratings": [ 0.0, 6.7 ], "rating": 1.0, "id": 21 } { "ratings": [ 1.0 ], "rating": 8.0, "id": 27 } -{ "ratings": [ ], "rating": 3, "id": 28 } +{ "ratings": [ ], "rating": 3.0, "id": 28 } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm index c60145d7f4..3ea2e8d765 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm @@ -1,8 +1,8 @@ -{ "id": 1, "name": "John" } -{ "id": 2, "name": "Abel" } -{ "id": 3, "name": "Sandy" } -{ "id": 4, "name": "Alex" } -{ "id": 5, "name": "Mike" } -{ "id": 6, 
"name": "Tom" } -{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7, "name": "Jerry" } -{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffe e ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حس [...] +{ "name": "John", "id": 1 } +{ "name": "Abel", "id": 2 } +{ "name": "Sandy", "id": 3 } +{ "name": "Alex", "id": 4 } +{ "name": "Mike", "id": 5 } +{ "name": "Tom", "id": 6 } +{ "name": "Jerry", "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7 } +{ "name": "William", "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢 😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢 [...] 
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index ae0a5207b2..a1ca8038a8 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -114,6 +114,16 @@ <output-dir compare="Text">parquet-cover-data-types</output-dir> </compilation-unit> </test-case> + <test-case FilePath="copy-to"> + <compilation-unit name="parquet-null-type"> + <placeholder name="adapter" value="S3" /> + <placeholder name="pathprefix" value="" /> + <placeholder name="path_prefix" value="" /> + <placeholder name="additionalProperties" value='"container":"playground",' /> + <placeholder name="additional_Properties" value='("container"="playground")' /> + <output-dir compare="Text">parquet-null-type</output-dir> + </compilation-unit> + </test-case> <test-case FilePath="copy-to"> <compilation-unit name="parquet-file-writers"> <placeholder name="adapter" value="S3" /> @@ -256,6 +266,8 @@ <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error> <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error> <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error> + <expected-error>HYR0132: Extra field in the result, field 'c' does not exist at 'root' in the schema</expected-error> + <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error> </compilation-unit> </test-case> <test-case FilePath="copy-to/negative"> diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java index ca87cede48..221042aa22 100644 
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java @@ -37,11 +37,11 @@ import org.apache.logging.log4j.Logger; public class ParquetSchemaInferPoolWriter { private static final Logger LOGGER = LogManager.getLogger(); private final ParquetExternalWriterFactory writerFactory; - private List<ParquetSchemaTree.SchemaNode> schemaNodes; - private List<IExternalWriter> writerList; + private final List<ParquetSchemaTree.SchemaNode> schemaNodes; + private final List<IExternalWriter> writerList; private final int maxSchemas; - private ISchemaChecker schemaChecker; - private ParquetSchemaLazyVisitor schemaLazyVisitor; + private final ISchemaChecker schemaChecker; + private final ParquetSchemaLazyVisitor schemaLazyVisitor; public ParquetSchemaInferPoolWriter(ParquetExternalWriterFactory writerFactory, ISchemaChecker schemaChecker, ParquetSchemaLazyVisitor parquetSchemaLazyVisitor, int maxSchemas) { @@ -57,12 +57,11 @@ public class ParquetSchemaInferPoolWriter { for (int i = 0; i < schemaNodes.size(); i++) { ISchemaChecker.SchemaComparisonType schemaComparisonType = schemaChecker.checkSchema(schemaNodes.get(i), value); - if (schemaComparisonType.equals(ISchemaChecker.SchemaComparisonType.EQUIVALENT)) { return; } else if (schemaComparisonType.equals(ISchemaChecker.SchemaComparisonType.GROWING)) { // If the schema is growing, close the existing writer and create a new one with the new schema. 
- schemaNodes.set(i, schemaLazyVisitor.inferSchema(value)); + schemaLazyVisitor.updateSchema(value, schemaNodes.get(i)); closeWriter(i); return; } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java index 046c03f707..1a3aea1c43 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java @@ -40,16 +40,15 @@ public class ParquetExternalFilePrinter implements IExternalPrinter { private final CompressionCodecName compressionCodecName; private final MessageType schema; private ParquetOutputFile parquetOutputFile; - // private String parquetSchemaString; private ParquetWriter<IValueReference> writer; private final long rowGroupSize; private final int pageSize; private final ParquetProperties.WriterVersion writerVersion; - public ParquetExternalFilePrinter(CompressionCodecName compressionCodecName, MessageType parquetSchemaString, + public ParquetExternalFilePrinter(CompressionCodecName compressionCodecName, MessageType parquetSchema, IAType typeInfo, long rowGroupSize, int pageSize, ParquetProperties.WriterVersion writerVersion) { this.compressionCodecName = compressionCodecName; - this.schema = parquetSchemaString; + this.schema = parquetSchema; this.typeInfo = typeInfo; this.rowGroupSize = rowGroupSize; this.pageSize = pageSize; diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java index cffeb2fde7..d57d3c7e11 100644 --- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java @@ -23,6 +23,7 @@ import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWri import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD; import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD; +import org.apache.asterix.common.exceptions.RuntimeDataException; import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable; import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable; import org.apache.asterix.om.lazy.FlatLazyVisitablePointable; @@ -43,6 +44,103 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.Type; +/** + * + * + * + * + * Let's say we have the following record type: + { a : int, b : [ int ] , c : { d : int }, e : [ { f : int } ] } + + The corresponding parquet Schema : + required group schema { + optional int64 a; + optional group b (List) { + repeated group list { + optional int64 element; + } + } + optional group c { + optional int64 d; + } + optional group e (List) { + repeated group list { + optional group element { + optional int64 f; + } + } + } + } + + The recordConsumer will be called as follows for different cases: + + ======================================================================================================================= + + writing into a : + startField("a") + addValue() + endField("a") + + ======================================================================================================================= + + + writing into b: b is an empty array write a null field + + startField("b") startField("b") startField("b") + startGroup() startGroup() startGroup() 
+ startField("list") startField("list") + startGroup() startGroup() + startField("element") + addValue() + endField("element") + endGroup() endGroup() + endField("list") endField("list") + endGroup() endGroup() endGroup() + endField("b") endField("b") endField("b") + + ======================================================================================================================= + + + writing into d: d is null c is an empty object + c : { d : null } c : {} + + startField("c") startField("c") startField("c") + startGroup() startGroup() startGroup() + startField("d") + addValue() + endField("d") + endGroup() endGroup() endGroup() + endField("c") endField("c") endField("c") + + + + ======================================================================================================================= + + + writing into f: e is an empty array e has nulls e has empty objects + e : [] e : [ null ] e : [ {} ] + + + startField("e") startField("e") startField("e") startField("e") + startGroup() startGroup() startGroup() startGroup() + startField("list") startField("list") startField("list") + startGroup() startGroup() startGroup() + startField("element") startField("element") + startGroup() startGroup() + startField("f") + addValue() + endField("f") + endGroup() endGroup() + endField("element") endField("element") + endGroup() endGroup() endGroup() + endField("list") endField("list") endField("list") + endGroup() endGroup() endGroup() endGroup() + endField("e") endField("e") endField("e") endField("e") + + * + * + */ + public class ParquetRecordLazyVisitor implements ILazyVisitablePointableVisitor<Void, Type> { private static final Logger LOGGER = LogManager.getLogger(); private final MessageType schema; @@ -80,24 +178,39 @@ public class ParquetRecordLazyVisitor implements ILazyVisitablePointableVisitor< PRIMITIVE_TYPE_ERROR_FIELD, type.getName()); } GroupType groupType = type.asGroupType(); + int nonMissingChildren = 0; recordConsumer.startGroup(); for (int i = 0; i 
< pointable.getNumberOfChildren(); i++) { pointable.nextChild(); AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable(); String columnName = fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName()); - + if (child.getTypeTag() == ATypeTag.MISSING) { + continue; + } + nonMissingChildren++; if (!groupType.containsField(columnName)) { LOGGER.info("Group type: {} does not contain field in record type: {}", LogRedactionUtil.userData(groupType.getName()), LogRedactionUtil.userData(columnName)); throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, columnName, groupType.getName()); } + + if (child.getTypeTag() == ATypeTag.NULL) { + continue; + } + recordConsumer.startField(columnName, groupType.getFieldIndex(columnName)); child.accept(this, groupType.getType(columnName)); recordConsumer.endField(columnName, groupType.getFieldIndex(columnName)); } recordConsumer.endGroup(); + if (nonMissingChildren != groupType.getFieldCount()) { + LOGGER.info("Some Missing fields in group type: {}.", LogRedactionUtil.userData(groupType.toString())); + throw RuntimeDataException.create(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, "Non-Missing", "Missing", + groupType.getName()); + } + return null; } @@ -142,15 +255,22 @@ public class ParquetRecordLazyVisitor implements ILazyVisitablePointableVisitor< for (int i = 0; i < pointable.getNumberOfChildren(); i++) { pointable.nextChild(); AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable(); - + if (child.getTypeTag() == ATypeTag.MISSING) { + LOGGER.info("Missing value in list type: {}", LogRedactionUtil.userData(groupType.getName())); + throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, "Non-Missing", "Missing", + groupType.getName()); + } recordConsumer.startGroup(); + if (child.getTypeTag() == ATypeTag.NULL) { + recordConsumer.endGroup(); + continue; + } + recordConsumer.startField(ELEMENT_FIELD, listType.getFieldIndex(ELEMENT_FIELD)); 
child.accept(this, listType.getType(ELEMENT_FIELD)); recordConsumer.endField(ELEMENT_FIELD, listType.getFieldIndex(ELEMENT_FIELD)); recordConsumer.endGroup(); - } - recordConsumer.endField(LIST_FIELD, groupType.getFieldIndex(LIST_FIELD)); } @@ -174,24 +294,34 @@ public class ParquetRecordLazyVisitor implements ILazyVisitablePointableVisitor< throws HyracksDataException { rec.set(valueReference); this.recordConsumer = recordConsumer; + int nonMissingChildren = 0; recordConsumer.startMessage(); for (int i = 0; i < rec.getNumberOfChildren(); i++) { rec.nextChild(); String columnName = fieldNamesDictionary.getOrCreateFieldNameIndex(rec.getFieldName()); AbstractLazyVisitablePointable child = rec.getChildVisitablePointable(); - + if (child.getTypeTag() == ATypeTag.MISSING) { + continue; + } + nonMissingChildren++; if (!schema.containsField(columnName)) { LOGGER.info("Schema: {} does not contain field: {}", LogRedactionUtil.userData(schema.toString()), LogRedactionUtil.userData(columnName)); - throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, columnName, - schema.getName()); + throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, columnName, "root"); + } + if (child.getTypeTag() == ATypeTag.NULL) { + continue; } - recordConsumer.startField(columnName, schema.getFieldIndex(columnName)); child.accept(this, schema.getType(columnName)); recordConsumer.endField(columnName, schema.getFieldIndex(columnName)); } + if (nonMissingChildren != schema.getFieldCount()) { + LOGGER.info("Some Missing fields in group type: {}.", LogRedactionUtil.userData(schema.toString())); + throw RuntimeDataException.create(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, "Non-Missing", "Missing", + "root"); + } recordConsumer.endMessage(); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java index 2278372b4c..3f0de608d1 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java @@ -48,6 +48,7 @@ public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< private final RecordLazyVisitablePointable rec; private final FieldNamesDictionary fieldNamesDictionary; private final static String SCHEMA_NAME = "asterix_schema"; + private boolean foundMissing = false; public ParquetSchemaLazyVisitor(IAType typeInfo) { this.fieldNamesDictionary = new FieldNamesDictionary(); @@ -66,12 +67,11 @@ public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< if (schemaNode.getType() == null) { schemaNode.setType(new ParquetSchemaTree.RecordType()); } - if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) { + if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType recordType)) { LOGGER.info("Incompatible type found in record: {} and {}", LogRedactionUtil.userData(schemaNode.toString()), pointable.getTypeTag()); throw RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY); } - ParquetSchemaTree.RecordType recordType = (ParquetSchemaTree.RecordType) schemaNode.getType(); for (int i = 0; i < pointable.getNumberOfChildren(); i++) { pointable.nextChild(); AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable(); @@ -83,6 +83,7 @@ public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< childType = new ParquetSchemaTree.SchemaNode(); recordType.add(childColumnName, childType); } + // Can optimize by reducing new object creation child.accept(this, childType); } return null; @@ -91,6 +92,7 @@ public class 
ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< @Override public Void visit(AbstractListLazyVisitablePointable pointable, ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { + if (schemaNode.getType() == null) { schemaNode.setType(new ParquetSchemaTree.ListType()); } @@ -102,6 +104,11 @@ public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< for (int i = 0; i < numChildren; i++) { pointable.nextChild(); AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable(); + + if(child.getTypeTag()==ATypeTag.MISSING) { + throw RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY); + } + if (listType.isEmpty()) { listType.setChild(new ParquetSchemaTree.SchemaNode()); } @@ -111,36 +118,57 @@ public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< } @Override - public Void visit(FlatLazyVisitablePointable pointable, ParquetSchemaTree.SchemaNode schemaNode) + public Void visit(FlatLazyVisitablePointable pointable,ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { + if(pointable.getTypeTag() == ATypeTag.NULL) { + return null; + } + if (schemaNode.getType() == null) { + if (pointable.getTypeTag() == ATypeTag.MISSING) + { + foundMissing = true; + schemaNode.setType(new ParquetSchemaTree.FlatType(ATypeTag.MISSING)); + return null; + } if (!AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.containsKey(pointable.getTypeTag())) { throw RuntimeDataException.create(TYPE_UNSUPPORTED_PARQUET_WRITE, pointable.getTypeTag()); } schemaNode.setType(new ParquetSchemaTree.FlatType(pointable.getTypeTag())); return null; } - if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) { + if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType flatType)) { LOGGER.info("Incompatible type found: {} and {}", LogRedactionUtil.userData(schemaNode.toString()), pointable.getTypeTag()); throw RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY); 
} - ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType) schemaNode.getType(); - if (!flatType.isCompatibleWith(pointable.getTypeTag())) { LOGGER.info("Incompatible type found: {} and {}", flatType, pointable.getTypeTag()); throw RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY); } flatType.coalesce(pointable.getTypeTag()); - return null; } + public void updateSchema(IValueReference valueReference, ParquetSchemaTree.SchemaNode previousSchema) + throws HyracksDataException { + rec.set(valueReference); + foundMissing = false; + rec.accept(this, previousSchema); + if (foundMissing) { + removeMissing(previousSchema); + } + } + public ParquetSchemaTree.SchemaNode inferSchema(IValueReference valueReference) throws HyracksDataException { ParquetSchemaTree.SchemaNode schemaNode = new ParquetSchemaTree.SchemaNode(); rec.set(valueReference); + foundMissing = false; rec.accept(this, schemaNode); + if (foundMissing) { + removeMissing(schemaNode); + } return schemaNode; } @@ -156,4 +184,24 @@ public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor< return builder.named(SCHEMA_NAME); } + private static void removeMissing(ParquetSchemaTree.SchemaNode schemaNode) { + if (schemaNode.getType() == null) { + return; + } + if (schemaNode.getType() instanceof ParquetSchemaTree.RecordType recordType) { + recordType.getChildren().entrySet().removeIf( + entry -> (entry.getValue().getType() instanceof ParquetSchemaTree.FlatType flatType && flatType.getTypeTag() == ATypeTag.MISSING)); + + for (Map.Entry<String, ParquetSchemaTree.SchemaNode> entry : recordType.getChildren().entrySet()) { + removeMissing(entry.getValue()); + } + } + + if (schemaNode.getType() instanceof ParquetSchemaTree.ListType listType) { + if (listType.isEmpty()) { + return; + } + removeMissing(listType.getChild()); + } + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java index dae5295fef..5fb2ac7c6e 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java @@ -115,6 +115,22 @@ public class ParquetSchemaTree { } } + public boolean isStrictParentOf(ATypeTag childTypeTag) { + if (!isHierarchical || !AsterixParquetTypeMap.HIERARCHIAL_TYPES.containsKey(childTypeTag)) { + return false; + } + return AsterixParquetTypeMap.HIERARCHIAL_TYPES.get(this.typeTag) > AsterixParquetTypeMap.HIERARCHIAL_TYPES + .get(childTypeTag); + } + + public boolean isStrictChildOf(ATypeTag parentTypeTag) { + if (!isHierarchical || !AsterixParquetTypeMap.HIERARCHIAL_TYPES.containsKey(parentTypeTag)) { + return false; + } + return AsterixParquetTypeMap.HIERARCHIAL_TYPES.get(this.typeTag) < AsterixParquetTypeMap.HIERARCHIAL_TYPES + .get(parentTypeTag); + } + public void coalesce(ATypeTag typeTag) { if (!isCompatibleWith(typeTag) || !isHierarchical) { return; @@ -129,6 +145,10 @@ public class ParquetSchemaTree { public PrimitiveType.PrimitiveTypeName getPrimitiveTypeName() { return AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.get(typeTag); } + + public ATypeTag getTypeTag() { + return typeTag; + } } static class ListType extends AbstractType { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java index 28d4247f14..e484e84035 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java +++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java @@ -52,28 +52,33 @@ public class SchemaCheckerLazyVisitor implements ISchemaChecker, public ISchemaChecker.SchemaComparisonType visit(RecordLazyVisitablePointable pointable, ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { if (schemaNode.getType() == null) { - return ISchemaChecker.SchemaComparisonType.GROWING; + return SchemaComparisonType.GROWING; } - if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) { + if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType recordType)) { return ISchemaChecker.SchemaComparisonType.CONFLICTING; } - ParquetSchemaTree.RecordType recordType = (ParquetSchemaTree.RecordType) schemaNode.getType(); ISchemaChecker.SchemaComparisonType schemaComparisonType = ISchemaChecker.SchemaComparisonType.EQUIVALENT; - + int nonMissingChildren = 0; for (int i = 0; i < pointable.getNumberOfChildren(); i++) { pointable.nextChild(); AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable(); + if(child.getTypeTag() == ATypeTag.MISSING){ + continue; + } + nonMissingChildren++; String childColumnName = fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName()); ParquetSchemaTree.SchemaNode childType = recordType.getChildren().get(childColumnName); if (childType == null) { - schemaComparisonType = - ISchemaChecker.max(schemaComparisonType, ISchemaChecker.SchemaComparisonType.GROWING); + schemaComparisonType = ISchemaChecker.max(schemaComparisonType, SchemaComparisonType.CONFLICTING); continue; } schemaComparisonType = ISchemaChecker.max(schemaComparisonType, child.accept(this, childType)); } + if(nonMissingChildren!= recordType.getChildren().size()) { + return SchemaComparisonType.CONFLICTING; + } return schemaComparisonType; } @@ -81,7 +86,7 @@ public class SchemaCheckerLazyVisitor implements ISchemaChecker, public 
ISchemaChecker.SchemaComparisonType visit(AbstractListLazyVisitablePointable pointable, ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { if (schemaNode.getType() == null) { - return ISchemaChecker.SchemaComparisonType.GROWING; + return SchemaComparisonType.GROWING; } if (!(schemaNode.getType() instanceof ParquetSchemaTree.ListType)) { return ISchemaChecker.SchemaComparisonType.CONFLICTING; @@ -93,6 +98,9 @@ public class SchemaCheckerLazyVisitor implements ISchemaChecker, for (int i = 0; i < pointable.getNumberOfChildren(); i++) { pointable.nextChild(); AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable(); + if (child.getTypeTag() == ATypeTag.MISSING) { + throw new HyracksDataException("Missing values are not allowed in lists for parquet printing."); + } if (listType.isEmpty()) { schemaComparisonType = ISchemaChecker.max(schemaComparisonType, ISchemaChecker.SchemaComparisonType.GROWING); @@ -104,22 +112,33 @@ public class SchemaCheckerLazyVisitor implements ISchemaChecker, } @Override - public ISchemaChecker.SchemaComparisonType visit(FlatLazyVisitablePointable pointable, + public ISchemaChecker.SchemaComparisonType visit(FlatLazyVisitablePointable currentValue, ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException { if (schemaNode.getType() == null) { - return ISchemaChecker.SchemaComparisonType.GROWING; + return SchemaComparisonType.GROWING; + } + // SchemaNode.getTypeTag can never be MISSING here + if(currentValue.getTypeTag()==ATypeTag.NULL){ + return SchemaComparisonType.EQUIVALENT; } - if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) { + if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType inferredType)) { return ISchemaChecker.SchemaComparisonType.CONFLICTING; } - ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType) schemaNode.getType(); - - if (!flatType.isCompatibleWith(pointable.getTypeTag())) { + if (inferredType.getTypeTag() == 
currentValue.getTypeTag()) { + return ISchemaChecker.SchemaComparisonType.EQUIVALENT; + } + if (!inferredType.isCompatibleWith(currentValue.getTypeTag())) { return ISchemaChecker.SchemaComparisonType.CONFLICTING; } + if(inferredType.isStrictChildOf(currentValue.getTypeTag())) { + return ISchemaChecker.SchemaComparisonType.GROWING; + } + if (inferredType.isStrictParentOf(currentValue.getTypeTag())) { + return ISchemaChecker.SchemaComparisonType.EQUIVALENT; + } - return ISchemaChecker.SchemaComparisonType.EQUIVALENT; + return SchemaComparisonType.CONFLICTING; } @Override