This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit e865c48b9582b4b41097f0e1fad97dbf10f9ee46
Author: preetham0202 <preetham.polupar...@couchbase.com>
AuthorDate: Wed May 14 14:44:45 2025 +0530

    [ASTERIXDB-3392] Handle NULL/Empty types in Copy to parquet
    
     - user model changes: no
     - storage format changes: no
     - interface changes: no
    Details: Fix handling of null types, especially when nested inside array types, in COPY TO parquet.
    
    Ext-ref: MB-66710
    Change-Id: I79af2b66200063b6d09fe0cd2dfda3d9e7939925
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20029
    Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Ali Alsuliman <ali.al.solai...@gmail.com>
    Reviewed-by: Preetham Poluparthi <preetha...@apache.org>
    Tested-by: Preetham Poluparthi <preetha...@apache.org>
---
 .../parquet-error-checks.23.ddl.sqlpp}             |  15 +-
 .../parquet-error-checks.24.update.sqlpp}          |  17 +-
 .../parquet-error-checks.25.update.sqlpp}          |  16 +-
 .../parquet-heterogeneous.02.update.sqlpp          |  20 +--
 .../parquet-null-type.01.ddl.sqlpp}                |  19 +--
 .../parquet-null-type.02.update.sqlpp              |  29 ++++
 .../parquet-null-type.03.update.sqlpp}             |  30 +++-
 .../parquet-null-type.04.ddl.sqlpp}                |  23 +--
 .../parquet-null-type.05.query.sqlpp}              |  14 +-
 .../parquet-null-type.06.update.sqlpp}             |   9 +-
 .../parquet-null-type.07.ddl.sqlpp}                |  20 +--
 .../parquet-null-type.08.query.sqlpp}              |  14 +-
 .../parquet-tweet/parquet-tweet.03.update.sqlpp    | 181 ---------------------
 .../parquet-type-hierarchy.02.update.sqlpp         |   2 +-
 .../parquet-utf8/parquet-utf8.03.update.sqlpp      |   1 -
 .../parquet-tweet/parquet-tweet.05.adm             |   4 +-
 .../parquet-null-type/parquet-null-type.05.adm     |   5 +
 .../parquet-null-type/parquet-null-type.08.adm     |   5 +
 .../copy-to/parquet-tweet/parquet-tweet.05.adm     |   4 +-
 .../parquet-type-hierarchy.05.adm                  |   8 +-
 .../copy-to/parquet-utf8/parquet-utf8.05.adm       |  16 +-
 .../runtimets/testsuite_external_dataset_s3.xml    |  12 ++
 .../parquet/ParquetSchemaInferPoolWriter.java      |  11 +-
 .../writer/printer/ParquetExternalFilePrinter.java |   5 +-
 .../printer/parquet/ParquetRecordLazyVisitor.java  | 146 ++++++++++++++++-
 .../printer/parquet/ParquetSchemaLazyVisitor.java  |  62 ++++++-
 .../writer/printer/parquet/ParquetSchemaTree.java  |  20 +++
 .../printer/parquet/SchemaCheckerLazyVisitor.java  |  47 ++++--
 28 files changed, 421 insertions(+), 334 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp
similarity index 76%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp
index ad94373b1d..a2dbe7f51d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.23.ddl.sqlpp
@@ -19,17 +19,10 @@
 
 USE test;
 
-COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet"
-};
+
+
+CREATE COLLECTION TestCollection6(ColumnType1) PRIMARY KEY id;
+CREATE COLLECTION TestCollection7(ColumnType1) PRIMARY KEY id;
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.24.update.sqlpp
similarity index 79%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.24.update.sqlpp
index ad94373b1d..b9bd7124af 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.24.update.sqlpp
@@ -19,17 +19,24 @@
 
 USE test;
 
+
+
+
+insert into TestCollection6({"id":10, "ranks": [ 1 , 18 ] });
+
+
 COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
+select * from TestCollection6 c
+    ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks24")
+TYPE ( { id : int, name : string , ranks : [int] } )
 WITH {
     %template_colons%,
     %additionalProperties%
     "format":"parquet"
-};
+    }
+
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.25.update.sqlpp
similarity index 82%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.25.update.sqlpp
index ad94373b1d..acc640430d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.25.update.sqlpp
@@ -19,17 +19,23 @@
 
 USE test;
 
+
+
+
+insert into TestCollection7({"id":10, "ranks": [ 1 , missing, 2 ] });
+
+
 COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
+select * from TestCollection7 c
+    ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
+PATH (%pathprefix% "copy-to-result", "parquet-error-checks25")
 WITH {
     %template_colons%,
     %additionalProperties%
     "format":"parquet"
-};
+    }
+
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp
index 308d3b99aa..3483cf66eb 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-heterogeneous/parquet-heterogeneous.02.update.sqlpp
@@ -35,16 +35,16 @@ use test;
 
 
 insert into TestCollection({"id":2});
-insert into TestCollection({"id":5,"name":"virat"});
-insert into TestCollection({"id":8,"name":{"first":"virat"}});
-insert into TestCollection({"id":10,"name":{"first":"virat"},"age":18});
-insert into TestCollection({"id":12,"name":123});
-insert into TestCollection({"id":15,"name":[123,456]});
-insert into TestCollection({"id":17,"name":765});
-insert into TestCollection({"id":20,"name":[789]});
-insert into TestCollection({"id":21,"name":[{"first":"virat"}]});
-insert into TestCollection({"id":27,"name":[{"first":"virat","second":"kohli"}]});
-insert into TestCollection({"id":28,"name":{"first":"virat"}});
+insert into TestCollection({"id":5, "name":"virat"});
+insert into TestCollection({"id":8, "name":{"first":"virat"}});
+insert into TestCollection({"id":10, "name":{"first":"virat"},"age":18});
+insert into TestCollection({"id":12, "name":123});
+insert into TestCollection({"id":15, "name":[123,456]});
+insert into TestCollection({"id":17, "name":765});
+insert into TestCollection({"id":20, "name":[789]});
+insert into TestCollection({"id":21, "name":[{"first":"virat"}]});
+insert into TestCollection({"id":27, "name":[{"first":"virat","second":"kohli"}]});
+insert into TestCollection({"id":28, "name":{"first":"virat"}});
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.01.ddl.sqlpp
similarity index 75%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.01.ddl.sqlpp
index ad94373b1d..f4a6aaed14 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.01.ddl.sqlpp
@@ -17,19 +17,14 @@
  * under the License.
  */
 
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
 USE test;
 
-COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet"
-};
-
 
+CREATE TYPE ColumnType1 AS {
+    id: integer
+    ,a : {b: int?, missing_test: int?}?
+    };
 
+CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.02.update.sqlpp
new file mode 100644
index 0000000000..c6d33bb452
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.02.update.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+
+
+insert into TestCollection(  {"id":8, "name":null, "nested" : { 
"List":[100,null,300], "A" : null , "randomK" : null   } , "obj_array" : 
[{"first":"first"},{"first":null},{"first":"second"}]  ,   "a" : { "b" : null } 
, "c" : null , "f" : null  , "arr" : [[1,null,2], null]  });
+insert into TestCollection(  {"id":10,"name":"Virat", "nested" : { "List":[] , 
"A" : null , "randomK" : null    }  , "obj_array": 
[{"first":"first"},{"first":"second"}]  , "a" : { "b" : null }  , "c" : { "d" : 
null , "e" : null  } , "f" : [] ,"arr" : [ [1,2], [] ] } );
+insert into TestCollection({"id":28,"name":"Virat", "nested" : { "List":[null] 
, "A" : "a" , "randomK" : null    } ,  "obj_array": 
[{"first":"first"},{"first":"second"}]  , "a" : {"b" : 1} , "c" : { "d" : 1 , 
"e" : null  } , "f" : [1,null] , "arr" : [ [1,2], [null] ]  } );
+insert into TestCollection({"id":34, "name":null ,"nested" : { "List":null , 
"A" : null  , "randomK" : "randomV"   } , "obj_array": 
[{"first":"first"},{"first":"second"}]  , "a" : null  , "c" : { "d" : null , 
"e" : 1  } , "f" : [2,null,3.0] , "arr" : [[]]   });
+insert into TestCollection({"id":37 , "name" : "Kohli", "nested" : { 
"List":[1,2,3] , "A" : "a" , "randomK" : null   } , "obj_array": 
[{"first":"first"},{"first":"second"}]  , "a" : { "b" : 1 }  , "c" : { "d" : 1 
, "e" : 1  }, "f" : [3.6,4.0] ,   "arr" : [[1,2,3]]   });
+-- insert into TestCollection({"id":41  });
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.03.update.sqlpp
similarity index 66%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.03.update.sqlpp
index ad94373b1d..6c5bf3d4bb 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.03.update.sqlpp
@@ -20,16 +20,36 @@
 USE test;
 
 COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
+select c.* from TestCollection c
+    ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
+PATH (%pathprefix% "copy-to-result", "parquet-null-type")
+TYPE (
+    {
+      id : int,
+      name : string,
+      nested : {
+       List : [ int ],
+         A : string,
+       randomK : string
+      },
+        obj_array : [ { first : string } ],
+        a : {
+            b : int
+        },
+        c : {
+            d : int,
+            e : int
+        },
+        f : [ float ],
+        arr : [[int]]
+    }
+)
 WITH {
     %template_colons%,
     %additionalProperties%
     "format":"parquet"
-};
+    };
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.04.ddl.sqlpp
similarity index 72%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.04.ddl.sqlpp
index ad94373b1d..077374510a 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.04.ddl.sqlpp
@@ -19,17 +19,18 @@
 
 USE test;
 
-COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet"
-};
 
+CREATE TYPE ColumnType2 AS {
+    };
 
 
+
+CREATE EXTERNAL DATASET TestDataset(ColumnType2) USING %adapter%
+(
+  %template%,
+  %additional_Properties%,
+  ("definition"="%path_prefix%copy-to-result/parquet-null-type/"),
+  ("include"="*.parquet"),
+  ("requireVersionChangeDetection"="false"),
+  ("format" = "parquet")
+);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.05.query.sqlpp
similarity index 76%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.05.query.sqlpp
index ad94373b1d..b03fc5e726 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.05.query.sqlpp
@@ -19,17 +19,9 @@
 
 USE test;
 
-COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet"
-};
 
+SELECT c.*
+FROM TestDataset c
+ORDER BY c.id;
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.06.update.sqlpp
similarity index 85%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.06.update.sqlpp
index ad94373b1d..9e508ad87e 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.06.update.sqlpp
@@ -20,16 +20,15 @@
 USE test;
 
 COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
+select c.* from TestCollection c
+    ) toWriter
 TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
+PATH (%pathprefix% "copy-to-result", "parquet-null-type-schemaless")
 WITH {
     %template_colons%,
     %additionalProperties%
     "format":"parquet"
-};
+    };
 
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.07.ddl.sqlpp
similarity index 73%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.07.ddl.sqlpp
index ad94373b1d..1922853ffd 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.07.ddl.sqlpp
@@ -19,17 +19,15 @@
 
 USE test;
 
-COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet"
-};
 
 
 
+CREATE EXTERNAL DATASET TestDatasetSchemaless(ColumnType2) USING %adapter%
+(
+  %template%,
+  %additional_Properties%,
+  ("definition"="%path_prefix%copy-to-result/parquet-null-type-schemaless/"),
+  ("include"="*.parquet"),
+  ("requireVersionChangeDetection"="false"),
+  ("format" = "parquet")
+);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.08.query.sqlpp
similarity index 76%
copy from 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
copy to 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.08.query.sqlpp
index ad94373b1d..670cba6b04 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-null-type/parquet-null-type.08.query.sqlpp
@@ -19,17 +19,9 @@
 
 USE test;
 
-COPY (
-   SELECT c.* FROM NameCommentDataset c
-) toWriter
-TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
-WITH {
-    %template_colons%,
-    %additionalProperties%
-    "format":"parquet"
-};
 
+SELECT c.*
+FROM TestDatasetSchemaless c
+ORDER BY c.id;
 
 
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp
index 28872dda0c..1a93794d24 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp
@@ -24,187 +24,6 @@ COPY (
 ) toWriter
 TO %adapter%
 PATH (%pathprefix% "copy-to-result", "parquet-tweet")
-TYPE ( {
-               coordinates: {
-                 coordinates: [
-                   double
-                 ],
-                 `type` : string
-               },
-               created_at: string,
-               entities: {
-                 urls: [
-                   {
-                     display_url: string,
-                     expanded_url: string,
-                     indices: [
-                       int
-                     ],
-                     url: string
-                   }
-                 ],
-                 user_mentions: [
-                   {
-                     id: int,
-                     id_str: string,
-                     indices: [
-                       int
-                     ],
-                     name: string,
-                     screen_name: string
-                   }
-                 ]
-               },
-               favorite_count: int,
-               favorited: boolean,
-               filter_level: string,
-               geo: {
-                 coordinates: [
-                   double
-                 ],
-                 `type`: string
-               },
-               id: string,
-               id_str: string,
-               in_reply_to_screen_name: string,
-               in_reply_to_status_id: int,
-               in_reply_to_status_id_str: string,
-               in_reply_to_user_id: int,
-               in_reply_to_user_id_str: string,
-               is_quote_status: boolean,
-               lang: string,
-               place: {
-                 bounding_box: {
-                   coordinates: [
-                     [
-                       [
-                         double
-                       ]
-                     ]
-                   ],
-                   `type`: string
-                 },
-                 country: string,
-                 country_code: string,
-                 full_name: string,
-                 id: string,
-                 name: string,
-                 place_type: string,
-                 url: string
-               },
-               possibly_sensitive: boolean,
-               quoted_status: {
-                 created_at: string,
-                 entities: {
-                   user_mentions: [
-                     {
-                       id: int,
-                       id_str: string,
-                       indices: [
-                         int
-                       ],
-                       name: string,
-                       screen_name: string
-                     }
-                   ]
-                 },
-                 favorite_count: int,
-                 favorited: boolean,
-                 filter_level: string,
-                 id: int,
-                 id_str: string,
-                 in_reply_to_screen_name: string,
-                 in_reply_to_status_id: int,
-                 in_reply_to_status_id_str: string,
-                 in_reply_to_user_id: int,
-                 in_reply_to_user_id_str: string,
-                 is_quote_status: boolean,
-                 lang: string,
-                 retweet_count: int,
-                 retweeted: boolean,
-                 source: string,
-                 text: string,
-                 truncated: boolean,
-                 user: {
-                   contributors_enabled: boolean,
-                   created_at: string,
-                   default_profile: boolean,
-                   default_profile_image: boolean,
-                   description: string,
-                   favourites_count: int,
-                   followers_count: int,
-                   friends_count: int,
-                   geo_enabled: boolean,
-                   id: int,
-                   id_str: string,
-                   is_translator: boolean,
-                   lang: string,
-                   listed_count: int,
-                   name: string,
-                   profile_background_color: string,
-                   profile_background_image_url: string,
-                   profile_background_image_url_https: string,
-                   profile_background_tile: boolean,
-                   profile_banner_url: string,
-                   profile_image_url: string,
-                   profile_image_url_https: string,
-                   profile_link_color: string,
-                   profile_sidebar_border_color: string,
-                   profile_sidebar_fill_color: string,
-                   profile_text_color: string,
-                   profile_use_background_image: boolean,
-                   protected: boolean,
-                   screen_name: string,
-                   statuses_count: int,
-                   verified: boolean
-                 }
-               },
-               quoted_status_id: int,
-               quoted_status_id_str: string,
-               retweet_count: int,
-               retweeted: boolean,
-               source: string,
-               text: string,
-               timestamp_ms: string,
-               truncated: boolean,
-               user: {
-                 contributors_enabled: boolean,
-                 created_at: string,
-                 default_profile: boolean,
-                 default_profile_image: boolean,
-                 description: string,
-                 favourites_count: int,
-                 followers_count: int,
-                 friends_count: int,
-                 geo_enabled: boolean,
-                 id: int,
-                 id_str: string,
-                 is_translator: boolean,
-                 lang: string,
-                 listed_count: int,
-                 location: string,
-                 name: string,
-                 profile_background_color: string,
-                 profile_background_image_url: string,
-                 profile_background_image_url_https: string,
-                 profile_background_tile: boolean,
-                 profile_banner_url: string,
-                 profile_image_url: string,
-                 profile_image_url_https: string,
-                 profile_link_color: string,
-                 profile_sidebar_border_color: string,
-                 profile_sidebar_fill_color: string,
-                 profile_text_color: string,
-                 profile_use_background_image: boolean,
-                 protected: boolean,
-                 screen_name: string,
-                 statuses_count: int,
-                 time_zone: string,
-                 url: string,
-                 utc_offset: int,
-                 verified: boolean
-               }
-             } )
 WITH {
     %template_colons%,
     %additionalProperties%
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp
index 9a6f3c408d..b0f7cb402c 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.02.update.sqlpp
@@ -20,7 +20,7 @@
 use test;
 
 
-insert into TestCollection({"id":2});
+insert into TestCollection({"id":2, "rating" : null, "ratings" : [null]});
 insert into TestCollection({"id":5,"rating" :1 , "ratings" : [] });
 insert into TestCollection({"id":8,"rating" :2 , "ratings" : [ 1 ] });
 insert into TestCollection({"id":10,"rating" :3.0 , "ratings" : [ 1, 2, 3] });
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
index ad94373b1d..a1283b6b14 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
@@ -24,7 +24,6 @@ COPY (
 ) toWriter
 TO %adapter%
 PATH (%pathprefix% "copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
 WITH {
     %template_colons%,
     %additionalProperties%
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm
index 5e0df967f3..aec4326c48 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-tweet/parquet-tweet.05.adm
@@ -1,2 +1,2 @@
-{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": 
"string", "entities": { "urls": [ { "display_url": "string", "expanded_url": 
"string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, 
"id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" 
} ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": 
{ "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": 
"string", "in_reply_to_scr [...]
-{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": 
"string", "favorite_count": 1, "favorited": true, "filter_level": "string", 
"geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": 
"11111111111111111111", "id_str": "string", "in_reply_to_screen_name": 
"string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", 
"in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", 
"is_quote_status": true, "lang": "string", "place": { "bounding_box": [...]
\ No newline at end of file
+{ "quoted_status": { "in_reply_to_status_id_str": "string", 
"in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": 
"string", "truncated": true, "source": "string", "retweet_count": 1, 
"retweeted": true, "filter_level": "string", "in_reply_to_screen_name": 
"string", "is_quote_status": true, "entities": { "user_mentions": [ { 
"indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": 
"string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1,  [...]
+{ "quoted_status": { "in_reply_to_status_id_str": "string", 
"in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": 
"string", "truncated": true, "source": "string", "retweet_count": 1, 
"retweeted": true, "filter_level": "string", "in_reply_to_screen_name": 
"string", "is_quote_status": true, "entities": { "user_mentions": [ { 
"indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": 
"string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1,  [...]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
new file mode 100644
index 0000000000..763b652306
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
@@ -0,0 +1,5 @@
+{ "id": 8, "nested": { "List": [ 100, 300 ] }, "obj_array": [ { "first": 
"first" }, {  }, { "first": "second" } ], "a": {  }, "arr": [ [ 1, 2 ] ] }
+{ "id": 10, "name": "Virat", "nested": { "List": [  ] }, "obj_array": [ { 
"first": "first" }, { "first": "second" } ], "a": {  }, "c": {  }, "f": [  ], 
"arr": [ [ 1, 2 ], [  ] ] }
+{ "id": 28, "name": "Virat", "nested": { "List": [  ], "A": "a" }, 
"obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, 
"c": { "d": 1 }, "f": [ 1.0 ], "arr": [ [ 1, 2 ], [  ] ] }
+{ "id": 34, "nested": { "randomK": "randomV" }, "obj_array": [ { "first": 
"first" }, { "first": "second" } ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "arr": 
[ [  ] ] }
+{ "id": 37, "name": "Kohli", "nested": { "List": [ 1, 2, 3 ], "A": "a" }, 
"obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, 
"c": { "d": 1, "e": 1 }, "f": [ 3.5999999046325684, 4.0 ], "arr": [ [ 1, 2, 3 ] 
] }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
new file mode 100644
index 0000000000..628b82f725
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
@@ -0,0 +1,5 @@
+{ "arr": [ [ 1, 2 ] ], "a": {  }, "id": 8, "nested": { "List": [ 100, 300 ] }, 
"obj_array": [ { "first": "first" }, {  }, { "first": "second" } ] }
+{ "arr": [ [ 1, 2 ], [  ] ], "a": {  }, "c": {  }, "f": [  ], "name": "Virat", 
"id": 10, "nested": { "List": [  ] }, "obj_array": [ { "first": "first" }, { 
"first": "second" } ] }
+{ "arr": [ [ 1, 2 ], [  ] ], "a": { "b": 1 }, "c": { "d": 1 }, "f": [ 1.0 ], 
"name": "Virat", "id": 28, "nested": { "A": "a", "List": [  ] }, "obj_array": [ 
{ "first": "first" }, { "first": "second" } ] }
+{ "arr": [ [  ] ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "id": 34, "nested": { 
"randomK": "randomV" }, "obj_array": [ { "first": "first" }, { "first": 
"second" } ] }
+{ "arr": [ [ 1, 2, 3 ] ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 
3.6, 4.0 ], "name": "Kohli", "id": 37, "nested": { "A": "a", "List": [ 1, 2, 3 
] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm
index 5e0df967f3..aec4326c48 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm
@@ -1,2 +1,2 @@
-{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": 
"string", "entities": { "urls": [ { "display_url": "string", "expanded_url": 
"string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, 
"id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" 
} ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": 
{ "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": 
"string", "in_reply_to_scr [...]
-{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": 
"string", "favorite_count": 1, "favorited": true, "filter_level": "string", 
"geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": 
"11111111111111111111", "id_str": "string", "in_reply_to_screen_name": 
"string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", 
"in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", 
"is_quote_status": true, "lang": "string", "place": { "bounding_box": [...]
\ No newline at end of file
+{ "quoted_status": { "in_reply_to_status_id_str": "string", 
"in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": 
"string", "truncated": true, "source": "string", "retweet_count": 1, 
"retweeted": true, "filter_level": "string", "in_reply_to_screen_name": 
"string", "is_quote_status": true, "entities": { "user_mentions": [ { 
"indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": 
"string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1,  [...]
+{ "quoted_status": { "in_reply_to_status_id_str": "string", 
"in_reply_to_status_id": 1, "created_at": "string", "in_reply_to_user_id_str": 
"string", "truncated": true, "source": "string", "retweet_count": 1, 
"retweeted": true, "filter_level": "string", "in_reply_to_screen_name": 
"string", "is_quote_status": true, "entities": { "user_mentions": [ { 
"indices": [ 1 ], "screen_name": "string", "id_str": "string", "name": 
"string", "id": 1 } ] }, "id_str": "string", "in_reply_to_user_id": 1,  [...]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
index 5c4c334aca..4fd973e9bc 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
@@ -1,11 +1,11 @@
-{ "id": 2 }
+{ "ratings": [  ], "id": 2 }
 { "ratings": [  ], "rating": 1.0, "id": 5 }
-{ "ratings": [ 1 ], "rating": 2, "id": 8 }
-{ "ratings": [ 1, 2, 3 ], "rating": 3, "id": 10 }
+{ "ratings": [ 1 ], "rating": 2.0, "id": 8 }
+{ "ratings": [ 1, 2, 3 ], "rating": 3.0, "id": 10 }
 { "ratings": [ 1.0, 2.0, 3.0, 4.0, 5.0 ], "rating": 4.3, "id": 12 }
 { "ratings": [ 1.0, 2.0, 3.0, 4.0, 5.0 ], "rating": 4.7, "id": 15 }
 { "ratings": [ 1.1111, 2.222222, 3.3333, 4.44444, 5.555555 ], "rating": 
4.22222, "id": 17 }
 { "ratings": [ 1.0, 2.0, 3.0, 4.0, 5.0 ], "rating": 5.455555555, "id": 20 }
 { "ratings": [ 0.0, 6.7 ], "rating": 1.0, "id": 21 }
 { "ratings": [ 1.0 ], "rating": 8.0, "id": 27 }
-{ "ratings": [  ], "rating": 3, "id": 28 }
+{ "ratings": [  ], "rating": 3.0, "id": 28 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm
index c60145d7f4..3ea2e8d765 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm
@@ -1,8 +1,8 @@
-{ "id": 1, "name": "John" }
-{ "id": 2, "name": "Abel" }
-{ "id": 3, "name": "Sandy" }
-{ "id": 4, "name": "Alex" }
-{ "id": 5, "name": "Mike" }
-{ "id": 6, "name": "Tom" }
-{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7, "name": "Jerry" }
-{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. 
حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 
😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee 
☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 
= 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. 
Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffe
 e ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حس [...]
+{ "name": "John", "id": 1 }
+{ "name": "Abel", "id": 2 }
+{ "name": "Sandy", "id": 3 }
+{ "name": "Alex", "id": 4 }
+{ "name": "Mike", "id": 5 }
+{ "name": "Tom", "id": 6 }
+{ "name": "Jerry", "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7 }
+{ "name": "William", "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 
😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee 
☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 
= 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. 
Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. 
حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢
 😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢 [...]
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index ae0a5207b2..a1ca8038a8 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -114,6 +114,16 @@
         <output-dir compare="Text">parquet-cover-data-types</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="copy-to">
+      <compilation-unit name="parquet-null-type">
+        <placeholder name="adapter" value="S3" />
+        <placeholder name="pathprefix" value="" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additionalProperties" 
value='"container":"playground",' />
+        <placeholder name="additional_Properties" 
value='("container"="playground")' />
+        <output-dir compare="Text">parquet-null-type</output-dir>
+      </compilation-unit>
+    </test-case>
     <test-case FilePath="copy-to">
       <compilation-unit name="parquet-file-writers">
         <placeholder name="adapter" value="S3" />
@@ -256,6 +266,8 @@
         <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
         <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
         <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
+        <expected-error>HYR0132: Extra field in the result, field 'c' does not 
exist at 'root' in the schema</expected-error>
+        <expected-error>ASX0072: Parquet does not support arrays containing 
mixed data types</expected-error>
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to/negative">
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
index ca87cede48..221042aa22 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
@@ -37,11 +37,11 @@ import org.apache.logging.log4j.Logger;
 public class ParquetSchemaInferPoolWriter {
     private static final Logger LOGGER = LogManager.getLogger();
     private final ParquetExternalWriterFactory writerFactory;
-    private List<ParquetSchemaTree.SchemaNode> schemaNodes;
-    private List<IExternalWriter> writerList;
+    private final List<ParquetSchemaTree.SchemaNode> schemaNodes;
+    private final List<IExternalWriter> writerList;
     private final int maxSchemas;
-    private ISchemaChecker schemaChecker;
-    private ParquetSchemaLazyVisitor schemaLazyVisitor;
+    private final ISchemaChecker schemaChecker;
+    private final ParquetSchemaLazyVisitor schemaLazyVisitor;
 
     public ParquetSchemaInferPoolWriter(ParquetExternalWriterFactory 
writerFactory, ISchemaChecker schemaChecker,
             ParquetSchemaLazyVisitor parquetSchemaLazyVisitor, int maxSchemas) 
{
@@ -57,12 +57,11 @@ public class ParquetSchemaInferPoolWriter {
         for (int i = 0; i < schemaNodes.size(); i++) {
             ISchemaChecker.SchemaComparisonType schemaComparisonType =
                     schemaChecker.checkSchema(schemaNodes.get(i), value);
-
             if 
(schemaComparisonType.equals(ISchemaChecker.SchemaComparisonType.EQUIVALENT)) {
                 return;
             } else if 
(schemaComparisonType.equals(ISchemaChecker.SchemaComparisonType.GROWING)) {
                 // If the schema is growing, close the existing writer and 
create a new one with the new schema.
-                schemaNodes.set(i, schemaLazyVisitor.inferSchema(value));
+                schemaLazyVisitor.updateSchema(value, schemaNodes.get(i));
                 closeWriter(i);
                 return;
             }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index 046c03f707..1a3aea1c43 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -40,16 +40,15 @@ public class ParquetExternalFilePrinter implements 
IExternalPrinter {
     private final CompressionCodecName compressionCodecName;
     private final MessageType schema;
     private ParquetOutputFile parquetOutputFile;
-    //    private String parquetSchemaString;
     private ParquetWriter<IValueReference> writer;
     private final long rowGroupSize;
     private final int pageSize;
     private final ParquetProperties.WriterVersion writerVersion;
 
-    public ParquetExternalFilePrinter(CompressionCodecName 
compressionCodecName, MessageType parquetSchemaString,
+    public ParquetExternalFilePrinter(CompressionCodecName 
compressionCodecName, MessageType parquetSchema,
             IAType typeInfo, long rowGroupSize, int pageSize, 
ParquetProperties.WriterVersion writerVersion) {
         this.compressionCodecName = compressionCodecName;
-        this.schema = parquetSchemaString;
+        this.schema = parquetSchema;
         this.typeInfo = typeInfo;
         this.rowGroupSize = rowGroupSize;
         this.pageSize = pageSize;
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index cffeb2fde7..d57d3c7e11 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -23,6 +23,7 @@ import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWri
 import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD;
 import static 
org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD;
 
+import org.apache.asterix.common.exceptions.RuntimeDataException;
 import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
 import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
 import org.apache.asterix.om.lazy.FlatLazyVisitablePointable;
@@ -43,6 +44,103 @@ import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.Type;
 
+/**
+ *
+ *
+ *
+ *
+ * Let's say we have the following record type:
+ { a : int, b : [ int ] , c : { d : int }, e : [ { f : int } ]  }
+
+ The corresponding parquet Schema :
+ required group schema {
+ optional int64 a;
+ optional group b (List) {
+ repeated group list {
+ optional int64 element;
+ }
+ }
+ optional group c {
+ optional int64 d;
+ }
+ optional group e (List) {
+ repeated group list {
+ optional group element {
+ optional int64 f;
+ }
+ }
+ }
+ }
+
+ The recordConsumer will be called as follows for different cases:
+
+ 
=======================================================================================================================
+
+ writing into a :
+ startField("a")
+ addValue()
+ endField("a")
+
+ 
=======================================================================================================================
+
+
+ writing into b:                 b is an empty array             b has a null 
element
+
+ startField("b")                 startField("b")                 
startField("b")
+ startGroup()                    startGroup()                    startGroup()
+ startField("list")                                              
startField("list")
+ startGroup()                                                    startGroup()
+ startField("element")
+ addValue()
+ endField("element")
+ endGroup()                                                      endGroup()
+ endField("list")                                                
endField("list")
+ endGroup()                      endGroup()                      endGroup()
+ endField("b")                   endField("b")                   endField("b")
+
+ 
=======================================================================================================================
+
+
+ writing into d:                 d is null                       c is an empty 
object
+                                 c : { d : null }                c : {}
+
+ startField("c")                 startField("c")                 
startField("c")
+ startGroup()                    startGroup()                    startGroup()
+ startField("d")
+ addValue()
+ endField("d")
+ endGroup()                      endGroup()                      endGroup()
+ endField("c")                   endField("c")                   endField("c")
+
+
+
+ 
=======================================================================================================================
+
+
+ writing into f:                 e is an empty array             e has nulls   
              e has empty objects
+                                 e : []                          e : [ null ]                 e : [ {} ]
+
+
+ startField("e")                 startField("e")                 
startField("e")             startField("e")
+ startGroup()                    startGroup()                    startGroup()  
              startGroup()
+ startField("list")                                              
startField("list")          startField("list")
+ startGroup()                                                    startGroup()  
              startGroup()
+ startField("element")                                                         
              startField("element")
+ startGroup()                                                                  
              startGroup()
+ startField("f")
+ addValue()
+ endField("f")
+ endGroup()                                                                    
              endGroup()
+ endField("element")                                                           
              endField("element")
+ endGroup()                                                      endGroup()    
              endGroup()
+ endField("list")                                                
endField("list")            endField("list")
+ endGroup()                      endGroup()                      endGroup()    
              endGroup()
+ endField("e")                   endField("e")                   endField("e") 
              endField("e")
+
+ *
+ *
+ */
+
 public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<Void, Type> {
     private static final Logger LOGGER = LogManager.getLogger();
     private final MessageType schema;
@@ -80,24 +178,39 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
                     PRIMITIVE_TYPE_ERROR_FIELD, type.getName());
         }
         GroupType groupType = type.asGroupType();
+        int nonMissingChildren = 0;
         recordConsumer.startGroup();
 
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
             String columnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName());
-
+            if (child.getTypeTag() == ATypeTag.MISSING) {
+                continue;
+            }
+            nonMissingChildren++;
             if (!groupType.containsField(columnName)) {
                 LOGGER.info("Group type: {} does not contain field in record 
type: {}",
                         LogRedactionUtil.userData(groupType.getName()), 
LogRedactionUtil.userData(columnName));
                 throw new 
HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, 
columnName,
                         groupType.getName());
             }
+
+            if (child.getTypeTag() == ATypeTag.NULL) {
+                continue;
+            }
+
             recordConsumer.startField(columnName, 
groupType.getFieldIndex(columnName));
             child.accept(this, groupType.getType(columnName));
             recordConsumer.endField(columnName, 
groupType.getFieldIndex(columnName));
         }
         recordConsumer.endGroup();
+        if (nonMissingChildren != groupType.getFieldCount()) {
+            LOGGER.info("Some Missing fields in group type: {}.", 
LogRedactionUtil.userData(groupType.toString()));
+            throw 
RuntimeDataException.create(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, 
"Non-Missing", "Missing",
+                    groupType.getName());
+        }
+
         return null;
     }
 
@@ -142,15 +255,22 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
             for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
                 pointable.nextChild();
                 AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
-
+                if (child.getTypeTag() == ATypeTag.MISSING) {
+                    LOGGER.info("Missing value in list type: {}", 
LogRedactionUtil.userData(groupType.getName()));
+                    throw new 
HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, "Non-Missing", 
"Missing",
+                            groupType.getName());
+                }
                 recordConsumer.startGroup();
+                if (child.getTypeTag() == ATypeTag.NULL) {
+                    recordConsumer.endGroup();
+                    continue;
+                }
+
                 recordConsumer.startField(ELEMENT_FIELD, 
listType.getFieldIndex(ELEMENT_FIELD));
                 child.accept(this, listType.getType(ELEMENT_FIELD));
                 recordConsumer.endField(ELEMENT_FIELD, 
listType.getFieldIndex(ELEMENT_FIELD));
                 recordConsumer.endGroup();
-
             }
-
             recordConsumer.endField(LIST_FIELD, 
groupType.getFieldIndex(LIST_FIELD));
         }
 
@@ -174,24 +294,34 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
             throws HyracksDataException {
         rec.set(valueReference);
         this.recordConsumer = recordConsumer;
+        int nonMissingChildren = 0;
 
         recordConsumer.startMessage();
         for (int i = 0; i < rec.getNumberOfChildren(); i++) {
             rec.nextChild();
             String columnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(rec.getFieldName());
             AbstractLazyVisitablePointable child = 
rec.getChildVisitablePointable();
-
+            if (child.getTypeTag() == ATypeTag.MISSING) {
+                continue;
+            }
+            nonMissingChildren++;
             if (!schema.containsField(columnName)) {
                 LOGGER.info("Schema: {} does not contain field: {}", 
LogRedactionUtil.userData(schema.toString()),
                         LogRedactionUtil.userData(columnName));
-                throw new 
HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, 
columnName,
-                        schema.getName());
+                throw new 
HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, 
columnName, "root");
+            }
+            if (child.getTypeTag() == ATypeTag.NULL) {
+                continue;
             }
-
             recordConsumer.startField(columnName, 
schema.getFieldIndex(columnName));
             child.accept(this, schema.getType(columnName));
             recordConsumer.endField(columnName, 
schema.getFieldIndex(columnName));
         }
+        if (nonMissingChildren != schema.getFieldCount()) {
+            LOGGER.info("Some Missing fields in group type: {}.", 
LogRedactionUtil.userData(schema.toString()));
+            throw 
RuntimeDataException.create(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, 
"Non-Missing", "Missing",
+                    "root");
+        }
         recordConsumer.endMessage();
     }
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index 2278372b4c..3f0de608d1 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -48,6 +48,7 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
     private final RecordLazyVisitablePointable rec;
     private final FieldNamesDictionary fieldNamesDictionary;
     private final static String SCHEMA_NAME = "asterix_schema";
+    private boolean foundMissing = false;
 
     public ParquetSchemaLazyVisitor(IAType typeInfo) {
         this.fieldNamesDictionary = new FieldNamesDictionary();
@@ -66,12 +67,11 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
         if (schemaNode.getType() == null) {
             schemaNode.setType(new ParquetSchemaTree.RecordType());
         }
-        if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) {
+        if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType 
recordType)) {
             LOGGER.info("Incompatible type found in record: {} and {}",
                     LogRedactionUtil.userData(schemaNode.toString()), 
pointable.getTypeTag());
             throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
-        ParquetSchemaTree.RecordType recordType = 
(ParquetSchemaTree.RecordType) schemaNode.getType();
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
@@ -83,6 +83,7 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
                 childType = new ParquetSchemaTree.SchemaNode();
                 recordType.add(childColumnName, childType);
             }
+            // Can optimize by reducing new object creation
             child.accept(this, childType);
         }
         return null;
@@ -91,6 +92,7 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
     @Override
     public Void visit(AbstractListLazyVisitablePointable pointable, 
ParquetSchemaTree.SchemaNode schemaNode)
             throws HyracksDataException {
+
         if (schemaNode.getType() == null) {
             schemaNode.setType(new ParquetSchemaTree.ListType());
         }
@@ -102,6 +104,11 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
         for (int i = 0; i < numChildren; i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
+
+            if(child.getTypeTag()==ATypeTag.MISSING) {
+                throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
+            }
+
             if (listType.isEmpty()) {
                 listType.setChild(new ParquetSchemaTree.SchemaNode());
             }
@@ -111,36 +118,57 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
     }
 
     @Override
-    public Void visit(FlatLazyVisitablePointable pointable, 
ParquetSchemaTree.SchemaNode schemaNode)
+    public Void visit(FlatLazyVisitablePointable 
pointable,ParquetSchemaTree.SchemaNode schemaNode)
             throws HyracksDataException {
+        if(pointable.getTypeTag() == ATypeTag.NULL) {
+            return  null;
+        }
+
         if (schemaNode.getType() == null) {
+            if (pointable.getTypeTag() == ATypeTag.MISSING)
+            {
+                foundMissing = true;
+                schemaNode.setType(new 
ParquetSchemaTree.FlatType(ATypeTag.MISSING));
+                return null;
+            }
             if 
(!AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.containsKey(pointable.getTypeTag()))
 {
                 throw 
RuntimeDataException.create(TYPE_UNSUPPORTED_PARQUET_WRITE, 
pointable.getTypeTag());
             }
             schemaNode.setType(new 
ParquetSchemaTree.FlatType(pointable.getTypeTag()));
             return null;
         }
-        if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) {
+        if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType 
flatType)) {
             LOGGER.info("Incompatible type found: {} and {}", 
LogRedactionUtil.userData(schemaNode.toString()),
                     pointable.getTypeTag());
             throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
-        ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType) 
schemaNode.getType();
-
         if (!flatType.isCompatibleWith(pointable.getTypeTag())) {
             LOGGER.info("Incompatible type found: {} and {}", flatType, 
pointable.getTypeTag());
             throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
 
         flatType.coalesce(pointable.getTypeTag());
-
         return null;
     }
 
+    public void updateSchema(IValueReference valueReference, 
ParquetSchemaTree.SchemaNode previousSchema)
+            throws HyracksDataException {
+        rec.set(valueReference);
+        foundMissing = false;
+        rec.accept(this, previousSchema);
+        if (foundMissing) {
+            removeMissing(previousSchema);
+        }
+    }
+
     public ParquetSchemaTree.SchemaNode inferSchema(IValueReference 
valueReference) throws HyracksDataException {
         ParquetSchemaTree.SchemaNode schemaNode = new 
ParquetSchemaTree.SchemaNode();
         rec.set(valueReference);
+        foundMissing = false;
         rec.accept(this, schemaNode);
+        if (foundMissing) {
+            removeMissing(schemaNode);
+        }
         return schemaNode;
     }
 
@@ -156,4 +184,24 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
         return builder.named(SCHEMA_NAME);
     }
 
+    private static void removeMissing(ParquetSchemaTree.SchemaNode schemaNode) 
{
+        if (schemaNode.getType() == null) {
+         return;
+        }
+        if (schemaNode.getType() instanceof ParquetSchemaTree.RecordType 
recordType) {
+            recordType.getChildren().entrySet().removeIf(
+                    entry ->  (entry.getValue().getType() instanceof 
ParquetSchemaTree.FlatType flatType &&  flatType.getTypeTag() == 
ATypeTag.MISSING));
+
+            for (Map.Entry<String, ParquetSchemaTree.SchemaNode> entry : 
recordType.getChildren().entrySet()) {
+                removeMissing(entry.getValue());
+            }
+        }
+
+        if (schemaNode.getType() instanceof ParquetSchemaTree.ListType 
listType) {
+            if (listType.isEmpty()) {
+                return;
+            }
+            removeMissing(listType.getChild());
+        }
+    }
 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java
index dae5295fef..5fb2ac7c6e 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaTree.java
@@ -115,6 +115,22 @@ public class ParquetSchemaTree {
             }
         }
 
+        public boolean isStrictParentOf(ATypeTag childTypeTag) {
+            if (!isHierarchical || 
!AsterixParquetTypeMap.HIERARCHIAL_TYPES.containsKey(childTypeTag)) {
+                return false;
+            }
+            return AsterixParquetTypeMap.HIERARCHIAL_TYPES.get(this.typeTag) > 
AsterixParquetTypeMap.HIERARCHIAL_TYPES
+                    .get(childTypeTag);
+        }
+
+        public boolean isStrictChildOf(ATypeTag parentTypeTag) {
+            if (!isHierarchical || 
!AsterixParquetTypeMap.HIERARCHIAL_TYPES.containsKey(parentTypeTag)) {
+                return false;
+            }
+            return AsterixParquetTypeMap.HIERARCHIAL_TYPES.get(this.typeTag) < 
AsterixParquetTypeMap.HIERARCHIAL_TYPES
+                    .get(parentTypeTag);
+        }
+
         public void coalesce(ATypeTag typeTag) {
             if (!isCompatibleWith(typeTag) || !isHierarchical) {
                 return;
@@ -129,6 +145,10 @@ public class ParquetSchemaTree {
         public PrimitiveType.PrimitiveTypeName getPrimitiveTypeName() {
             return AsterixParquetTypeMap.PRIMITIVE_TYPE_NAME_MAP.get(typeTag);
         }
+
+        public ATypeTag getTypeTag() {
+            return typeTag;
+        }
     }
 
     static class ListType extends AbstractType {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
index 28d4247f14..e484e84035 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
@@ -52,28 +52,33 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
     public ISchemaChecker.SchemaComparisonType 
visit(RecordLazyVisitablePointable pointable,
             ParquetSchemaTree.SchemaNode schemaNode) throws 
HyracksDataException {
         if (schemaNode.getType() == null) {
-            return ISchemaChecker.SchemaComparisonType.GROWING;
+            return SchemaComparisonType.GROWING;
         }
 
-        if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType)) {
+        if (!(schemaNode.getType() instanceof ParquetSchemaTree.RecordType 
recordType)) {
             return ISchemaChecker.SchemaComparisonType.CONFLICTING;
         }
 
-        ParquetSchemaTree.RecordType recordType = 
(ParquetSchemaTree.RecordType) schemaNode.getType();
         ISchemaChecker.SchemaComparisonType schemaComparisonType = 
ISchemaChecker.SchemaComparisonType.EQUIVALENT;
-
+        int nonMissingChildren = 0;
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
+            if(child.getTypeTag() == ATypeTag.MISSING){
+            continue;
+            }
+            nonMissingChildren++;
             String childColumnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName());
             ParquetSchemaTree.SchemaNode childType = 
recordType.getChildren().get(childColumnName);
             if (childType == null) {
-                schemaComparisonType =
-                        ISchemaChecker.max(schemaComparisonType, 
ISchemaChecker.SchemaComparisonType.GROWING);
+                schemaComparisonType = 
ISchemaChecker.max(schemaComparisonType, SchemaComparisonType.CONFLICTING);
                 continue;
             }
             schemaComparisonType = ISchemaChecker.max(schemaComparisonType, 
child.accept(this, childType));
         }
+        if(nonMissingChildren!= recordType.getChildren().size()) {
+            return SchemaComparisonType.CONFLICTING;
+        }
         return schemaComparisonType;
     }
 
@@ -81,7 +86,7 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
     public ISchemaChecker.SchemaComparisonType 
visit(AbstractListLazyVisitablePointable pointable,
             ParquetSchemaTree.SchemaNode schemaNode) throws 
HyracksDataException {
         if (schemaNode.getType() == null) {
-            return ISchemaChecker.SchemaComparisonType.GROWING;
+            return SchemaComparisonType.GROWING;
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.ListType)) {
             return ISchemaChecker.SchemaComparisonType.CONFLICTING;
@@ -93,6 +98,9 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
+            if (child.getTypeTag() == ATypeTag.MISSING) {
+                throw new HyracksDataException("Missing values are not allowed 
in lists for parquet printing.");
+            }
             if (listType.isEmpty()) {
                 schemaComparisonType =
                         ISchemaChecker.max(schemaComparisonType, 
ISchemaChecker.SchemaComparisonType.GROWING);
@@ -104,22 +112,33 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
     }
 
     @Override
-    public ISchemaChecker.SchemaComparisonType 
visit(FlatLazyVisitablePointable pointable,
+    public ISchemaChecker.SchemaComparisonType 
visit(FlatLazyVisitablePointable currentValue,
             ParquetSchemaTree.SchemaNode schemaNode) throws 
HyracksDataException {
         if (schemaNode.getType() == null) {
-            return ISchemaChecker.SchemaComparisonType.GROWING;
+            return SchemaComparisonType.GROWING;
+        }
+        // The schema's flat type tag can never be MISSING here (MISSING fields are pruned from the inferred schema)
+        if(currentValue.getTypeTag()==ATypeTag.NULL){
+            return SchemaComparisonType.EQUIVALENT;
         }
-        if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType)) {
+        if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType 
inferredType)) {
             return ISchemaChecker.SchemaComparisonType.CONFLICTING;
         }
 
-        ParquetSchemaTree.FlatType flatType = (ParquetSchemaTree.FlatType) 
schemaNode.getType();
-
-        if (!flatType.isCompatibleWith(pointable.getTypeTag())) {
+        if (inferredType.getTypeTag() == currentValue.getTypeTag()) {
+            return ISchemaChecker.SchemaComparisonType.EQUIVALENT;
+        }
+        if (!inferredType.isCompatibleWith(currentValue.getTypeTag())) {
             return ISchemaChecker.SchemaComparisonType.CONFLICTING;
         }
+        if(inferredType.isStrictChildOf(currentValue.getTypeTag())) {
+            return ISchemaChecker.SchemaComparisonType.GROWING;
+        }
+        if (inferredType.isStrictParentOf(currentValue.getTypeTag())) {
+            return ISchemaChecker.SchemaComparisonType.EQUIVALENT;
+        }
 
-        return ISchemaChecker.SchemaComparisonType.EQUIVALENT;
+        return SchemaComparisonType.CONFLICTING;
     }
 
     @Override

Reply via email to