This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit e5a535fa562f2e3b73c0e79d0a5c1ffafa049e31
Author: preetham0202 <[email protected]>
AuthorDate: Sun Jul 27 01:45:19 2025 +0530

    [ASTERIXDB-3392] Fix Hdfs tests
    
     - user model changes: no
     - storage format changes: no
     - interface changes: no
    
    Ext-ref: MB-66710
    Change-Id: I60fcbc11db2a9b1b4973bdcdb803db8481863071
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20144
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Preetham Poluparthi <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
---
 .../parquet-tweet/parquet-tweet.03.update.sqlpp    | 181 ---------------------
 .../parquet-utf8/parquet-utf8.03.update.sqlpp      |   1 -
 .../copy-to-hdfs/parquet-utf8/parquet-utf8.05.adm  |  16 +-
 .../resources/runtimets/testsuite_sqlpp_hdfs.xml   |   2 +
 .../printer/parquet/ParquetRecordLazyVisitor.java  |   4 +-
 .../printer/parquet/ParquetSchemaLazyVisitor.java  |  21 +--
 .../printer/parquet/SchemaCheckerLazyVisitor.java  |  13 +-
 7 files changed, 29 insertions(+), 209 deletions(-)

diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-tweet/parquet-tweet.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-tweet/parquet-tweet.03.update.sqlpp
index 9bae74bfec..2b265dcccc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-tweet/parquet-tweet.03.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-tweet/parquet-tweet.03.update.sqlpp
@@ -24,187 +24,6 @@ COPY (
 ) toWriter
 TO hdfs
 PATH ("copy-to-result", "parquet-tweet")
-TYPE ( {
-               coordinates: {
-                 coordinates: [
-                   double
-                 ],
-                 `type` : string
-               },
-               created_at: string,
-               entities: {
-                 urls: [
-                   {
-                     display_url: string,
-                     expanded_url: string,
-                     indices: [
-                       int
-                     ],
-                     url: string
-                   }
-                 ],
-                 user_mentions: [
-                   {
-                     id: int,
-                     id_str: string,
-                     indices: [
-                       int
-                     ],
-                     name: string,
-                     screen_name: string
-                   }
-                 ]
-               },
-               favorite_count: int,
-               favorited: boolean,
-               filter_level: string,
-               geo: {
-                 coordinates: [
-                   double
-                 ],
-                 `type`: string
-               },
-               id: string,
-               id_str: string,
-               in_reply_to_screen_name: string,
-               in_reply_to_status_id: int,
-               in_reply_to_status_id_str: string,
-               in_reply_to_user_id: int,
-               in_reply_to_user_id_str: string,
-               is_quote_status: boolean,
-               lang: string,
-               place: {
-                 bounding_box: {
-                   coordinates: [
-                     [
-                       [
-                         double
-                       ]
-                     ]
-                   ],
-                   `type`: string
-                 },
-                 country: string,
-                 country_code: string,
-                 full_name: string,
-                 id: string,
-                 name: string,
-                 place_type: string,
-                 url: string
-               },
-               possibly_sensitive: boolean,
-               quoted_status: {
-                 created_at: string,
-                 entities: {
-                   user_mentions: [
-                     {
-                       id: int,
-                       id_str: string,
-                       indices: [
-                         int
-                       ],
-                       name: string,
-                       screen_name: string
-                     }
-                   ]
-                 },
-                 favorite_count: int,
-                 favorited: boolean,
-                 filter_level: string,
-                 id: int,
-                 id_str: string,
-                 in_reply_to_screen_name: string,
-                 in_reply_to_status_id: int,
-                 in_reply_to_status_id_str: string,
-                 in_reply_to_user_id: int,
-                 in_reply_to_user_id_str: string,
-                 is_quote_status: boolean,
-                 lang: string,
-                 retweet_count: int,
-                 retweeted: boolean,
-                 source: string,
-                 text: string,
-                 truncated: boolean,
-                 user: {
-                   contributors_enabled: boolean,
-                   created_at: string,
-                   default_profile: boolean,
-                   default_profile_image: boolean,
-                   description: string,
-                   favourites_count: int,
-                   followers_count: int,
-                   friends_count: int,
-                   geo_enabled: boolean,
-                   id: int,
-                   id_str: string,
-                   is_translator: boolean,
-                   lang: string,
-                   listed_count: int,
-                   name: string,
-                   profile_background_color: string,
-                   profile_background_image_url: string,
-                   profile_background_image_url_https: string,
-                   profile_background_tile: boolean,
-                   profile_banner_url: string,
-                   profile_image_url: string,
-                   profile_image_url_https: string,
-                   profile_link_color: string,
-                   profile_sidebar_border_color: string,
-                   profile_sidebar_fill_color: string,
-                   profile_text_color: string,
-                   profile_use_background_image: boolean,
-                   protected: boolean,
-                   screen_name: string,
-                   statuses_count: int,
-                   verified: boolean
-                 }
-               },
-               quoted_status_id: int,
-               quoted_status_id_str: string,
-               retweet_count: int,
-               retweeted: boolean,
-               source: string,
-               text: string,
-               timestamp_ms: string,
-               truncated: boolean,
-               user: {
-                 contributors_enabled: boolean,
-                 created_at: string,
-                 default_profile: boolean,
-                 default_profile_image: boolean,
-                 description: string,
-                 favourites_count: int,
-                 followers_count: int,
-                 friends_count: int,
-                 geo_enabled: boolean,
-                 id: int,
-                 id_str: string,
-                 is_translator: boolean,
-                 lang: string,
-                 listed_count: int,
-                 location: string,
-                 name: string,
-                 profile_background_color: string,
-                 profile_background_image_url: string,
-                 profile_background_image_url_https: string,
-                 profile_background_tile: boolean,
-                 profile_banner_url: string,
-                 profile_image_url: string,
-                 profile_image_url_https: string,
-                 profile_link_color: string,
-                 profile_sidebar_border_color: string,
-                 profile_sidebar_fill_color: string,
-                 profile_text_color: string,
-                 profile_use_background_image: boolean,
-                 protected: boolean,
-                 screen_name: string,
-                 statuses_count: int,
-                 time_zone: string,
-                 url: string,
-                 utc_offset: int,
-                 verified: boolean
-               }
-             } )
 WITH {
     "hdfs":"hdfs://127.0.0.1:31888",
     "format":"parquet"
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-utf8/parquet-utf8.03.update.sqlpp
index 79b2ddabd7..4d84bed15d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-utf8/parquet-utf8.03.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to-hdfs/parquet-utf8/parquet-utf8.03.update.sqlpp
@@ -24,7 +24,6 @@ COPY (
 ) toWriter
 TO hdfs
 PATH ("copy-to-result", "parquet-utf8")
-TYPE ( { comment:string, id:bigint, name:string } )
 WITH {
     "hdfs":"hdfs://127.0.0.1:31888",
     "format":"parquet"
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-utf8/parquet-utf8.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-utf8/parquet-utf8.05.adm
index c60145d7f4..3ea2e8d765 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-utf8/parquet-utf8.05.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to-hdfs/parquet-utf8/parquet-utf8.05.adm
@@ -1,8 +1,8 @@
-{ "id": 1, "name": "John" }
-{ "id": 2, "name": "Abel" }
-{ "id": 3, "name": "Sandy" }
-{ "id": 4, "name": "Alex" }
-{ "id": 5, "name": "Mike" }
-{ "id": 6, "name": "Tom" }
-{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7, "name": "Jerry" }
-{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. 
حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 
😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee 
☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 
= 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. 
Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffe
 e ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حس [...]
+{ "name": "John", "id": 1 }
+{ "name": "Abel", "id": 2 }
+{ "name": "Sandy", "id": 3 }
+{ "name": "Alex", "id": 4 }
+{ "name": "Mike", "id": 5 }
+{ "name": "Tom", "id": 6 }
+{ "name": "Jerry", "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7 }
+{ "name": "William", "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 
😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee 
☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 
= 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. 
Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. 
حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢
 😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢 [...]
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index c163fd2a04..05e871d32a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -263,6 +263,8 @@
         <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error>
         <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error>
         <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error>
+        <expected-error>HYR0132: Extra field in the result, field 'c' does not exist at 'root' in the schema</expected-error>
+        <expected-error>ASX0072: Parquet does not support arrays containing mixed data types</expected-error>
         <source-location>false</source-location>
       </compilation-unit>
     </test-case>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index d57d3c7e11..cc6f98fbbd 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -184,11 +184,11 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
-            String columnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName());
             if (child.getTypeTag() == ATypeTag.MISSING) {
                 continue;
             }
             nonMissingChildren++;
+            String columnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName());
             if (!groupType.containsField(columnName)) {
                 LOGGER.info("Group type: {} does not contain field in record 
type: {}",
                         LogRedactionUtil.userData(groupType.getName()), 
LogRedactionUtil.userData(columnName));
@@ -299,12 +299,12 @@ public class ParquetRecordLazyVisitor implements 
ILazyVisitablePointableVisitor<
         recordConsumer.startMessage();
         for (int i = 0; i < rec.getNumberOfChildren(); i++) {
             rec.nextChild();
-            String columnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(rec.getFieldName());
             AbstractLazyVisitablePointable child = 
rec.getChildVisitablePointable();
             if (child.getTypeTag() == ATypeTag.MISSING) {
                 continue;
             }
             nonMissingChildren++;
+            String columnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(rec.getFieldName());
             if (!schema.containsField(columnName)) {
                 LOGGER.info("Schema: {} does not contain field: {}", 
LogRedactionUtil.userData(schema.toString()),
                         LogRedactionUtil.userData(columnName));
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index 3f0de608d1..d43947c449 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -97,7 +97,8 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
             schemaNode.setType(new ParquetSchemaTree.ListType());
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.ListType 
listType)) {
-            LOGGER.info("Incompatible type found in list: {} and {}" 
,LogRedactionUtil.userData(schemaNode.toString()) ,pointable.getTypeTag());
+            LOGGER.info("Incompatible type found in list: {} and {}", 
LogRedactionUtil.userData(schemaNode.toString()),
+                    pointable.getTypeTag());
             throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
         }
         int numChildren = pointable.getNumberOfChildren();
@@ -105,7 +106,7 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
 
-            if(child.getTypeTag()==ATypeTag.MISSING) {
+            if (child.getTypeTag() == ATypeTag.MISSING) {
                 throw 
RuntimeDataException.create(PARQUET_UNSUPPORTED_MIXED_TYPE_ARRAY);
             }
 
@@ -118,15 +119,14 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
     }
 
     @Override
-    public Void visit(FlatLazyVisitablePointable 
pointable,ParquetSchemaTree.SchemaNode schemaNode)
+    public Void visit(FlatLazyVisitablePointable pointable, 
ParquetSchemaTree.SchemaNode schemaNode)
             throws HyracksDataException {
-        if(pointable.getTypeTag() == ATypeTag.NULL) {
-            return  null;
+        if (pointable.getTypeTag() == ATypeTag.NULL) {
+            return null;
         }
 
         if (schemaNode.getType() == null) {
-            if (pointable.getTypeTag() == ATypeTag.MISSING)
-            {
+            if (pointable.getTypeTag() == ATypeTag.MISSING) {
                 foundMissing = true;
                 schemaNode.setType(new 
ParquetSchemaTree.FlatType(ATypeTag.MISSING));
                 return null;
@@ -186,11 +186,12 @@ public class ParquetSchemaLazyVisitor implements 
ILazyVisitablePointableVisitor<
 
     private static void removeMissing(ParquetSchemaTree.SchemaNode schemaNode) 
{
         if (schemaNode.getType() == null) {
-         return;
+            return;
         }
         if (schemaNode.getType() instanceof ParquetSchemaTree.RecordType 
recordType) {
-            recordType.getChildren().entrySet().removeIf(
-                    entry ->  (entry.getValue().getType() instanceof 
ParquetSchemaTree.FlatType flatType &&  flatType.getTypeTag() == 
ATypeTag.MISSING));
+            recordType.getChildren().entrySet()
+                    .removeIf(entry -> (entry.getValue().getType() instanceof 
ParquetSchemaTree.FlatType flatType
+                            && flatType.getTypeTag() == ATypeTag.MISSING));
 
             for (Map.Entry<String, ParquetSchemaTree.SchemaNode> entry : 
recordType.getChildren().entrySet()) {
                 removeMissing(entry.getValue());
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
index e484e84035..6e03144ac9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
@@ -64,19 +64,18 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
         for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
             pointable.nextChild();
             AbstractLazyVisitablePointable child = 
pointable.getChildVisitablePointable();
-            if(child.getTypeTag() == ATypeTag.MISSING){
-            continue;
+            if (child.getTypeTag() == ATypeTag.MISSING) {
+                continue;
             }
             nonMissingChildren++;
             String childColumnName = 
fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName());
             ParquetSchemaTree.SchemaNode childType = 
recordType.getChildren().get(childColumnName);
             if (childType == null) {
-                schemaComparisonType = 
ISchemaChecker.max(schemaComparisonType, SchemaComparisonType.CONFLICTING);
-                continue;
+                return SchemaComparisonType.CONFLICTING;
             }
             schemaComparisonType = ISchemaChecker.max(schemaComparisonType, 
child.accept(this, childType));
         }
-        if(nonMissingChildren!= recordType.getChildren().size()) {
+        if (nonMissingChildren != recordType.getChildren().size()) {
             return SchemaComparisonType.CONFLICTING;
         }
         return schemaComparisonType;
@@ -118,7 +117,7 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
             return SchemaComparisonType.GROWING;
         }
         // SchemaNode.getTypeTag can never be MISSING here
-        if(currentValue.getTypeTag()==ATypeTag.NULL){
+        if (currentValue.getTypeTag() == ATypeTag.NULL) {
             return SchemaComparisonType.EQUIVALENT;
         }
         if (!(schemaNode.getType() instanceof ParquetSchemaTree.FlatType 
inferredType)) {
@@ -131,7 +130,7 @@ public class SchemaCheckerLazyVisitor implements 
ISchemaChecker,
         if (!inferredType.isCompatibleWith(currentValue.getTypeTag())) {
             return ISchemaChecker.SchemaComparisonType.CONFLICTING;
         }
-        if(inferredType.isStrictChildOf(currentValue.getTypeTag())) {
+        if (inferredType.isStrictChildOf(currentValue.getTypeTag())) {
             return ISchemaChecker.SchemaComparisonType.GROWING;
         }
         if (inferredType.isStrictParentOf(currentValue.getTypeTag())) {

Reply via email to