From Hussain Towaileb <[email protected]>:

Hussain Towaileb has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21323?usp=email )

Change subject: [NO ISSUE][EXT]: map the iceberg MAP type to array of key-value objects
......................................................................

[NO ISSUE][EXT]: map the iceberg MAP type to array of key-value objects

Ext-ref: MB-72280
Change-Id: I562fcb2c4c62770a67d2adb7c1f8dcaa1fca5d59
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21323
Reviewed-by: Hussain Towaileb <[email protected]>
Reviewed-by: Murtadha Hubail <[email protected]>
Tested-by: Hussain Towaileb <[email protected]>
Integration-Tests: Jenkins <[email protected]>
---
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.020.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.030.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.040.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.050.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.060.adm
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
6 files changed, 23 insertions(+), 20 deletions(-)

Approvals:
  Murtadha Hubail: Looks good to me, approved
  Hussain Towaileb: Looks good to me, but someone else must approve; Verified
  Jenkins: Verified




diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.020.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.020.adm
index d5f405e..cb3d36d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.020.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.020.adm
@@ -1 +1 @@
-{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": 19723, 
"time_field": 37230000, "timestamp_field": 1707000000000000, 
"timestamp_ntz_field": 1707048000000000, "timestamp_nano_field": 
1707000000000000000, "interval_ym_field": 14, "interval_dt_field": 37230000000, 
"struct_field": { "name": "Alice", "age": 30, "active": true }, "list_field": [ 
"a", "b", "c" ], "map_field": { "key1": "value1", "key2": "100" }, 
"variant_field": "string value", "unknown_field": null }
\ No newline at end of file
+{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": 19723, 
"time_field": 37230000, "timestamp_field": 1707000000000000, 
"timestamp_ntz_field": 1707048000000000, "timestamp_nano_field": 
1707000000000000000, "interval_ym_field": 14, "interval_dt_field": 37230000000, 
"struct_field": { "name": "Alice", "age": 30, "active": true }, "list_field": [ 
"a", "b", "c" ], "map_field": [ { "key": "key1", "value": "value1" }, { "key": 
"key2", "value": "100" } ], "variant_field": "string value", "unknown_field": 
null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.030.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.030.adm
index 4dfcb3a..618e682 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.030.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.030.adm
@@ -1 +1 @@
-{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T22:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T22:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": { "key1": 
"value1", "key2": "100" }, "variant_field": "string value", "unknown_field": 
null }
\ No newline at end of file
+{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T22:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T22:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": [ { "key": 
"key1", "value": "value1" }, { "key": "key2", "value": "100" } ], 
"variant_field": "string value", "unknown_field": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.040.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.040.adm
index 4dfcb3a..618e682 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.040.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.040.adm
@@ -1 +1 @@
-{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T22:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T22:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": { "key1": 
"value1", "key2": "100" }, "variant_field": "string value", "unknown_field": 
null }
\ No newline at end of file
+{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T22:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T22:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": [ { "key": 
"key1", "value": "value1" }, { "key": "key2", "value": "100" } ], 
"variant_field": "string value", "unknown_field": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.050.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.050.adm
index a6c1db5..5d49e61 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.050.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.050.adm
@@ -1 +1 @@
-{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T23:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T23:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": { "key1": 
"value1", "key2": "100" }, "variant_field": "string value", "unknown_field": 
null }
\ No newline at end of file
+{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T23:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T23:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": [ { "key": 
"key1", "value": "value1" }, { "key": "key2", "value": "100" } ], 
"variant_field": "string value", "unknown_field": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.060.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.060.adm
index a6c1db5..5d49e61 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.060.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/iceberg/all-data-types/result.060.adm
@@ -1 +1 @@
-{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T23:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T23:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": { "key1": 
"value1", "key2": "100" }, "variant_field": "string value", "unknown_field": 
null }
\ No newline at end of file
+{ "binary_field": "AQIDBAUGBwg=", "fixed_field": "SGVsbG8gV29ybGQ=", 
"geometry_field": "AQIDBAUGBwgJCg==", "geography_field": "AQIDBAUGBwgJCg0=", 
"bool_field": true, "byte_field": 42, "short_field": 1000, "int_field": 42, 
"long_field": 9223372036854775807, "float_field": 3.14, "double_field": 
2.718281828459045, "decimal_field": 12345.6789, "string_field": "hello world", 
"varchar_field": "varchar value one", "char_field": "Hi", "uuid_field": 
uuid("550e8400-e29b-41d4-a716-446655440000"), "date_field": date("2024-01-01"), 
"time_field": time("10:20:30.000"), "timestamp_field": 
datetime("2024-02-03T23:40:00.000"), "timestamp_ntz_field": 
datetime("2024-02-04T12:00:00.000"), "timestamp_nano_field": 
datetime("2024-02-03T23:40:00.000"), "interval_ym_field": 14, 
"interval_dt_field": 37230000000, "struct_field": { "name": "Alice", "age": 30, 
"active": true }, "list_field": [ "a", "b", "c" ], "map_field": [ { "key": 
"key1", "value": "value1" }, { "key": "key2", "value": "100" } ], 
"variant_field": "string value", "unknown_field": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
index aaf37d0..7ed1b24 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
@@ -232,28 +232,31 @@
     }

     private void parseMap(Types.MapType mapSchema, Map<?, ?> map, DataOutput 
out) throws IOException {
-        IMutableValueStorage valueBuffer = parserContext.enterObject();
+        final IMutableValueStorage item = parserContext.enterCollection();
+        final IMutableValueStorage valueBuffer = parserContext.enterObject();
         IARecordBuilder objectBuilder = 
parserContext.getObjectBuilder(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
-        valueEmbedder.enterObject();
+        IAsterixListBuilder listBuilder =
+                
parserContext.getCollectionBuilder(DefaultOpenFieldType.NESTED_OPEN_AORDERED_LIST_TYPE);

         Type keyType = mapSchema.keyType();
         Type valueType = mapSchema.valueType();

-        // TODO: we can't support non-string keys since we map MAP-TYPE to 
OBJECT-TYPE in AsterixDB
-        if (keyType != Types.StringType.get()) {
-            throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, 
"Iceberg Parser", "MAP with non-string keys");
-        }
-
         for (Map.Entry<?, ?> entry : map.entrySet()) {
-            String fieldName = (String) entry.getKey();
-            Object fieldValue = entry.getValue();
-            parseValueAndAddObjectField(valueBuffer, objectBuilder, valueType, 
fieldName, fieldValue);
+            objectBuilder.reset(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
+            valueBuffer.reset();
+            parseValue(keyType, entry.getKey(), valueBuffer.getDataOutput());
+            
objectBuilder.addField(parserContext.getSerializedFieldName("key"), 
valueBuffer);
+            valueBuffer.reset();
+            parseValue(valueType, entry.getValue(), 
valueBuffer.getDataOutput());
+            
objectBuilder.addField(parserContext.getSerializedFieldName("value"), 
valueBuffer);
+            item.reset();
+            objectBuilder.write(item.getDataOutput(), true);
+            listBuilder.addItem(item);
         }

-        embedMissingValues(objectBuilder, parserContext, valueEmbedder);
-        objectBuilder.write(out, true);
-        valueEmbedder.exitObject();
+        listBuilder.write(out, true);
         parserContext.exitObject(valueBuffer, null, objectBuilder);
+        parserContext.exitCollection(item, listBuilder);
     }

     private void parseValueAndAddObjectField(IMutableValueStorage valueBuffer, 
IARecordBuilder objectBuilder,
@@ -467,8 +470,8 @@
                 ensureDecimalToDoubleEnabled(type, parserContext);
                 yield ATypeTag.DOUBLE;
             }
-            case STRUCT, MAP -> ATypeTag.OBJECT;
-            case LIST -> ATypeTag.ARRAY;
+            case STRUCT -> ATypeTag.OBJECT;
+            case LIST, MAP -> ATypeTag.ARRAY;
             case DATE -> {
                 if (parserContext.isDateAsInt()) {
                     yield ATypeTag.INTEGER;

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21323?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: I562fcb2c4c62770a67d2adb7c1f8dcaa1fca5d59
Gerrit-Change-Number: 21323
Gerrit-PatchSet: 2
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-Reviewer: Anon. E. Moose #1000171
Gerrit-Reviewer: Hussain Towaileb <[email protected]>
Gerrit-Reviewer: Jenkins <[email protected]>
Gerrit-Reviewer: Murtadha Hubail <[email protected]>

Reply via email to