This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new be4f740ee5 Fix the issue that map flatten shouldn't remove the map field from the record (#13243)
be4f740ee5 is described below

commit be4f740ee57a9798ecb3aeb9ad23541b89d0111d
Author: Xiang Fu <[email protected]>
AuthorDate: Tue May 28 15:55:56 2024 -0700

    Fix the issue that map flatten shouldn't remove the map field from the record (#13243)
---
 .../framework/SegmentProcessorFrameworkTest.java   | 25 ++++++++++++++++------
 .../recordtransformer/ComplexTypeTransformer.java  |  2 +-
 ...thubComplexTypeEvents_offline_table_config.json |  4 ++++
 .../githubComplexTypeEvents_schema.json            |  8 +++++++
 4 files changed, 32 insertions(+), 7 deletions(-)

diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
index 00631e778b..d0b02fcc0e 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
@@ -88,6 +88,7 @@ public class SegmentProcessorFrameworkTest {
 
   private Schema _schema;
   private Schema _schemaMV;
+  private Schema _schemaWithComplexType;
 
   private final List<Object[]> _rawData =
       Arrays.asList(new Object[]{"abc", 1000, 1597719600000L}, new 
Object[]{null, 2000, 1597773600000L},
@@ -119,18 +120,27 @@ public class SegmentProcessorFrameworkTest {
     
_tableConfigWithFixedSegmentName.getIndexingConfig().setSegmentNameGeneratorType("fixed");
 
     _schema =
-        new 
Schema.SchemaBuilder().setSchemaName("mySchema").addSingleValueDimension("campaign",
 DataType.STRING, "")
+        new Schema.SchemaBuilder().setSchemaName("mySchema")
+            .addSingleValueDimension("campaign", DataType.STRING, "")
             .addSingleValueDimension("campaign.inner1", DataType.STRING)
             .addSingleValueDimension("campaign.inner1.inner2", DataType.STRING)
             // NOTE: Intentionally put 1000 as default value to test skipping 
null values during rollup
             .addMetric("clicks", DataType.INT, 1000)
             .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", 
"1:MILLISECONDS").build();
     _schemaMV =
-        new 
Schema.SchemaBuilder().setSchemaName("mySchema").addMultiValueDimension("campaign",
 DataType.STRING, "")
+        new Schema.SchemaBuilder().setSchemaName("mySchema")
+            .addMultiValueDimension("campaign", DataType.STRING, "")
+            // NOTE: Intentionally put 1000 as default value to test skipping 
null values during rollup
+            .addMetric("clicks", DataType.INT, 1000)
+            .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", 
"1:MILLISECONDS").build();
+    _schemaWithComplexType =
+        new Schema.SchemaBuilder().setSchemaName("mySchema")
+            .addSingleValueDimension("campaign", DataType.JSON)
+            .addSingleValueDimension("campaign.inner1", DataType.STRING)
+            .addSingleValueDimension("campaign.inner1.inner2", DataType.STRING)
             // NOTE: Intentionally put 1000 as default value to test skipping 
null values during rollup
             .addMetric("clicks", DataType.INT, 1000)
             .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH", 
"1:MILLISECONDS").build();
-
     // create segments in many folders
     _singleSegment = createInputSegments(new File(TEMP_DIR, "single_segment"), 
_rawData, 1, _schema);
     _multipleSegments = createInputSegments(new File(TEMP_DIR, 
"multiple_segments"), _rawData, 3, _schema);
@@ -258,16 +268,19 @@ public class SegmentProcessorFrameworkTest {
     _tableConfig.setIngestionConfig(ingestionConfig);
     // Default configs
     SegmentProcessorConfig config =
-        new 
SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schema).build();
+        new 
SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schemaWithComplexType).build();
     SegmentProcessorFramework framework =
         new SegmentProcessorFramework(_recordReaderWithComplexType, config, 
workingDir);
     List<File> outputSegments = framework.process();
     ImmutableSegment segment = 
ImmutableSegmentLoader.load(outputSegments.get(0), ReadMode.mmap);
     SegmentMetadata segmentMetadata = segment.getSegmentMetadata();
     // Pick the column created from complex type
-    ColumnMetadata campaignMetadata = 
segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2");
+    ColumnMetadata campaignInner2Metadata = 
segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2");
     // Verify we see a specific value parsed from the complexType
-    Assert.assertEquals(campaignMetadata.getMinValue().compareTo("inner2v"), 
0);
+    
Assert.assertEquals(campaignInner2Metadata.getMinValue().compareTo("inner2v"), 
0);
+    ColumnMetadata campaignMetadata = 
segmentMetadata.getColumnMetadataFor("campaign");
+    Assert.assertEquals(
+        
campaignMetadata.getMinValue().compareTo("{\"inner1\":{\"inner2\":\"inner2v\"},\"inner\":\"innerv\"}"),
 0);
   }
 
   @Test
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
index a67bc54d46..48db4c6ad3 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
@@ -257,7 +257,7 @@ public class ComplexTypeTransformer implements 
RecordTransformer {
     for (String column : columns) {
       Object value = record.getValue(column);
       if (value instanceof Map) {
-        Map<String, Object> map = (Map) record.removeValue(column);
+        Map<String, Object> map = (Map) value;
         List<String> mapColumns = new ArrayList<>();
         for (Map.Entry<String, Object> entry : new 
ArrayList<>(map.entrySet())) {
           String flattenName = concat(column, entry.getKey());
diff --git 
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
 
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
index d434773aab..357151aadf 100644
--- 
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
+++ 
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
@@ -16,6 +16,10 @@
       {
         "columnName": "created_at_timestamp",
         "transformFunction": "fromDateTime(created_at, 
'yyyy-MM-dd''T''HH:mm:ss''Z''')"
+      },
+      {
+        "columnName": "payload_str",
+        "transformFunction": "jsonFormat(payload)"
       }
     ],
     "complexTypeConfig": {
diff --git 
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
 
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
index 1c79af79cf..dbacd3d213 100644
--- 
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
+++ 
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
@@ -8,6 +8,14 @@
       "name": "type",
       "dataType": "STRING"
     },
+    {
+      "name": "payload",
+      "dataType": "JSON"
+    },
+    {
+      "name": "payload_str",
+      "dataType": "STRING"
+    },
     {
       "name": "payload.push_id",
       "dataType": "LONG"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to