This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new be4f740ee5 Fix the issue where map flattening removes the map
field from the record (#13243)
be4f740ee5 is described below
commit be4f740ee57a9798ecb3aeb9ad23541b89d0111d
Author: Xiang Fu <[email protected]>
AuthorDate: Tue May 28 15:55:56 2024 -0700
Fix the issue where map flattening removes the map field from the
record (#13243)
---
.../framework/SegmentProcessorFrameworkTest.java | 25 ++++++++++++++++------
.../recordtransformer/ComplexTypeTransformer.java | 2 +-
...thubComplexTypeEvents_offline_table_config.json | 4 ++++
.../githubComplexTypeEvents_schema.json | 8 +++++++
4 files changed, 32 insertions(+), 7 deletions(-)
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
index 00631e778b..d0b02fcc0e 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java
@@ -88,6 +88,7 @@ public class SegmentProcessorFrameworkTest {
private Schema _schema;
private Schema _schemaMV;
+ private Schema _schemaWithComplexType;
private final List<Object[]> _rawData =
Arrays.asList(new Object[]{"abc", 1000, 1597719600000L}, new
Object[]{null, 2000, 1597773600000L},
@@ -119,18 +120,27 @@ public class SegmentProcessorFrameworkTest {
_tableConfigWithFixedSegmentName.getIndexingConfig().setSegmentNameGeneratorType("fixed");
_schema =
- new
Schema.SchemaBuilder().setSchemaName("mySchema").addSingleValueDimension("campaign",
DataType.STRING, "")
+ new Schema.SchemaBuilder().setSchemaName("mySchema")
+ .addSingleValueDimension("campaign", DataType.STRING, "")
.addSingleValueDimension("campaign.inner1", DataType.STRING)
.addSingleValueDimension("campaign.inner1.inner2", DataType.STRING)
// NOTE: Intentionally put 1000 as default value to test skipping
null values during rollup
.addMetric("clicks", DataType.INT, 1000)
.addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH",
"1:MILLISECONDS").build();
_schemaMV =
- new
Schema.SchemaBuilder().setSchemaName("mySchema").addMultiValueDimension("campaign",
DataType.STRING, "")
+ new Schema.SchemaBuilder().setSchemaName("mySchema")
+ .addMultiValueDimension("campaign", DataType.STRING, "")
+ // NOTE: Intentionally put 1000 as default value to test skipping
null values during rollup
+ .addMetric("clicks", DataType.INT, 1000)
+ .addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH",
"1:MILLISECONDS").build();
+ _schemaWithComplexType =
+ new Schema.SchemaBuilder().setSchemaName("mySchema")
+ .addSingleValueDimension("campaign", DataType.JSON)
+ .addSingleValueDimension("campaign.inner1", DataType.STRING)
+ .addSingleValueDimension("campaign.inner1.inner2", DataType.STRING)
// NOTE: Intentionally put 1000 as default value to test skipping
null values during rollup
.addMetric("clicks", DataType.INT, 1000)
.addDateTime("time", DataType.LONG, "1:MILLISECONDS:EPOCH",
"1:MILLISECONDS").build();
-
// create segments in many folders
_singleSegment = createInputSegments(new File(TEMP_DIR, "single_segment"),
_rawData, 1, _schema);
_multipleSegments = createInputSegments(new File(TEMP_DIR,
"multiple_segments"), _rawData, 3, _schema);
@@ -258,16 +268,19 @@ public class SegmentProcessorFrameworkTest {
_tableConfig.setIngestionConfig(ingestionConfig);
// Default configs
SegmentProcessorConfig config =
- new
SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schema).build();
+ new
SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schemaWithComplexType).build();
SegmentProcessorFramework framework =
new SegmentProcessorFramework(_recordReaderWithComplexType, config,
workingDir);
List<File> outputSegments = framework.process();
ImmutableSegment segment =
ImmutableSegmentLoader.load(outputSegments.get(0), ReadMode.mmap);
SegmentMetadata segmentMetadata = segment.getSegmentMetadata();
// Pick the column created from complex type
- ColumnMetadata campaignMetadata =
segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2");
+ ColumnMetadata campaignInner2Metadata =
segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2");
// Verify we see a specific value parsed from the complexType
- Assert.assertEquals(campaignMetadata.getMinValue().compareTo("inner2v"),
0);
+
Assert.assertEquals(campaignInner2Metadata.getMinValue().compareTo("inner2v"),
0);
+ ColumnMetadata campaignMetadata =
segmentMetadata.getColumnMetadataFor("campaign");
+ Assert.assertEquals(
+
campaignMetadata.getMinValue().compareTo("{\"inner1\":{\"inner2\":\"inner2v\"},\"inner\":\"innerv\"}"),
0);
}
@Test
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
index a67bc54d46..48db4c6ad3 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/ComplexTypeTransformer.java
@@ -257,7 +257,7 @@ public class ComplexTypeTransformer implements
RecordTransformer {
for (String column : columns) {
Object value = record.getValue(column);
if (value instanceof Map) {
- Map<String, Object> map = (Map) record.removeValue(column);
+ Map<String, Object> map = (Map) value;
List<String> mapColumns = new ArrayList<>();
for (Map.Entry<String, Object> entry : new
ArrayList<>(map.entrySet())) {
String flattenName = concat(column, entry.getKey());
diff --git
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
index d434773aab..357151aadf 100644
---
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
+++
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_offline_table_config.json
@@ -16,6 +16,10 @@
{
"columnName": "created_at_timestamp",
"transformFunction": "fromDateTime(created_at,
'yyyy-MM-dd''T''HH:mm:ss''Z''')"
+ },
+ {
+ "columnName": "payload_str",
+ "transformFunction": "jsonFormat(payload)"
}
],
"complexTypeConfig": {
diff --git
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
index 1c79af79cf..dbacd3d213 100644
---
a/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
+++
b/pinot-tools/src/main/resources/examples/batch/githubComplexTypeEvents/githubComplexTypeEvents_schema.json
@@ -8,6 +8,14 @@
"name": "type",
"dataType": "STRING"
},
+ {
+ "name": "payload",
+ "dataType": "JSON"
+ },
+ {
+ "name": "payload_str",
+ "dataType": "STRING"
+ },
{
"name": "payload.push_id",
"dataType": "LONG"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]