This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new be9b7d6231 Update the table config in quick start (#11652)
be9b7d6231 is described below
commit be9b7d623146429d00c138c1f7fbaebf1fc0c86e
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Fri Sep 22 16:50:49 2023 -0700
Update the table config in quick start (#11652)
---
.../airlineStats_realtime_table_config.json | 67 +++++++++---------
.../airlineStats_realtime_table_config.json | 35 ---------
.../githubEvents_realtime_table_config.json | 50 ++++++-------
.../meetupRsvp_realtime_table_config.json | 47 -------------
.../meetupRsvp_realtime_table_config.json | 49 ++++++-------
...eetupRsvpComplexType_realtime_table_config.json | 14 +---
.../meetupRsvpJson_realtime_table_config.json | 37 +++++-----
...lRequestMergedEvents_realtime_table_config.json | 37 +++++-----
...MergedEvents_kinesis_realtime_table_config.json | 42 +++++------
...lRequestMergedEvents_realtime_table_config.json | 36 +++++-----
...upsertJsonMeetupRsvp_realtime_table_config.json | 70 +++++++++++-------
.../upsertMeetupRsvp_realtime_table_config.json | 73 ++++++++++---------
...ertPartialMeetupRsvp_realtime_table_config.json | 82 ++++++++++++++--------
13 files changed, 296 insertions(+), 343 deletions(-)
diff --git
a/pinot-tools/src/main/resources/examples/stream/airlineStats/airlineStats_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/airlineStats/airlineStats_realtime_table_config.json
index 4d227d6c77..9d1dcd7e14 100644
---
a/pinot-tools/src/main/resources/examples/stream/airlineStats/airlineStats_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/airlineStats/airlineStats_realtime_table_config.json
@@ -1,44 +1,53 @@
{
"tableName": "airlineStats",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "DaysSinceEpoch",
- "timeType": "DAYS",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "5",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "airlineStats",
- "replication": "1",
- "replicasPerPartition": "1"
+ "replication": "1"
},
+ "tableIndexConfig": {},
"routing": {
"segmentPrunerTypes": [
"time"
]
},
- "tenants": {},
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "simple",
- "stream.kafka.topic.name": "flights-realtime",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
- "stream.kafka.zk.broker.url": "localhost:2191/kafka",
- "stream.kafka.broker.list": "localhost:19092",
- "realtime.segment.flush.threshold.time": "3600000",
- "realtime.segment.flush.threshold.size": "50000",
- "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
- }
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "flights-realtime",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.consumer.prop.auto.offset.reset": "smallest",
+ "stream.kafka.zk.broker.url": "localhost:2191/kafka",
+ "stream.kafka.broker.list": "localhost:19092",
+ "realtime.segment.flush.threshold.time": "3600000",
+ "realtime.segment.flush.threshold.size": "50000"
+ }
+ ]
+ },
+ "transformConfigs": [
+ {
+ "columnName": "ts",
+ "transformFunction": "fromEpochDays(DaysSinceEpoch)"
+ },
+ {
+ "columnName": "tsRaw",
+ "transformFunction": "fromEpochDays(DaysSinceEpoch)"
+ }
+ ]
},
"fieldConfigList": [
{
"name": "ts",
"encodingType": "DICTIONARY",
- "indexTypes": ["TIMESTAMP"],
+ "indexTypes": [
+ "TIMESTAMP"
+ ],
"timestampConfig": {
"granularities": [
"DAY",
@@ -50,17 +59,5 @@
],
"metadata": {
"customConfigs": {}
- },
- "ingestionConfig": {
- "transformConfigs": [
- {
- "columnName": "ts",
- "transformFunction": "fromEpochDays(DaysSinceEpoch)"
- },
- {
- "columnName": "tsRaw",
- "transformFunction": "fromEpochDays(DaysSinceEpoch)"
- }
- ]
}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/airlineStats/kafka_2.0/airlineStats_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/airlineStats/kafka_2.0/airlineStats_realtime_table_config.json
deleted file mode 100644
index cb5d690628..0000000000
---
a/pinot-tools/src/main/resources/examples/stream/airlineStats/kafka_2.0/airlineStats_realtime_table_config.json
+++ /dev/null
@@ -1,35 +0,0 @@
-{
- "tableName": "airlineStats",
- "tableType": "REALTIME",
- "segmentsConfig": {
- "timeColumnName": "DaysSinceEpoch",
- "timeType": "DAYS",
- "retentionTimeUnit": "DAYS",
- "retentionTimeValue": "5",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "airlineStats",
- "replication": "1",
- "replicasPerPartition": "1"
- },
- "tenants": {},
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "simple",
- "stream.kafka.topic.name": "flights-realtime",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
- "stream.kafka.zk.broker.url": "localhost:2191/kafka",
- "stream.kafka.broker.list": "localhost:19092",
- "realtime.segment.flush.threshold.time": "3600000",
- "realtime.segment.flush.threshold.size": "50000",
- "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
- }
- },
- "metadata": {
- "customConfigs": {}
- }
-}
diff --git
a/pinot-tools/src/main/resources/examples/stream/githubEvents/githubEvents_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/githubEvents/githubEvents_realtime_table_config.json
index 2204771256..563abaa742 100644
---
a/pinot-tools/src/main/resources/examples/stream/githubEvents/githubEvents_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/githubEvents/githubEvents_realtime_table_config.json
@@ -1,17 +1,33 @@
{
"tableName": "githubEvents",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "created_at_timestamp",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "githubEvents",
- "replication": "1",
- "replicasPerPartition": "1"
+ "replication": "1"
},
- "tenants": {
+ "tableIndexConfig": {
+ "invertedIndexColumns": [
+ "id"
+ ],
+ "segmentNameGeneratorType": "normalizedDate"
},
"ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "githubEvents",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.consumer.prop.auto.offset.reset": "smallest",
+ "stream.kafka.zk.broker.url": "localhost:2191/kafka",
+ "stream.kafka.broker.list": "localhost:19092",
+ "realtime.segment.flush.threshold.time": "1h",
+ "realtime.segment.flush.threshold.size": "100"
+ }
+ ]
+ },
"transformConfigs": [
{
"columnName": "created_at_timestamp",
@@ -19,27 +35,7 @@
}
]
},
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "invertedIndexColumns": [
- "id"
- ],
- "segmentNameGeneratorType": "normalizedDate",
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "simple",
- "stream.kafka.topic.name": "githubEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.zk.broker.url": "localhost:2191/kafka",
- "stream.kafka.broker.list": "localhost:19092",
- "realtime.segment.flush.threshold.time": "1h",
- "realtime.segment.flush.threshold.size": "100",
- "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
- }
- },
"metadata": {
- "customConfigs": {
- }
+ "customConfigs": {}
}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/meetupRsvp/kafka_2.0/meetupRsvp_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/meetupRsvp/kafka_2.0/meetupRsvp_realtime_table_config.json
deleted file mode 100644
index 41379c2569..0000000000
---
a/pinot-tools/src/main/resources/examples/stream/meetupRsvp/kafka_2.0/meetupRsvp_realtime_table_config.json
+++ /dev/null
@@ -1,47 +0,0 @@
-{
- "tableName": "meetupRsvp",
- "tableType": "REALTIME",
- "segmentsConfig": {
- "timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "meetupRsvp",
- "replication": "1",
- "replicasPerPartition": "1",
- "retentionTimeUnit": "DAYS",
- "retentionTimeValue": "1"
- },
- "tenants": {},
- "fieldConfigList": [
- {
- "name": "mtime",
- "encodingType": "DICTIONARY",
- "indexTypes": ["TIMESTAMP"],
- "timestampConfig": {
- "granularities": [
- "DAY",
- "WEEK",
- "MONTH"
- ]
- }
- }
- ],
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
- "stream.kafka.topic.name": "meetupRSVPEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.broker.list": "localhost:19092",
- "stream.kafka.consumer.prop.auto.offset.reset": "largest",
- "realtime.segment.flush.threshold.time": "12h",
- "realtime.segment.flush.threshold.size": "100M"
- }
- },
- "metadata": {
- "customConfigs": {}
- }
-}
diff --git
a/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_realtime_table_config.json
index 0a85aaf577..8cdbcf7bf3 100644
---
a/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/meetupRsvp/meetupRsvp_realtime_table_config.json
@@ -1,23 +1,39 @@
{
"tableName": "meetupRsvp",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "meetupRsvp",
- "replication": "1",
- "replicasPerPartition": "1",
"retentionTimeUnit": "DAYS",
- "retentionTimeValue": "1"
+ "retentionTimeValue": "1",
+ "replication": "1"
+ },
+ "tableIndexConfig": {},
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "meetupRSVPEvents",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.consumer.prop.auto.offset.reset": "largest",
+ "stream.kafka.zk.broker.url": "localhost:2191/kafka",
+ "stream.kafka.broker.list": "localhost:19092",
+ "stream.kafka.metadata.populate": "true",
+ "realtime.segment.flush.threshold.time": "12h",
+ "realtime.segment.flush.threshold.size": "10K"
+ }
+ ]
+ }
},
- "tenants": {},
"fieldConfigList": [
{
"name": "mtime",
"encodingType": "DICTIONARY",
- "indexTypes": ["TIMESTAMP"],
+ "indexTypes": [
+ "TIMESTAMP"
+ ],
"timestampConfig": {
"granularities": [
"DAY",
@@ -27,21 +43,6 @@
}
}
],
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
- "stream.kafka.topic.name": "meetupRSVPEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.broker.list": "localhost:19092",
- "stream.kafka.consumer.prop.auto.offset.reset": "largest",
- "realtime.segment.flush.threshold.time": "12h",
- "realtime.segment.flush.threshold.size": "10K",
- "stream.kafka.metadata.populate": "true"
- }
- },
"metadata": {
"customConfigs": {}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/meetupRsvpComplexType/meetupRsvpComplexType_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/meetupRsvpComplexType/meetupRsvpComplexType_realtime_table_config.json
index 43662de8fd..16e25ac700 100644
---
a/pinot-tools/src/main/resources/examples/stream/meetupRsvpComplexType/meetupRsvpComplexType_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/meetupRsvpComplexType/meetupRsvpComplexType_realtime_table_config.json
@@ -4,38 +4,30 @@
"tenants": {},
"segmentsConfig": {
"timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
- "segmentPushType": "APPEND",
- "replicasPerPartition": "1",
"retentionTimeUnit": "DAYS",
- "retentionTimeValue": "1"
+ "retentionTimeValue": "1",
+ "replication": "1"
},
+ "tableIndexConfig": {},
"ingestionConfig": {
"streamIngestionConfig": {
"streamConfigMaps": [
{
"streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
"stream.kafka.topic.name": "meetupRSVPComplexTypeEvents",
"stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
"stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.zk.broker.url": "localhost:2191/kafka",
"stream.kafka.broker.list": "localhost:19092"
}
]
},
- "transformConfigs": [
- ],
"complexTypeConfig": {
"fieldsToUnnest": [
"group.group_topics"
]
}
},
- "tableIndexConfig": {
- "loadMode": "MMAP"
- },
"metadata": {
"customConfigs": {}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/meetupRsvpJson/meetupRsvpJson_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/meetupRsvpJson/meetupRsvpJson_realtime_table_config.json
index 6ab8dd4f36..56fffee5ae 100644
---
a/pinot-tools/src/main/resources/examples/stream/meetupRsvpJson/meetupRsvpJson_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/meetupRsvpJson/meetupRsvpJson_realtime_table_config.json
@@ -4,21 +4,31 @@
"tenants": {},
"segmentsConfig": {
"timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
- "segmentPushType": "APPEND",
- "replicasPerPartition": "1",
"retentionTimeUnit": "DAYS",
- "retentionTimeValue": "1"
+ "retentionTimeValue": "1",
+ "replication": "1"
+ },
+ "tableIndexConfig": {
+ "noDictionaryColumns": [
+ "event_json",
+ "group_json",
+ "member_json",
+ "venue_json"
+ ],
+ "jsonIndexColumns": [
+ "event_json",
+ "group_json",
+ "member_json",
+ "venue_json"
+ ]
},
"ingestionConfig": {
"streamIngestionConfig": {
"streamConfigMaps": [
{
"streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
"stream.kafka.topic.name": "meetupRSVPJsonEvents",
"stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
"stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.zk.broker.url": "localhost:2191/kafka",
"stream.kafka.broker.list": "localhost:19092"
@@ -44,21 +54,6 @@
}
]
},
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "noDictionaryColumns": [
- "event_json",
- "group_json",
- "member_json",
- "venue_json"
- ],
- "jsonIndexColumns": [
- "event_json",
- "group_json",
- "member_json",
- "venue_json"
- ]
- },
"metadata": {
"customConfigs": {}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/docker/pullRequestMergedEvents_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/docker/pullRequestMergedEvents_realtime_table_config.json
index 8c6ddea3e1..3ba5a54a7b 100644
---
a/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/docker/pullRequestMergedEvents_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/docker/pullRequestMergedEvents_realtime_table_config.json
@@ -1,33 +1,34 @@
{
"tableName": "pullRequestMergedEvents",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "mergedTimeMillis",
- "timeType": "MILLISECONDS",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "60",
- "schemaName": "pullRequestMergedEvents",
- "replication": "1",
- "replicasPerPartition": "1"
+ "replication": "1"
},
- "tenants": {},
"tableIndexConfig": {
- "loadMode": "MMAP",
"invertedIndexColumns": [
"organization",
"repo"
- ],
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "simple",
- "stream.kafka.topic.name": "pullRequestMergedEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.zk.broker.url": "pinot-zookeeper:2181/kafka",
- "stream.kafka.broker.list": "kafka:9092",
- "realtime.segment.flush.threshold.time": "12h",
- "realtime.segment.flush.threshold.size": "100000",
- "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
+ ]
+ },
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "pullRequestMergedEvents",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.consumer.prop.auto.offset.reset": "smallest",
+ "stream.kafka.zk.broker.url": "pinot-zookeeper:2181/kafka",
+ "stream.kafka.broker.list": "kafka:9092",
+ "realtime.segment.flush.threshold.time": "12h",
+ "realtime.segment.flush.threshold.size": "100000"
+ }
+ ]
}
},
"metadata": {
diff --git
a/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_kinesis_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_kinesis_realtime_table_config.json
index 7cdd27bdbc..a99ee4ef98 100644
---
a/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_kinesis_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_kinesis_realtime_table_config.json
@@ -1,35 +1,37 @@
{
"tableName": "pullRequestMergedEvents",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "mergedTimeMillis",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "60",
- "schemaName": "pullRequestMergedEvents",
- "replication": "1",
- "replicasPerPartition": "1"
+ "replication": "1"
},
- "tenants": {},
"tableIndexConfig": {
- "loadMode": "MMAP",
"invertedIndexColumns": [
"organization",
"repo"
- ],
- "streamConfigs": {
- "streamType": "kinesis",
- "stream.kinesis.consumer.type": "lowlevel",
- "stream.kinesis.topic.name": "pullRequestMergedEvents",
- "stream.kinesis.decoder.class.name":
"org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
- "stream.kinesis.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kinesis.KinesisConsumerFactory",
- "realtime.segment.flush.threshold.time": "12h",
- "realtime.segment.flush.threshold.size": "100000",
- "stream.kinesis.consumer.prop.auto.offset.reset": "smallest",
- "region": "us-east-1",
- "shardIteratorType": "TRIM_HORIZON",
- "endpoint" : "http://localhost:4566",
- "accessKey" : "access",
- "secretKey": "secret"
+ ]
+ },
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kinesis",
+ "stream.kinesis.topic.name": "pullRequestMergedEvents",
+ "stream.kinesis.decoder.class.name":
"org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
+ "stream.kinesis.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kinesis.KinesisConsumerFactory",
+ "stream.kinesis.consumer.prop.auto.offset.reset": "smallest",
+ "region": "us-east-1",
+ "shardIteratorType": "TRIM_HORIZON",
+ "endpoint": "http://localhost:4566",
+ "accessKey": "access",
+ "secretKey": "secret",
+ "realtime.segment.flush.threshold.time": "12h",
+ "realtime.segment.flush.threshold.size": "100000"
+ }
+ ]
}
},
"metadata": {
diff --git
a/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_realtime_table_config.json
index f1cd5067df..9ba9b86d8c 100644
---
a/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/pullRequestMergedEvents/pullRequestMergedEvents_realtime_table_config.json
@@ -1,32 +1,34 @@
{
"tableName": "pullRequestMergedEvents",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "mergedTimeMillis",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "60",
- "schemaName": "pullRequestMergedEvents",
- "replication": "1",
- "replicasPerPartition": "1"
+ "replication": "1"
},
- "tenants": {},
"tableIndexConfig": {
- "loadMode": "MMAP",
"invertedIndexColumns": [
"organization",
"repo"
- ],
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "simple",
- "stream.kafka.topic.name": "pullRequestMergedEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.zk.broker.url": "localhost:2191/kafka",
- "stream.kafka.broker.list": "localhost:19092",
- "realtime.segment.flush.threshold.time": "12h",
- "realtime.segment.flush.threshold.size": "100000",
- "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
+ ]
+ },
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "pullRequestMergedEvents",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.consumer.prop.auto.offset.reset": "smallest",
+ "stream.kafka.zk.broker.url": "localhost:2191/kafka",
+ "stream.kafka.broker.list": "localhost:19092",
+ "realtime.segment.flush.threshold.time": "12h",
+ "realtime.segment.flush.threshold.size": "100000"
+ }
+ ]
}
},
"metadata": {
diff --git
a/pinot-tools/src/main/resources/examples/stream/upsertJsonMeetupRsvp/upsertJsonMeetupRsvp_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/upsertJsonMeetupRsvp/upsertJsonMeetupRsvp_realtime_table_config.json
index baa77af751..886a071dd2 100644
---
a/pinot-tools/src/main/resources/examples/stream/upsertJsonMeetupRsvp/upsertJsonMeetupRsvp_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/upsertJsonMeetupRsvp/upsertJsonMeetupRsvp_realtime_table_config.json
@@ -4,21 +4,57 @@
"tenants": {},
"segmentsConfig": {
"timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
- "segmentPushType": "APPEND",
- "replicasPerPartition": "1",
"retentionTimeUnit": "DAYS",
- "retentionTimeValue": "1"
+ "retentionTimeValue": "1",
+ "replication": "1"
+ },
+ "tableIndexConfig": {
+ "noDictionaryColumns": [
+ "event_json",
+ "group_json",
+ "member_json",
+ "venue_json"
+ ],
+ "jsonIndexColumns": [
+ "event_json",
+ "group_json",
+ "member_json",
+ "venue_json"
+ ],
+ "columnPartitionMap": {
+ "rsvp_id": {
+ "functionName": "Hashcode",
+ "numPartitions": 2
+ }
+ }
+ },
+ "instanceAssignmentConfigMap": {
+ "CONSUMING": {
+ "tagPoolConfig": {
+ "tag": "DefaultTenant_REALTIME"
+ },
+ "replicaGroupPartitionConfig": {
+ "replicaGroupBased": true,
+ "numReplicaGroups": 1,
+ "partitionColumn": "rsvp_id",
+ "numPartitions": 2,
+ "numInstancesPerPartition": 1
+ }
+ }
+ },
+ "routing": {
+ "segmentPrunerTypes": [
+ "partition"
+ ],
+ "instanceSelectorType": "strictReplicaGroup"
},
"ingestionConfig": {
"streamIngestionConfig": {
"streamConfigMaps": [
{
"streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
"stream.kafka.topic.name": "upsertJsonMeetupRSVPEvents",
"stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
"stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.zk.broker.url": "localhost:2191/kafka",
"stream.kafka.broker.list": "localhost:19092"
@@ -44,20 +80,8 @@
}
]
},
- "tableIndexConfig": {
- "loadMode": "MMAP",
- "noDictionaryColumns": [
- "event_json",
- "group_json",
- "member_json",
- "venue_json"
- ],
- "jsonIndexColumns": [
- "event_json",
- "group_json",
- "member_json",
- "venue_json"
- ]
+ "upsertConfig": {
+ "mode": "FULL"
},
"fieldConfigList": [
{
@@ -89,12 +113,6 @@
]
}
],
- "routing": {
- "instanceSelectorType": "strictReplicaGroup"
- },
- "upsertConfig": {
- "mode": "FULL"
- },
"metadata": {
"customConfigs": {}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/upsertMeetupRsvp/upsertMeetupRsvp_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/upsertMeetupRsvp/upsertMeetupRsvp_realtime_table_config.json
index d7fa20d8fc..6d6c95379b 100644
---
a/pinot-tools/src/main/resources/examples/stream/upsertMeetupRsvp/upsertMeetupRsvp_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/upsertMeetupRsvp/upsertMeetupRsvp_realtime_table_config.json
@@ -1,35 +1,14 @@
{
"tableName": "upsertMeetupRsvp",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "1",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "upsertMeetupRsvp",
- "replicasPerPartition": "1",
- "replicaGroupStrategyConfig": {
- "partitionColumn": "event_id",
- "numInstancesPerPartition": 1
- }
+ "replication": "1"
},
- "tenants": {},
"tableIndexConfig": {
- "loadMode": "MMAP",
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
- "stream.kafka.topic.name": "upsertMeetupRSVPEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.zk.broker.url": "localhost:2191/kafka",
- "stream.kafka.broker.list": "localhost:19092",
- "realtime.segment.flush.threshold.size": 30,
- "realtime.segment.flush.threshold.rows": 30
- },
"segmentPartitionConfig": {
"columnPartitionMap": {
"event_id": {
@@ -39,6 +18,45 @@
}
}
},
+ "instanceAssignmentConfigMap": {
+ "CONSUMING": {
+ "tagPoolConfig": {
+ "tag": "DefaultTenant_REALTIME"
+ },
+ "replicaGroupPartitionConfig": {
+ "replicaGroupBased": true,
+ "numReplicaGroups": 1,
+ "partitionColumn": "event_id",
+ "numPartitions": 2,
+ "numInstancesPerPartition": 1
+ }
+ }
+ },
+ "routing": {
+ "segmentPrunerTypes": [
+ "partition"
+ ],
+ "instanceSelectorType": "strictReplicaGroup"
+ },
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "upsertMeetupRSVPEvents",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.zk.broker.url": "localhost:2191/kafka",
+ "stream.kafka.broker.list": "localhost:19092"
+ }
+ ]
+ }
+ },
+ "upsertConfig": {
+ "mode": "FULL",
+ "enableSnapshot": true,
+ "enablePreload": true
+ },
"fieldConfigList": [
{
"name": "location",
@@ -51,14 +69,5 @@
],
"metadata": {
"customConfigs": {}
- },
- "routing": {
- "segmentPrunerTypes": ["partition"],
- "instanceSelectorType": "strictReplicaGroup"
- },
- "upsertConfig": {
- "mode": "FULL",
- "enableSnapshot": "true",
- "enablePreload": "true"
}
}
diff --git
a/pinot-tools/src/main/resources/examples/stream/upsertPartialMeetupRsvp/upsertPartialMeetupRsvp_realtime_table_config.json
b/pinot-tools/src/main/resources/examples/stream/upsertPartialMeetupRsvp/upsertPartialMeetupRsvp_realtime_table_config.json
index e712f5f13d..80ce012e55 100644
---
a/pinot-tools/src/main/resources/examples/stream/upsertPartialMeetupRsvp/upsertPartialMeetupRsvp_realtime_table_config.json
+++
b/pinot-tools/src/main/resources/examples/stream/upsertPartialMeetupRsvp/upsertPartialMeetupRsvp_realtime_table_config.json
@@ -1,49 +1,58 @@
{
"tableName": "upsertPartialMeetupRsvp",
"tableType": "REALTIME",
+ "tenants": {},
"segmentsConfig": {
"timeColumnName": "mtime",
- "timeType": "MILLISECONDS",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "1",
- "segmentPushType": "APPEND",
- "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
- "schemaName": "upsertPartialMeetupRsvp",
- "replicasPerPartition": "1"
+ "replication": "1"
},
- "tenants": {},
"tableIndexConfig": {
- "loadMode": "MMAP",
- "nullHandlingEnabled": true,
- "streamConfigs": {
- "streamType": "kafka",
- "stream.kafka.consumer.type": "lowLevel",
- "stream.kafka.topic.name": "upsertPartialMeetupRSVPEvents",
- "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
- "stream.kafka.hlc.zk.connect.string": "localhost:2191/kafka",
- "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
- "stream.kafka.zk.broker.url": "localhost:2191/kafka",
- "stream.kafka.broker.list": "localhost:19092",
- "realtime.segment.flush.threshold.size": 30,
- "realtime.segment.flush.threshold.rows": 30
- }
+ "segmentPartitionConfig": {
+ "columnPartitionMap": {
+ "event_id": {
+ "functionName": "Hashcode",
+ "numPartitions": 2
+ }
+ }
+ },
+ "nullHandlingEnabled": true
},
- "fieldConfigList": [
- {
- "name": "location",
- "encodingType": "RAW",
- "indexType": "H3",
- "properties": {
- "resolutions": "5"
+ "instanceAssignmentConfigMap": {
+ "CONSUMING": {
+ "tagPoolConfig": {
+ "tag": "DefaultTenant_REALTIME"
+ },
+ "replicaGroupPartitionConfig": {
+ "replicaGroupBased": true,
+ "numReplicaGroups": 1,
+ "partitionColumn": "event_id",
+ "numPartitions": 2,
+ "numInstancesPerPartition": 1
}
}
- ],
- "metadata": {
- "customConfigs": {}
},
"routing": {
+ "segmentPrunerTypes": [
+ "partition"
+ ],
"instanceSelectorType": "strictReplicaGroup"
},
+ "ingestionConfig": {
+ "streamIngestionConfig": {
+ "streamConfigMaps": [
+ {
+ "streamType": "kafka",
+ "stream.kafka.topic.name": "upsertPartialMeetupRSVPEvents",
+ "stream.kafka.decoder.class.name":
"org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
+ "stream.kafka.consumer.factory.class.name":
"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
+ "stream.kafka.zk.broker.url": "localhost:2191/kafka",
+ "stream.kafka.broker.list": "localhost:19092"
+ }
+ ]
+ }
+ },
"upsertConfig": {
"mode": "PARTIAL",
"partialUpsertStrategies": {
@@ -51,5 +60,18 @@
"group_name": "UNION",
"venue_name": "APPEND"
}
+ },
+ "fieldConfigList": [
+ {
+ "name": "location",
+ "encodingType": "RAW",
+ "indexType": "H3",
+ "properties": {
+ "resolutions": "5"
+ }
+ }
+ ],
+ "metadata": {
+ "customConfigs": {}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]