ColeAtCharter commented on issue #16257:
URL: https://github.com/apache/druid/issues/16257#issuecomment-2176459570
Update: the spec below has the same features as the original spec but with
fewer columns (fewer dimensions, metrics, flattenSpec fields, and transforms).
It seems to work correctly: the indexSpec returned by the supervisor matches
the one submitted. So the problem appears only with the larger spec.
Example test:
- tested against the late May release
```
$ echo "$spec" | jq '.spec.tuningConfig.indexSpec'
{
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "frontCoded",
"bucketSize": 8,
"formatVersion": 1
},
"metricCompression": "none",
"longEncoding": "auto",
"jsonCompression": "lz4"
}
$ curl -k -XPOST -H content-type:application/json -H "Authorization: Basic $pwd" \
  'https://router-lb:9088/druid/indexer/v1/supervisor' -d "$spec"
{"id":"druid_streaming_source"}
$ curl -XGET -H content-type:application/json -H "Authorization: Basic $pwd" \
  'https://router-lb:9088/druid/indexer/v1/supervisor/druid_streaming_source' |
  jq '.spec.tuningConfig.indexSpec' #-d "$spec"
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  7228  100  7228    0     0  19790      0 --:--:-- --:--:-- --:--:-- 19802
{
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "frontCoded",
"bucketSize": 8,
"formatVersion": 1
},
"metricCompression": "none",
"longEncoding": "auto",
"jsonCompression": "lz4"
}
```
```
{
"type": "kafka",
"spec": {
"dataSchema": {
"dataSource": "druid_streaming_source",
"timestampSpec": {
"column": "ts_column",
"format": "millis",
"missingValue": null
},
"dimensionsSpec": {
"dimensions": [
{
"type": "string",
"name": "concat_dimension",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "primitive_column_a",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
}
],
"dimensionExclusions": [
"__time",
"ts_column",
"event_count",
"sketch_column_a",
"max_column_a"
],
"includeAllDimensions": false,
"useSchemaDiscovery": false
},
"metricsSpec": [
{
"type": "count",
"name": "event_count"
},
{
"type": "quantilesDoublesSketch",
"name": "sketch_column_a",
"fieldName": "numeric_source_column_a",
"k": 128,
"maxStreamLength": 1000000000
},
{
"type": "longMax",
"name": "max_column_a",
"fieldName": "numeric_source_column_a"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "HOUR",
"queryGranularity": {
"type": "duration",
"duration": 300000,
"origin": "1970-01-01T00:00:00.000Z"
},
"rollup": true,
"intervals": []
},
"transformSpec": {
"filter": {
"type": "and",
"fields": [
{
"type": "in",
"dimension": "filter_dim_a",
"values": [
"value-1",
"value-2"
]
},
{
"type": "or",
"fields": [
{
"type": "selector",
"dimension": "filter_dim_b",
"value": "value-3"
},
{
"type": "selector",
"dimension": "filter_dim_c",
"value": "value-4"
}
]
}
]
},
"transforms": [
{
"type": "expression",
"name": "concat_dimension",
"expression": "concat(\"field_a\", '_', \"field_b\")"
}
]
}
},
"ioConfig": {
"topic": "kafka_topic_a",
"topicPattern": null,
"inputFormat": {
"type": "avro_stream",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [
{
"type": "path",
"name": "primitive_column_a",
"expr": "$.primitive_column_a",
"nodes": null
}
]
},
"avroBytesDecoder": {
"type": "schema_registry",
"url": "https://schema-registry",
"capacity": 2147483647,
"urls": null,
"config": null,
"headers": null
},
"binaryAsString": false,
"extractUnionsByType": false
},
"replicas": 1,
"taskCount": 2,
"taskDuration": "PT3600S",
"consumerProperties": {
"bootstrap.servers": "kafka-1,kafka-2,kafka-3"
},
"autoScalerConfig": null,
"pollTimeout": 100,
"startDelay": "PT5S",
"period": "PT30S",
"useEarliestOffset": false,
"completionTimeout": "PT1800S",
"lateMessageRejectionPeriod": null,
"earlyMessageRejectionPeriod": null,
"lateMessageRejectionStartDateTime": null,
"configOverrides": null,
"idleConfig": null,
"stopTaskCount": null,
"stream": "kafka_topic_a",
"useEarliestSequenceNumber": false
},
"tuningConfig": {
"type": "kafka",
"appendableIndexSpec": {
"type": "onheap",
"preserveExistingMetrics": false
},
"maxRowsInMemory": 60000,
"maxBytesInMemory": -1,
"skipBytesInMemoryOverheadCheck": false,
"maxRowsPerSegment": 2000000,
"maxTotalRows": null,
"intermediatePersistPeriod": "PT1M",
"maxPendingPersists": 0,
"indexSpec": {
"bitmap": {
"type": "roaring"
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "frontCoded",
"bucketSize": 8,
"formatVersion": 1
},
"metricCompression": "none",
"longEncoding": "auto",
"jsonCompression": "lz4"
},
"indexSpecForIntermediatePersists": null,
"reportParseExceptions": false,
"handoffConditionTimeout": 900000,
"resetOffsetAutomatically": false,
"segmentWriteOutMediumFactory": null,
"workerThreads": null,
"chatRetries": 8,
"httpTimeout": "PT10S",
"shutdownTimeout": "PT80S",
"offsetFetchPeriod": "PT30S",
"intermediateHandoffPeriod": "P2147483647D",
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"numPersistThreads": 1,
"skipSequenceNumberAvailabilityCheck": false,
"repartitionTransitionDuration": "PT120S"
}
},
"context": {
"taskLockType": "APPEND",
"useSharedLock": true
},
"suspended": false
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]