mkuthan opened a new issue #9461: SegmentMetadata query returns hasMultipleValues=true for all STRING dims in realtime segment URL: https://github.com/apache/druid/issues/9461 SegmentMetadata query returns hasMultipleValues=true for all STRING dims in a realtime segment. Segments moved to the historical nodes report hasMultipleValues=false as expected. ### Affected Version 0.16.0 (on 0.14.0 the issue didn't happen) ### Description Datasource is defined as follows: ``` "dataSchema": { "dataSource": "dead_letter_queue", "parser": { "type": "json", "parseSpec": { "format": "json", "timestampSpec": { "format": "millis", "column": "collector_timestamp" }, "dimensionsSpec": { "dimensions": [ { "type": "string", "name": "destination", "createBitmapIndex": false }, { "type": "string", "name": "message", "createBitmapIndex": false }, { "type": "string", "name": "stack_trace", "createBitmapIndex": false } ], "dimensionExclusions": null } } }, "metricsSpec": [ { "type": "count", "name": "dlq_count" } ], "granularitySpec": { "type": "uniform", "segmentGranularity": "HOUR", "queryGranularity": "MINUTE", "rollup": true, "intervals": null }, "transformSpec": { "filter": null, "transforms": [] } }, "tuningConfig": { "type": "KafkaTuningConfig", "maxRowsInMemory": 500000, "maxBytesInMemory": 0, "maxRowsPerSegment": 5000000, "maxTotalRows": 20000000, "intermediatePersistPeriod": "PT10M", "basePersistDirectory": "/var/tmp/druid/1575281192030-0", "maxPendingPersists": 0, "indexSpec": { "bitmap": { "type": "concise" }, "dimensionCompression": "lz4", "metricCompression": "lz4", "longEncoding": "longs" }, "indexSpecForIntermediatePersists": { "bitmap": { "type": "concise" }, "dimensionCompression": "lz4", "metricCompression": "lz4", "longEncoding": "longs" }, "buildV9Directly": true, "reportParseExceptions": false, "handoffConditionTimeout": 0, "resetOffsetAutomatically": true, "segmentWriteOutMediumFactory": null, "intermediateHandoffPeriod": "P2147483647D", "logParseExceptions": false, 
"maxParseExceptions": 2147483647, "maxSavedParseExceptions": 0, "skipSequenceNumberAvailabilityCheck": false }, "ioConfig": { "type": "kafka", (...) } "dataSource": "dead_letter_queue" } ``` Segment metadata query for the latest hour: ``` {"queryType":"segmentMetadata","dataSource":"dead_letter_queue","analysisTypes":["aggregators"],"intervals": ["2020-03-05T12:00:00/2020-03-06"],"merge": false} ``` And the results: ``` [ { "id": "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z", "intervals": null, "columns": { "__time": { "type": "LONG", "hasMultipleValues": false, "size": 0, "cardinality": null, "minValue": null, "maxValue": null, "errorMessage": null }, "destination": { "type": "STRING", "hasMultipleValues": false, "size": 0, "cardinality": 0, "minValue": null, "maxValue": null, "errorMessage": null }, "dlq_count": { "type": "LONG", "hasMultipleValues": false, "size": 0, "cardinality": null, "minValue": null, "maxValue": null, "errorMessage": null }, "message": { "type": "STRING", "hasMultipleValues": false, "size": 0, "cardinality": 0, "minValue": null, "maxValue": null, "errorMessage": null }, "stack_trace": { "type": "STRING", "hasMultipleValues": false, "size": 0, "cardinality": 0, "minValue": null, "maxValue": null, "errorMessage": null } }, "size": 0, "numRows": 19, "aggregators": { "dlq_count": { "type": "longSum", "name": "dlq_count", "fieldName": "dlq_count", "expression": null } }, "timestampSpec": null, "queryGranularity": null, "rollup": null }, { "id": "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z_1", "intervals": null, "columns": { "__time": { "type": "LONG", "hasMultipleValues": false, "size": 0, "cardinality": null, "minValue": null, "maxValue": null, "errorMessage": null }, "destination": { "type": "STRING", "hasMultipleValues": true, "size": 0, "cardinality": 0, "minValue": null, "maxValue": null, "errorMessage": null }, "dlq_count": { "type": "LONG", 
"hasMultipleValues": false, "size": 0, "cardinality": null, "minValue": null, "maxValue": null, "errorMessage": null }, "message": { "type": "STRING", "hasMultipleValues": true, "size": 0, "cardinality": 0, "minValue": null, "maxValue": null, "errorMessage": null }, "stack_trace": { "type": "STRING", "hasMultipleValues": true, "size": 0, "cardinality": 0, "minValue": null, "maxValue": null, "errorMessage": null } }, "size": 0, "numRows": 127, "aggregators": { "dlq_count": { "type": "longSum", "name": "dlq_count", "fieldName": "dlq_count", "expression": null } }, "timestampSpec": null, "queryGranularity": null, "rollup": null } ] ``` For segment "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z_1" all STRING dims are reported as multivalve ("hasMultipleValues": true). Segment metadata for all other segments (like "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z") is as expected, all STRING dims are defined as "hasMultipleValues": false.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
