mkuthan opened a new issue #9461: SegmentMetadata query returns 
hasMultipleValues=true for all STRING dims in realtime segment
URL: https://github.com/apache/druid/issues/9461
 
 
   SegmentMetadata query returns hasMultipleValues=true for all STRING dims in 
a realtime segment. Segments moved to the historical nodes report 
hasMultipleValues=false as expected.
   
   ### Affected Version
   
   0.16.0 (on 0.14.0 the issue didn't happen)
   
   ### Description
   
   Datasource is defined as follows:
   
   ```
   "dataSchema": {
       "dataSource": "dead_letter_queue",
       "parser": {
         "type": "json",
         "parseSpec": {
           "format": "json",
           "timestampSpec": {
             "format": "millis",
             "column": "collector_timestamp"
           },
           "dimensionsSpec": {
             "dimensions": [
               {
                 "type": "string",
                 "name": "destination",
                 "createBitmapIndex": false
               },
               {
                 "type": "string",
                 "name": "message",
                 "createBitmapIndex": false
               },
               {
                 "type": "string",
                 "name": "stack_trace",
                 "createBitmapIndex": false
               }
             ],
             "dimensionExclusions": null
           }
         }
       },
       "metricsSpec": [
         {
           "type": "count",
           "name": "dlq_count"
         }
       ],
       "granularitySpec": {
         "type": "uniform",
         "segmentGranularity": "HOUR",
         "queryGranularity": "MINUTE",
         "rollup": true,
         "intervals": null
       },
       "transformSpec": {
         "filter": null,
         "transforms": []
       }
     },
     "tuningConfig": {
       "type": "KafkaTuningConfig",
       "maxRowsInMemory": 500000,
       "maxBytesInMemory": 0,
       "maxRowsPerSegment": 5000000,
       "maxTotalRows": 20000000,
       "intermediatePersistPeriod": "PT10M",
       "basePersistDirectory": "/var/tmp/druid/1575281192030-0",
       "maxPendingPersists": 0,
       "indexSpec": {
         "bitmap": {
           "type": "concise"
         },
         "dimensionCompression": "lz4",
         "metricCompression": "lz4",
         "longEncoding": "longs"
       },
       "indexSpecForIntermediatePersists": {
         "bitmap": {
           "type": "concise"
         },
         "dimensionCompression": "lz4",
         "metricCompression": "lz4",
         "longEncoding": "longs"
       },
       "buildV9Directly": true,
       "reportParseExceptions": false,
       "handoffConditionTimeout": 0,
       "resetOffsetAutomatically": true,
       "segmentWriteOutMediumFactory": null,
       "intermediateHandoffPeriod": "P2147483647D",
       "logParseExceptions": false,
       "maxParseExceptions": 2147483647,
       "maxSavedParseExceptions": 0,
       "skipSequenceNumberAvailabilityCheck": false
     },
     "ioConfig": {
       "type": "kafka",
       (...)
     }
     "dataSource": "dead_letter_queue"
   }
   ```
   
   Segment metadata query for the latest hour:
   
   ```
   
{"queryType":"segmentMetadata","dataSource":"dead_letter_queue","analysisTypes":["aggregators"],"intervals":
 ["2020-03-05T12:00:00/2020-03-06"],"merge": false}
   ```
   
   And the results:
   
   ```
   [
     {
       "id": 
"dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z",
       "intervals": null,
       "columns": {
         "__time": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "destination": {
           "type": "STRING",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "dlq_count": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "message": {
           "type": "STRING",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "stack_trace": {
           "type": "STRING",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         }
       },
       "size": 0,
       "numRows": 19,
       "aggregators": {
         "dlq_count": {
           "type": "longSum",
           "name": "dlq_count",
           "fieldName": "dlq_count",
           "expression": null
         }
       },
       "timestampSpec": null,
       "queryGranularity": null,
       "rollup": null
     },
     {
       "id": 
"dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z_1",
       "intervals": null,
       "columns": {
         "__time": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "destination": {
           "type": "STRING",
           "hasMultipleValues": true,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "dlq_count": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "message": {
           "type": "STRING",
           "hasMultipleValues": true,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "stack_trace": {
           "type": "STRING",
           "hasMultipleValues": true,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         }
       },
       "size": 0,
       "numRows": 127,
       "aggregators": {
         "dlq_count": {
           "type": "longSum",
           "name": "dlq_count",
           "fieldName": "dlq_count",
           "expression": null
         }
       },
       "timestampSpec": null,
       "queryGranularity": null,
       "rollup": null
     }
   ]
   ```
   
   For segment 
"dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z_1"
 all STRING dims are reported as multi-value ("hasMultipleValues": true). 
Segment metadata for all other segments (like 
"dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z")
 is as expected: all STRING dims are reported with "hasMultipleValues": false. 
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to