ColeAtCharter commented on issue #16257:
URL: https://github.com/apache/druid/issues/16257#issuecomment-2176459570

   Update: the spec below has the same features as the original spec but with 
fewer columns (fewer dimensions, metrics, flattenSpec, and transforms).  It 
seems to work correctly.  So this problem appears only with the larger spec.
   
   Example test:
   - tested against the late May release
   
   ```
   $ echo "$spec" | jq  '.spec.tuningConfig.indexSpec'
   {
     "bitmap": {
       "type": "roaring"
     },
     "dimensionCompression": "lz4",
     "stringDictionaryEncoding": {
       "type": "frontCoded",
       "bucketSize": 8,
       "formatVersion": 1
     },
     "metricCompression": "none",
     "longEncoding": "auto",
     "jsonCompression": "lz4"
   }
   
   
   $ curl -k -XPOST -H content-type:application/json -H "Authorization: Basic $pwd" 'https://router-lb:9088/druid/indexer/v1/supervisor' -d "$spec"
   {"id":"druid_streaming_source"}
   
   $ curl -XGET -H content-type:application/json -H "Authorization: Basic $pwd" 'https://router-lb:9088/druid/indexer/v1/supervisor/druid_streaming_source' | jq '.spec.tuningConfig.indexSpec' #-d "$spec"
     % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                    Dload  Upload   Total   Spent    Left  Speed
   100  7228  100  7228    0     0  19790      0 --:--:-- --:--:-- --:--:-- 19802
   {
     "bitmap": {
       "type": "roaring"
     },
     "dimensionCompression": "lz4",
     "stringDictionaryEncoding": {
       "type": "frontCoded",
       "bucketSize": 8,
       "formatVersion": 1
     },
     "metricCompression": "none",
     "longEncoding": "auto",
     "jsonCompression": "lz4"
   }
   ```
   
   
   ```
   {
     "type": "kafka",
     "spec": {
       "dataSchema": {
         "dataSource": "druid_streaming_source",
         "timestampSpec": {
           "column": "ts_column",
           "format": "millis",
           "missingValue": null
         },
         "dimensionsSpec": {
           "dimensions": [
             {
               "type": "string",
               "name": "concat_dimension",
               "multiValueHandling": "SORTED_ARRAY",
               "createBitmapIndex": true
             },
             {
               "type": "string",
               "name": "primitive_column_a",
               "multiValueHandling": "SORTED_ARRAY",
               "createBitmapIndex": true
             }
           ],
           "dimensionExclusions": [
             "__time",
             "ts_column",
             "event_count",
             "sketch_column_a",
             "max_column_a"
           ],
           "includeAllDimensions": false,
           "useSchemaDiscovery": false
         },
         "metricsSpec": [
           {
             "type": "count",
             "name": "event_count"
           },
           {
             "type": "quantilesDoublesSketch",
             "name": "sketch_column_a",
             "fieldName": "numeric_source_column_a",
             "k": 128,
             "maxStreamLength": 1000000000
           },
           {
             "type": "longMax",
             "name": "max_column_a",
             "fieldName": "numeric_source_column_a"
           }
         ],
         "granularitySpec": {
           "type": "uniform",
           "segmentGranularity": "HOUR",
           "queryGranularity": {
             "type": "duration",
             "duration": 300000,
             "origin": "1970-01-01T00:00:00.000Z"
           },
           "rollup": true,
           "intervals": []
         },
         "transformSpec": {
           "filter": {
             "type": "and",
             "fields": [
               {
                 "type": "in",
                 "dimension": "filter_dim_a",
                 "values": [
                   "value-1",
                   "value-2"
                 ]
               },
               {
                 "type": "or",
                 "fields": [
                   {
                     "type": "selector",
                     "dimension": "filter_dim_b",
                     "value": "value-3"
                   },
                   {
                     "type": "selector",
                     "dimension": "filter_dim_c",
                     "value": "value-4"
                   }
                 ]
               }
             ]
           },
           "transforms": [
             {
               "type": "expression",
               "name": "concat_dimension",
               "expression": "concat(\"field_a\", '_', \"field_b\")"
             }
           ]
         }
       },
       "ioConfig": {
         "topic": "kafka_topic_a",
         "topicPattern": null,
         "inputFormat": {
           "type": "avro_stream",
           "flattenSpec": {
             "useFieldDiscovery": true,
             "fields": [
               {
                 "type": "path",
                 "name": "primitive_column_a",
                 "expr": "$.primitive_column_a",
                 "nodes": null
               }
             ]
           },
           "avroBytesDecoder": {
             "type": "schema_registry",
             "url": "https://schema-registry",
             "capacity": 2147483647,
             "urls": null,
             "config": null,
             "headers": null
           },
           "binaryAsString": false,
           "extractUnionsByType": false
         },
         "replicas": 1,
         "taskCount": 2,
         "taskDuration": "PT3600S",
         "consumerProperties": {
           "bootstrap.servers": "kafka-1,kafka-2,kafka-3"
         },
         "autoScalerConfig": null,
         "pollTimeout": 100,
         "startDelay": "PT5S",
         "period": "PT30S",
         "useEarliestOffset": false,
         "completionTimeout": "PT1800S",
         "lateMessageRejectionPeriod": null,
         "earlyMessageRejectionPeriod": null,
         "lateMessageRejectionStartDateTime": null,
         "configOverrides": null,
         "idleConfig": null,
         "stopTaskCount": null,
         "stream": "kafka_topic_a",
         "useEarliestSequenceNumber": false
       },
       "tuningConfig": {
         "type": "kafka",
         "appendableIndexSpec": {
           "type": "onheap",
           "preserveExistingMetrics": false
         },
         "maxRowsInMemory": 60000,
         "maxBytesInMemory": -1,
         "skipBytesInMemoryOverheadCheck": false,
         "maxRowsPerSegment": 2000000,
         "maxTotalRows": null,
         "intermediatePersistPeriod": "PT1M",
         "maxPendingPersists": 0,
         "indexSpec": {
           "bitmap": {
             "type": "roaring"
           },
           "dimensionCompression": "lz4",
           "stringDictionaryEncoding": {
             "type": "frontCoded",
             "bucketSize": 8,
             "formatVersion": 1
           },
           "metricCompression": "none",
           "longEncoding": "auto",
           "jsonCompression": "lz4"
         },
         "indexSpecForIntermediatePersists": null,
         "reportParseExceptions": false,
         "handoffConditionTimeout": 900000,
         "resetOffsetAutomatically": false,
         "segmentWriteOutMediumFactory": null,
         "workerThreads": null,
         "chatRetries": 8,
         "httpTimeout": "PT10S",
         "shutdownTimeout": "PT80S",
         "offsetFetchPeriod": "PT30S",
         "intermediateHandoffPeriod": "P2147483647D",
         "logParseExceptions": false,
         "maxParseExceptions": 2147483647,
         "maxSavedParseExceptions": 0,
         "numPersistThreads": 1,
         "skipSequenceNumberAvailabilityCheck": false,
         "repartitionTransitionDuration": "PT120S"
       }
     },
     "context": {
       "taskLockType": "APPEND",
       "useSharedLock": true
     },
     "suspended": false
   }
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to