camelusluo opened a new issue #9378: kafka indexing service qps very low
URL: https://github.com/apache/druid/issues/9378
 
 
   Our Druid cluster consumes Kafka data via the Kafka indexing service (KIS),
but we find that the KIS QPS is very low: about 2000/s per task.
   
   ### Affected Version
   
   druid 0.13.0
   
   ### Description
   vm config
   <pre><code>
   24cores 96G
   </code></pre>
   
   middleManager config
   <pre><code>
   druid.service=druid/middleManager
   druid.plaintextPort=8091
   
   # Number of tasks per middleManager
   druid.worker.capacity=16
   
   # Task launch parameters
   druid.indexer.runner.javaOpts=-server -Xmx3g -XX:MaxDirectMemorySize=5g -Duser.timezone=UTC+08:00 -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager
   druid.indexer.task.baseTaskDir=var/druid/task
   druid.indexer.task.restoreTasksOnRestart=true
   
   # HTTP server threads
   druid.server.http.numThreads=32
   
   # Processing threads and buffers on Peons
   druid.indexer.fork.property.druid.processing.buffer.sizeBytes=268435456
   druid.indexer.fork.property.druid.processing.numThreads=8
   
   # Hadoop indexing
   druid.indexer.task.hadoopWorkingPath=var/druid/hadoop-tmp
   </code></pre>
   
   kafka partition
   <pre><code>
   24
   </code></pre>
   
   supervisor
   <pre><code>
   {
     "type": "kafka",
     "dataSchema": {
       "dataSource": "stream-report-request",
       "parser": {
         "type": "string",
         "parseSpec": {
           "format": "json",
           "timestampSpec": {
             "column": "processTime",
             "format": "auto"
           },
           "dimensionsSpec": {
             "dimensions": [
               "adGroupId",
               "adPositionId",
               "creativeId",
               "orderId",
               "province",
               "city",
               "dspName",
               "adClass",
               "resourceFreqControlMode",
               "companyId",
               "dspPosId",
               "targetGame",
               "targetScene",
               "targetKeyword",
               "targetRoom",
               "gbCode",
               "filterReason",
               "configId",
               "blockIds",
               "strBlockIds",
               "itemId",
               "pctrBucket"
             ],
             "dimensionExclusions": [],
             "spatialDimensions": []
           }
         }
       },
       "metricsSpec": [{
         "type": "longSum",
         "name": "adRequestCount",
         "fieldName": "adRequestCount"
       },
         {
           "type": "longSum",
           "name": "adRecallCount",
           "fieldName": "adRecallCount"
         },
         {
           "type": "thetaSketch",
           "name": "uid",
           "fieldName": "uid"
         }
       ],
       "granularitySpec": {
         "type": "uniform",
         "segmentGranularity": "ten_minute",
         "queryGranularity": "NONE"
       }
     },
     "tuningConfig": {
       "type": "kafka",
       "maxRowsInMemory": 20000,
       "maxBytesInMemory": -1,
       "intermediatePersistPeriod": "PT10M"
     },
     "ioConfig": {
       "topic": "stream-report-request-topic",
       "consumerProperties": {
         "bootstrap.servers": "",
         "group.id": "consumer_group_druid"
       },
       "useEarliestOffset": true,
       "taskCount": 12,
       "replicas": 1,
       "taskDuration": "PT10M",
       "completionTimeout": "PT10M"
     }
   }
   </code></pre>
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org

Reply via email to