zhangjh-art opened a new issue #11087:
URL: https://github.com/apache/druid/issues/11087
Please help!
I write some Avro-serialized records to Kafka (record like 'ProducerRecord<String,
TestSchema> record = new ProducerRecord<String, TestSchema>("TestTopic0409",
testSchema)'), and I want to load it into Druid. When I start Druid locally and
connect to the Kafka data, the result shows garbled characters.

then I use spec like:
{
"type": "kafka",
"spec": {
"dataSchema": {
"dataSource": "new-data-source",
"timestampSpec": null,
"dimensionsSpec": null,
"metricsSpec": [],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": {
"type": "none"
},
"rollup": true,
"intervals": null
},
"transformSpec": {
"filter": null,
"transforms": []
},
"parser": {
"type": "avro_stream",
"avroBytesDecoder": {
"type": "schema_inline",
"schema": {
"namespace": "com.airebroker.data",
"name": "Test",
"type": "record",
"fields": [
{
"name": "id",
"type": "int"
},
{
"name": "name",
"type": "string"
},
{
"name": "timestamp",
"type": "long"
}
]
}
},
"parseSpec": {
"format": "avro",
"timestampSpec": {},
"dimensionsSpec": {}
}
}
},
"ioConfig": {
"topic": "TestTopic0409",
"inputFormat": {
"type": "avro_ocf",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": []
},
"binaryAsString": false
},
"replicas": 1,
"taskCount": 1,
"taskDuration": "PT3600S",
"consumerProperties": {
"bootstrap.servers": "localhost:9092"
},
"pollTimeout": 100,
"startDelay": "PT5S",
"period": "PT30S",
"useEarliestOffset": false,
"completionTimeout": "PT1800S",
"lateMessageRejectionPeriod": null,
"earlyMessageRejectionPeriod": null,
"lateMessageRejectionStartDateTime": null,
"stream": "TestTopic0409",
"useEarliestSequenceNumber": false,
"type": "kafka"
},
"tuningConfig": {
"type": "kafka",
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"maxRowsPerSegment": 5000000,
"maxTotalRows": null,
"intermediatePersistPeriod": "PT10M",
"basePersistDirectory":
"/home/zhangjh/apache-druid-0.20.2/var/tmp/druid-realtime-persist7289903804951562243",
"maxPendingPersists": 0,
"indexSpec": {
"bitmap": {
"type": "roaring",
"compressRunOnSerialization": true
},
"dimensionCompression": "lz4",
"metricCompression": "lz4",
"longEncoding": "longs",
"segmentLoader": null
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "roaring",
"compressRunOnSerialization": true
},
"dimensionCompression": "lz4",
"metricCompression": "lz4",
"longEncoding": "longs",
"segmentLoader": null
},
"buildV9Directly": true,
"reportParseExceptions": false,
"handoffConditionTimeout": 0,
"resetOffsetAutomatically": false,
"segmentWriteOutMediumFactory": null,
"workerThreads": null,
"chatThreads": null,
"chatRetries": 8,
"httpTimeout": "PT10S",
"shutdownTimeout": "PT80S",
"offsetFetchPeriod": "PT30S",
"intermediateHandoffPeriod": "P2147483647D",
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"skipSequenceNumberAvailabilityCheck": false,
"repartitionTransitionDuration": "PT120S"
}
}
}
Druid gives me this message: Druid requires flat data (non-nested,
non-hierarchical). Each row should represent a discrete event.
If you have nested data, you can flatten it here. If the provided flattening
capabilities are not sufficient, please pre-process your data before ingesting
it into Druid.
Ensure that your data appears correctly in a row/column orientation.
and gives me this result: Error: undefined
Thanks in advance for your reply!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]