HeartSaVioR commented on code in PR #38517:
URL: https://github.com/apache/spark/pull/38517#discussion_r1049119941
##########
connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala:
##########
@@ -195,6 +200,102 @@ abstract class KafkaMicroBatchSourceSuiteBase extends
KafkaSourceSuiteBase {
true
}
+ /**
+ * Test async progress tracking capability with Kafka source and sink
+ */
+ test("async progress tracking") {
+ val inputTopic = newTopic()
+ testUtils.createTopic(inputTopic, partitions = 5)
+
+ val dataSent = new ListBuffer[String]()
+ testUtils.sendMessages(inputTopic, (0 until 15).map { case x =>
+ val m = s"foo-$x"
+ dataSent += m
+ m
+ }.toArray, Some(0))
+
+ val outputTopic = newTopic()
+ testUtils.createTopic(outputTopic, partitions = 5)
+
+ withTempDir { dir =>
+ val reader = spark
+ .readStream
+ .format("kafka")
+ .option("kafka.bootstrap.servers", testUtils.brokerAddress)
+ .option("kafka.metadata.max.age.ms", "1")
+ .option("maxOffsetsPerTrigger", 5)
+ .option("subscribe", inputTopic)
+ .option("startingOffsets", "earliest")
+ .load()
+
+ def startQuery(): StreamingQuery = {
+ reader.writeStream
+ .format("kafka")
+ .option("checkpointLocation", dir.getCanonicalPath)
+ .option("kafka.bootstrap.servers", testUtils.brokerAddress)
+ .option("kafka.max.block.ms", "5000")
+ .option("topic", outputTopic)
+ .option(ASYNC_PROGRESS_TRACKING_ENABLED, true)
+ .option(ASYNC_PROGRESS_TRACKING_CHECKPOINTING_INTERVAL_MS, 1000)
+ .queryName("kafkaStream")
+ .start()
+ }
+
+ def readResults(): ListBuffer[String] = {
Review Comment:
The output would be the same, but both the code and the actual execution would
be much simpler with a batch query. See the code below for the batch-query version:
```
val data = spark.read.format("kafka")...load()
  .selectExpr("CAST(value AS STRING)").as[String].collect()
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]