HeartSaVioR commented on a change in pull request #26292: [SPARK-29635][SS] Extract base test suites between Kafka micro-batch sink and Kafka continuous sink
URL: https://github.com/apache/spark/pull/26292#discussion_r339957602
##########
File path: external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala
##########
@@ -270,46 +245,150 @@ class KafkaSinkStreamingSuite extends KafkaSinkSuiteBase with StreamTest {
withTopic.foreach(stream.option("topic", _))
withOutputMode.foreach(stream.outputMode(_))
withOptions.foreach(opt => stream.option(opt._1, opt._2))
+ defaultTrigger.foreach(stream.trigger(_))
}
stream.start()
}
- private def runAndVerifyStreamingQueryException(
- input: MemoryStream[String],
+ private def runAndVerifyException[T <: Exception : ClassTag](
+ input: MemoryStreamBase[Int],
expectErrorMsg: String)(
writerFn: => StreamingQuery): Unit = {
var writer: StreamingQuery = null
val ex: Exception = try {
- intercept[StreamingQueryException] {
+ intercept[T] {
writer = writerFn
- input.addData("1", "2", "3", "4", "5")
- writer.processAllAvailable()
+ input.addData(1, 2, 3, 4, 5)
+ input match {
+ case _: MemoryStream[Int] => writer.processAllAvailable()
+ case _: ContinuousMemoryStream[Int] =>
+ eventually(timeout(streamingTimeout)) {
+ assert(writer.exception.isDefined)
+ }
+
+ throw writer.exception.get
+ }
}
} finally {
if (writer != null) writer.stop()
}
- assert(ex.getMessage.toLowerCase(Locale.ROOT).contains(expectErrorMsg))
+ TestUtils.assertExceptionLowercaseMsg(ex, expectErrorMsg)
}
private def assertWrongSchema(
- input: MemoryStream[String],
+ input: MemoryStreamBase[Int],
selectExpr: Seq[String],
expectErrorMsg: String): Unit = {
- runAndVerifyStreamingQueryException(input, expectErrorMsg) {
- createKafkaWriter(input.toDF())(withSelectExpr = selectExpr: _*)
+ // Just check for the common exception type shared by the micro-batch and continuous cases
+ runAndVerifyException[Exception](input, expectErrorMsg) {
+ createKafkaWriter(input.toDF())(
+ withSelectExpr = selectExpr: _*)
}
}
private def assertWrongOption(
- input: MemoryStream[String],
+ input: MemoryStreamBase[Int],
options: Map[String, String],
expectErrorMsg: String): Unit = {
- runAndVerifyStreamingQueryException(input, expectErrorMsg) {
+ // Just check for the common exception type shared by the micro-batch and continuous cases
+ runAndVerifyException[Exception](input, expectErrorMsg) {
createKafkaWriter(input.toDF(), withOptions = options)()
}
}
}
+class KafkaSinkMicroBatchStreamingSuite extends KafkaSinkStreamingSuiteBase {
+ import testImplicits._
+
+ override val streamingTimeout = 30.seconds
+
+ override protected def createMemoryStream(): MemoryStreamBase[Int] = MemoryStream[Int]
+
+ override protected def verifyResult(writer: StreamingQuery)(verifyFn: => Unit): Unit = {
+ failAfter(streamingTimeout) {
+ writer.processAllAvailable()
+ }
+ verifyFn
+ }
+
+ override protected def defaultTrigger: Option[Trigger] = None
+
+ test("streaming - sink progress is produced") {
+ /* ensure sink progress is correctly produced. */
+ val input = MemoryStream[String]
+ val topic = newTopic()
+ testUtils.createTopic(topic)
+
+ val writer = createKafkaWriter(
+ input.toDF(),
+ withTopic = Some(topic),
+ withOutputMode = Some(OutputMode.Update()))()
+
+ try {
+ input.addData("1", "2", "3")
+ verifyResult(writer) {
+ assert(writer.lastProgress.sink.numOutputRows == 3L)
+ }
+ } finally {
+ writer.stop()
+ }
+ }
+}
+
+class KafkaContinuousSinkSuite extends KafkaSinkStreamingSuiteBase {
+ import testImplicits._
+
+ // We need more than the default local[2] to be able to schedule all partitions simultaneously.
+ override protected def createSparkSession = new TestSparkSession(
+ new SparkContext(
+ "local[10]",
+ "continuous-stream-test-sql-context",
+ sparkConf.set("spark.sql.testkey", "true")))
+
+ override protected def createMemoryStream(): MemoryStreamBase[Int] = {
+ ContinuousMemoryStream.singlePartition[Int]
+ }
+
+ override protected def verifyResult(writer: StreamingQuery)(verifyFn: => Unit): Unit = {
+ eventually(timeout(streamingTimeout), interval(5.seconds)) {
+ verifyFn
+ }
+ }
+
+ override protected def defaultTrigger: Option[Trigger] = Some(Trigger.Continuous(1000))
+
+ test("generic - write big data with small producer buffer") {
Review comment:
Copied from KafkaContinuousSinkSuite. It's not strictly necessary to put it here, but I'd like to leave it as it was.
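
For readers skimming the hunk: a minimal sketch of the hooks the new base suite appears to declare. The three members are inferred from the overrides visible above (createMemoryStream, verifyResult, defaultTrigger); the abstract declarations below are a reconstruction, not the actual patch.

    // Reconstruction only: the hunk shows these members being overridden in both
    // concrete suites, so the base suite presumably declares them as abstract hooks.
    abstract class KafkaSinkStreamingSuiteBase extends KafkaSinkSuiteBase with StreamTest {
      // Engine-specific source: MemoryStream[Int] for micro-batch,
      // ContinuousMemoryStream.singlePartition[Int] for continuous.
      protected def createMemoryStream(): MemoryStreamBase[Int]

      // Engine-specific wait-and-check: micro-batch can block on
      // processAllAvailable(), continuous has to poll with eventually(...).
      protected def verifyResult(writer: StreamingQuery)(verifyFn: => Unit): Unit

      // Trigger applied at query start: None for micro-batch,
      // Trigger.Continuous(1000) for continuous.
      protected def defaultTrigger: Option[Trigger]
    }

With those hooks in place, the shared helpers (assertWrongSchema, assertWrongOption, runAndVerifyException) run unchanged against both engines, and runAndVerifyException only needs its ClassTag bound to intercept the engine-appropriate exception type.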