Github user brkyvz commented on a diff in the pull request:
https://github.com/apache/spark/pull/20698#discussion_r171988510
--- Diff: external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala ---
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.kafka010
+
+import scala.collection.JavaConverters._
+
+import org.apache.kafka.common.TopicPartition
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.sources.v2.DataSourceOptions
+
+class KafkaOffsetRangeCalculatorSuite extends SparkFunSuite {
+
+ def testWithMinPartitions(name: String, minPartition: Int)
+ (f: KafkaOffsetRangeCalculator => Unit): Unit = {
+ val options = new DataSourceOptions(Map("minPartitions" -> minPartition.toString).asJava)
+ test(s"with minPartition = $minPartition: $name") {
+ f(KafkaOffsetRangeCalculator(options))
+ }
+ }
+
+
+ test("with no minPartition: N TopicPartitions to N offset ranges") {
+ val calc = KafkaOffsetRangeCalculator(DataSourceOptions.empty())
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1),
+ untilOffsets = Map(tp1 -> 2)) ==
+ Seq(KafkaOffsetRange(tp1, 1, 2, None)))
+
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1),
+ untilOffsets = Map(tp1 -> 2, tp2 -> 1), Seq.empty) ==
+ Seq(KafkaOffsetRange(tp1, 1, 2, None)))
+
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1, tp2 -> 1),
+ untilOffsets = Map(tp1 -> 2)) ==
+ Seq(KafkaOffsetRange(tp1, 1, 2, None)))
+
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1, tp2 -> 1),
+ untilOffsets = Map(tp1 -> 2),
+ executorLocations = Seq("location")) ==
+ Seq(KafkaOffsetRange(tp1, 1, 2, Some("location"))))
+ }
+
+ test("with no minPartition: empty ranges ignored") {
+ val calc = KafkaOffsetRangeCalculator(DataSourceOptions.empty())
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1, tp2 -> 1),
+ untilOffsets = Map(tp1 -> 2, tp2 -> 1)) ==
+ Seq(KafkaOffsetRange(tp1, 1, 2, None)))
+ }
+
+ testWithMinPartitions("N TopicPartitions to N offset ranges", 3) { calc
=>
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1, tp2 -> 1, tp3 -> 1),
+ untilOffsets = Map(tp1 -> 2, tp2 -> 2, tp3 -> 2)) ==
+ Seq(
+ KafkaOffsetRange(tp1, 1, 2, None),
+ KafkaOffsetRange(tp2, 1, 2, None),
+ KafkaOffsetRange(tp3, 1, 2, None)))
+ }
+
+ testWithMinPartitions("1 TopicPartition to N offset ranges", 4) { calc =>
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1),
+ untilOffsets = Map(tp1 -> 5)) ==
+ Seq(
+ KafkaOffsetRange(tp1, 1, 2, None),
+ KafkaOffsetRange(tp1, 2, 3, None),
+ KafkaOffsetRange(tp1, 3, 4, None),
+ KafkaOffsetRange(tp1, 4, 5, None)))
+
+ assert(
+ calc.getRanges(
+ fromOffsets = Map(tp1 -> 1),
+ untilOffsets = Map(tp1 -> 5),
+ executorLocations = Seq("location")) ==
+ Seq(
+ KafkaOffsetRange(tp1, 1, 2, None),
+ KafkaOffsetRange(tp1, 2, 3, None),
+ KafkaOffsetRange(tp1, 3, 4, None),
+ KafkaOffsetRange(tp1, 4, 5, None))) // location pref not set when minPartition is set
+ }
+
+ testWithMinPartitions("N skewed TopicPartitions to M offset ranges", 3)
{ calc =>
--- End diff ---
can you also add a test:
```
fromOffsets = Map(tp1 -> 1),
untilOffsets = Map(tp1 -> 10)
minPartitions = 3
```
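
For reference, a minimal sketch of what that case could look like in this suite, reusing the `testWithMinPartitions` helper and the `tp1` fixture already defined in the file. The three-way split of offsets 1..10 into 1-4, 4-7, 7-10 is an assumption about how the calculator divides the range evenly, not behavior confirmed by this diff:
```
// Hypothetical test sketch; the expected range boundaries below are an
// assumption about how KafkaOffsetRangeCalculator splits 9 offsets into
// 3 ranges when minPartitions = 3.
testWithMinPartitions("1 TopicPartition with large range to N offset ranges", 3) { calc =>
  assert(
    calc.getRanges(
      fromOffsets = Map(tp1 -> 1),
      untilOffsets = Map(tp1 -> 10)) ==
    Seq(
      KafkaOffsetRange(tp1, 1, 4, None),
      KafkaOffsetRange(tp1, 4, 7, None),
      KafkaOffsetRange(tp1, 7, 10, None)))
}
```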
---