dongjoon-hyun commented on code in PR #52788:
URL: https://github.com/apache/spark/pull/52788#discussion_r2478935858
##########
connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala:
##########
@@ -167,3 +172,88 @@ private[kafka010] object KafkaOffsetReader extends Logging
{
}
}
}
+
+private[kafka010] abstract class KafkaOffsetReaderBase extends
KafkaOffsetReader with Logging {
+ protected val rangeCalculator: KafkaOffsetRangeCalculator
+
+ private def getSortedExecutorList: Array[String] = {
+ def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation):
Boolean = {
+ if (a.host == b.host) {
+ a.executorId > b.executorId
+ } else {
+ a.host > b.host
+ }
+ }
+
+ val bm = SparkEnv.get.blockManager
+ bm.master.getPeers(bm.blockManagerId).toArray
+ .map(x => ExecutorCacheTaskLocation(x.host, x.executorId))
+ .sortWith(compare)
+ .map(_.toString)
+ }
+
+ override def getOffsetRangesFromResolvedOffsets(
+ fromPartitionOffsets: PartitionOffsetMap,
+ untilPartitionOffsets: PartitionOffsetMap,
+ reportDataLoss: (String, () => Throwable) => Unit): Seq[KafkaOffsetRange]
= {
+ // Find the new partitions, and get their earliest offsets
+ val newPartitions =
untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet)
+ val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq)
+ if (newPartitionInitialOffsets.keySet != newPartitions) {
+ // We cannot get from offsets for some partitions. It means they got
deleted.
+ val deletedPartitions =
newPartitions.diff(newPartitionInitialOffsets.keySet)
+ reportDataLoss(
+ s"Cannot find earliest offsets of ${deletedPartitions}. Some data may
have been missed",
+ () =>
+
KafkaExceptions.initialOffsetNotFoundForPartitions(deletedPartitions))
Review Comment:
Ditto. This lambda fits on a single line, so please avoid the line break after `() =>`:
```
() => KafkaExceptions.initialOffsetNotFoundForPartitions(deletedPartitions))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]