HeartSaVioR commented on code in PR #45221:
URL: https://github.com/apache/spark/pull/45221#discussion_r1505349482
##########
connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala:
##########
@@ -92,13 +93,18 @@ class KafkaContinuousStream(
val deletedPartitions =
oldStartPartitionOffsets.keySet.diff(currentPartitionSet)
if (deletedPartitions.nonEmpty) {
- val message = if (
-
offsetReader.driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
- s"$deletedPartitions are gone. ${CUSTOM_GROUP_ID_ERROR_MESSAGE}"
- } else {
- s"$deletedPartitions are gone. Some data may have been missed."
- }
- reportDataLoss(message)
+ val (message, config) =
+ if
(offsetReader.driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
+ (s"$deletedPartitions are gone.${CUSTOM_GROUP_ID_ERROR_MESSAGE}",
Review Comment:
nit: add a space between `.` and `${...}` in the interpolated message string (currently reads `gone.${CUSTOM_GROUP_ID_ERROR_MESSAGE}`).
##########
connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala:
##########
@@ -444,30 +447,40 @@ private[kafka010] class KafkaOffsetReaderAdmin(
override def getOffsetRangesFromResolvedOffsets(
fromPartitionOffsets: PartitionOffsetMap,
untilPartitionOffsets: PartitionOffsetMap,
- reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = {
+ reportDataLoss: (String, () => Throwable) => Unit):
Seq[KafkaOffsetRange] = {
// Find the new partitions, and get their earliest offsets
val newPartitions =
untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet)
val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq)
if (newPartitionInitialOffsets.keySet != newPartitions) {
// We cannot get from offsets for some partitions. It means they got
deleted.
val deletedPartitions =
newPartitions.diff(newPartitionInitialOffsets.keySet)
reportDataLoss(
- s"Cannot find earliest offsets of ${deletedPartitions}. Some data may
have been missed")
+ s"Cannot find earliest offsets of ${deletedPartitions}. Some data may
have been missed",
+ () =>
+ QueryExecutionErrors.initialOffsetNotFoundForPartitionsKafkaError(
+ deletedPartitions.toString))
}
logInfo(s"Partitions added: $newPartitionInitialOffsets")
newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) =>
reportDataLoss(
- s"Added partition $p starts from $o instead of 0. Some data may have
been missed")
+ s"Added partition $p starts from $o instead of 0. Some data may have
been missed",
+ () =>
QueryExecutionErrors.addedPartitionDoesNotStartFromZeroKafkaError(p.toString,
o))
}
val deletedPartitions =
fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet)
if (deletedPartitions.nonEmpty) {
- val message = if
(driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
- s"$deletedPartitions are gone.
${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}"
- } else {
- s"$deletedPartitions are gone. Some data may have been missed."
- }
- reportDataLoss(message)
+ val (message, config) =
+ if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
+ (s"$deletedPartitions are
gone.${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}",
Review Comment:
nit: add a space between `.` and `${...}` in the interpolated message string (currently reads `gone.${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}`).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]