gaborgsomogyi commented on a change in pull request #25760: [WIP][SPARK-29054][SS] Invalidate Kafka consumer when new delegation token available
URL: https://github.com/apache/spark/pull/25760#discussion_r324535253
 
 

 ##########
 File path: external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala
 ##########
 @@ -516,13 +521,41 @@ private[kafka010] class KafkaDataConsumer(
     fetchedData.withNewPoll(records.listIterator, offsetAfterPoll)
   }
 
-  private def getOrRetrieveConsumer(): InternalKafkaConsumer = _consumer match {
-    case None =>
-      _consumer = Option(consumerPool.borrowObject(cacheKey, kafkaParams))
-      require(_consumer.isDefined, "borrowing consumer from pool must always succeed.")
-      _consumer.get
+  private[kafka010] def getOrRetrieveConsumer(): InternalKafkaConsumer = {
+    if (!_consumer.isDefined) {
+      retrieveConsumer()
+    }
+    ensureConsumerHasLatestToken()
+    _consumer.get
+  }
 
-    case Some(consumer) => consumer
+  private def retrieveConsumer(): Unit = {
+    _consumer = Option(consumerPool.borrowObject(cacheKey, kafkaParams))
+    require(_consumer.isDefined, "borrowing consumer from pool must always succeed.")
+  }
+
+  private def ensureConsumerHasLatestToken(): Unit = {
+    require(_consumer.isDefined, "Consumer must be defined")
+    val params = _consumer.get.kafkaParamsWithSecurity
+    if (params.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) {
+      logDebug("Delegation token used by cached consumer, checking if it uses the latest token.")
+
+      val jaasParams = params.get(SaslConfigs.SASL_JAAS_CONFIG).asInstanceOf[String]
+      val clusterConfig = KafkaTokenUtil.findMatchingToken(SparkEnv.get.conf,
+        params.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG).asInstanceOf[String])
+      require(clusterConfig.isDefined, "Delegation token must exist for this consumer.")
+      val username = new String(clusterConfig.get._1.getIdentifier)
+      // The identifier changes only when a new token is obtained (in Spark we're not renewing tokens).
+      // Consequently, if the token identifier currently in the UGI is not found in the JAAS
+      // configuration, the consumer uses an old token and can be invalidated.
+      if (!jaasParams.contains(username)) {
+        logDebug("Cached consumer uses an old delegation token, invalidating.")
+        releaseConsumer()
+        consumerPool.invalidateKey(cacheKey)
+        fetchedDataPool.invalidate(cacheKey)
 
 Review comment:
   Yeah, the code [here](https://github.com/apache/spark/blob/471a3eff514480cfcbda79bde9294408cc8eb125/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala#L616) made me think, and I came to the same conclusion. So, all in all, we should keep the data invalidation here (to be on the safe side and to have only one concept in the code), and a separate JIRA can be filed to discuss whether it's safe to remove it.
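   To make the decision concrete, here is a minimal standalone sketch of the staleness check the hunk relies on. The object and method names are hypothetical illustrations (not the actual Spark classes); the assumption is that the JAAS config embeds the token identifier as the SCRAM username, so a consumer built with an older token no longer contains the latest identifier and must be invalidated together with its fetched data:

   ```scala
   object TokenStalenessCheckSketch {
     // Hypothetical stand-ins for the JAAS config a cached consumer was built with
     // and the identifier of the most recently obtained delegation token.
     def isConsumerStale(cachedJaasConfig: String, latestTokenIdentifier: Array[Byte]): Boolean = {
       val latestUsername = new String(latestTokenIdentifier)
       // The JAAS config carries the token identifier as the username, so a missing
       // identifier means the consumer was created with an older token.
       !cachedJaasConfig.contains(latestUsername)
     }

     def main(args: Array[String]): Unit = {
       val cachedJaas =
         """org.apache.kafka.common.security.scram.ScramLoginModule required
           | username="token-id-1" password="secret";""".stripMargin
       println(isConsumerStale(cachedJaas, "token-id-2".getBytes)) // true  -> invalidate consumer and fetched data
       println(isConsumerStale(cachedJaas, "token-id-1".getBytes)) // false -> keep cached consumer
     }
   }
   ```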
