Ngone51 commented on a change in pull request #26624:
URL: https://github.com/apache/spark/pull/26624#discussion_r417858256
##########
File path: core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
##########
@@ -17,21 +17,104 @@
package org.apache.spark.util
+import java.util
import java.util.concurrent._
import java.util.concurrent.locks.ReentrantLock
+import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder}
import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor, Future}
import scala.concurrent.duration.{Duration, FiniteDuration}
import scala.language.higherKinds
import scala.util.control.NonFatal
-import com.google.common.util.concurrent.ThreadFactoryBuilder
-
import org.apache.spark.SparkException
import org.apache.spark.rpc.RpcAbortException
private[spark] object ThreadUtils {
+  object MDCAwareThreadPoolExecutor {
+    def newCachedThreadPool(threadFactory: ThreadFactory): ThreadPoolExecutor = {
+      // The values need to be synced with [[Executors.newCachedThreadPool]]
+      new MDCAwareThreadPoolExecutor(
+        0,
+        Integer.MAX_VALUE,
+        60L,
+        TimeUnit.SECONDS,
+        new SynchronousQueue[Runnable],
+        threadFactory)
+    }
+
+    def newFixedThreadPool(nThreads: Int, threadFactory: ThreadFactory): ThreadPoolExecutor = {
+      // The values need to be synced with [[Executors.newFixedThreadPool]]
+      new MDCAwareThreadPoolExecutor(
+        nThreads,
+        nThreads,
+        0L,
+        TimeUnit.MILLISECONDS,
+        new LinkedBlockingQueue[Runnable],
+        threadFactory)
+    }
+
+    def newSingleThreadExecutor(threadFactory: ThreadFactory): ThreadPoolExecutor = {
+      // The values need to be synced with [[Executors.newSingleThreadExecutor]]
+      new MDCAwareThreadPoolExecutor(
+        1,
+        1,
+        0L,
+        TimeUnit.MILLISECONDS,
+        new LinkedBlockingQueue[Runnable],
+        threadFactory)
+    }
+  }
+
+  class MDCAwareRunnable(proxy: Runnable) extends Runnable {
+    val callerThreadMDC: util.Map[String, String] = getMDCMap
+
+    @inline
+    private def getMDCMap: util.Map[String, String] = {
+      org.slf4j.MDC.getCopyOfContextMap match {
+        case null => new util.HashMap[String, String]()
Review comment:
Can we call `MDC.clear` instead of creating a new object?
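
For reference, a minimal sketch of what that suggestion could look like in `MDCAwareRunnable` (assuming SLF4J's `MDC` API; this is an illustration, not the PR's actual code):

```scala
import java.util.{Map => JMap}

import org.slf4j.MDC

class MDCAwareRunnable(proxy: Runnable) extends Runnable {
  // getCopyOfContextMap returns null when the calling thread has no MDC.
  private val callerThreadMDC: JMap[String, String] = MDC.getCopyOfContextMap

  override def run(): Unit = {
    if (callerThreadMDC == null) {
      MDC.clear()  // the suggestion: clear instead of allocating an empty HashMap
    } else {
      MDC.setContextMap(callerThreadMDC)
    }
    try {
      proxy.run()
    } finally {
      MDC.clear()  // avoid leaking MDC entries to the pooled thread
    }
  }
}
```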
##########
File path: core/src/main/scala/org/apache/spark/executor/Executor.scala
##########
@@ -674,6 +677,18 @@ private[spark] class Executor(
}
}
+  private def setMDCForTask(taskDescription: TaskDescription): Unit = {
+    val properties = taskDescription.properties
+
+    org.slf4j.MDC.put("appId", properties.getProperty("spark.app.id"))
+    org.slf4j.MDC.put("appName", properties.getProperty("spark.app.name"))
+
+    properties.asScala.filter(_._1.startsWith("mdc.")).foreach { item =>
+      val key = item._1.substring(4)
+      org.slf4j.MDC.put(key, item._2)
Review comment:
If the task runnable is the only user-facing interface so far, why do we have
to touch the whole `ThreadUtils`?
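
To illustrate the point, a hypothetical sketch (the structure is illustrative only, not the PR's actual code): if tasks are the only consumer, the MDC could be managed entirely inside the task runnable, leaving `ThreadUtils` untouched:

```scala
// Inside the executor's task runnable (sketch only).
override def run(): Unit = {
  setMDCForTask(taskDescription)  // set appId/appName/mdc.* keys for this task
  try {
    // ... existing task deserialization and execution ...
  } finally {
    org.slf4j.MDC.clear()  // don't leak this task's MDC to the pooled thread
  }
}
```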
##########
File path: core/src/main/scala/org/apache/spark/executor/Executor.scala
##########
@@ -674,6 +677,18 @@ private[spark] class Executor(
}
}
+  private def setMDCForTask(taskDescription: TaskDescription): Unit = {
+    val properties = taskDescription.properties
+
+    org.slf4j.MDC.put("appId", properties.getProperty("spark.app.id"))
+    org.slf4j.MDC.put("appName", properties.getProperty("spark.app.name"))
+
+    properties.asScala.filter(_._1.startsWith("mdc.")).foreach { item =>
+      val key = item._1.substring(4)
+      org.slf4j.MDC.put(key, item._2)
+    }
Review comment:
Can we use `org.apache.log4j` instead?
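
For comparison, a minimal sketch of the same method written against log4j 1.x's own MDC (which Spark bundled at the time; `%X{key}` in a `PatternLayout` reads from it directly). This is an assumption-laden illustration, not a tested change:

```scala
import scala.collection.JavaConverters._

// Sketch only: org.apache.log4j.MDC.put takes (String, Object).
private def setMDCForTask(taskDescription: TaskDescription): Unit = {
  val properties = taskDescription.properties

  org.apache.log4j.MDC.put("appId", properties.getProperty("spark.app.id"))
  org.apache.log4j.MDC.put("appName", properties.getProperty("spark.app.name"))

  properties.asScala
    .filter { case (key, _) => key.startsWith("mdc.") }
    .foreach { case (key, value) =>
      org.apache.log4j.MDC.put(key.stripPrefix("mdc."), value)
    }
}
```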
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]