Github user jiangxb1987 commented on a diff in the pull request:
https://github.com/apache/spark/pull/21898#discussion_r205960887
--- Diff: core/src/main/scala/org/apache/spark/BarrierTaskContextImpl.scala
---
@@ -39,8 +44,51 @@ private[spark] class BarrierTaskContextImpl(
taskMemoryManager, localProperties, metricsSystem, taskMetrics)
with BarrierTaskContext {
- // TODO SPARK-24817 implement global barrier.
- override def barrier(): Unit = {}
+ private val barrierCoordinator: RpcEndpointRef = {
+ val env = SparkEnv.get
+ RpcUtils.makeDriverRef("barrierSync", env.conf, env.rpcEnv)
+ }
+
+ private val timer = new Timer("Barrier task timer for barrier() calls.")
+
+ private var barrierEpoch = 0
+
+ private lazy val numTasks = localProperties.getProperty("numTasks",
"0").toInt
+
+ override def barrier(): Unit = {
+ logInfo(s"Task $taskAttemptId from Stage $stageId(Attempt
$stageAttemptNumber) has entered " +
+ s"the global sync, current barrier epoch is $barrierEpoch.")
+
+ val startTime = System.currentTimeMillis()
+ val timerTask = new TimerTask {
+ override def run(): Unit = {
+ logInfo(s"Task $taskAttemptId from Stage $stageId(Attempt
$stageAttemptNumber) waiting " +
+ s"under the global sync since $startTime, has been waiting for "
+
+ s"${(System.currentTimeMillis() - startTime) / 1000} seconds,
current barrier epoch " +
+ s"is $barrierEpoch.")
+ }
+ }
+ // Log the update of global sync every 60 seconds.
+ timer.schedule(timerTask, 60000, 60000)
+
+ try {
+ barrierCoordinator.askSync[Unit](
+ message = RequestToSync(numTasks, stageId, stageAttemptNumber,
taskAttemptId, barrierEpoch),
+ timeout = new RpcTimeout(31536000 /** = 3600 * 24 * 365 */
seconds, "barrierTimeout"))
--- End diff --
I set a fixed timeout for the RPC intentionally, so that users get a
SparkException thrown by BarrierCoordinator instead of an RpcTimeoutException
from the RPC framework.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]