eejbyfeldt commented on code in PR #42487:
URL: https://github.com/apache/spark/pull/42487#discussion_r1293896707
##########
core/src/main/scala/org/apache/spark/rdd/RDD.scala:
##########
@@ -1702,21 +1727,28 @@ abstract class RDD[T: ClassTag](
persist(LocalRDDCheckpointData.transformStorageLevel(storageLevel),
allowOverride = true)
}
- // If this RDD is already checkpointed and materialized, its lineage is
already truncated.
- // We must not override our `checkpointData` in this case because it is
needed to recover
- // the checkpointed data. If it is overridden, next time materializing on
this RDD will
- // cause error.
- if (isCheckpointedAndMaterialized) {
- logWarning("Not marking RDD for local checkpoint because it was already
" +
- "checkpointed and materialized")
- } else {
- // Lineage is not truncated yet, so just override any existing
checkpoint data with ours
- checkpointData match {
- case Some(_: ReliableRDDCheckpointData[_]) => logWarning(
- "RDD was already marked for reliable checkpointing: overriding with
local checkpoint.")
- case _ =>
- }
+
+ RDDCheckpointData.synchronized {
Review Comment:
Holding this lock while calling `doCheckpoint` causes a deadlock, so this code
was changed to hold the lock only for as long as it is actually needed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]