shunping commented on code in PR #36163:
URL: https://github.com/apache/beam/pull/36163#discussion_r2349948086


##########
sdks/go/pkg/beam/runners/prism/internal/engine/elementmanager.go:
##########
@@ -892,70 +892,75 @@ func (em *ElementManager) PersistBundle(rb RunBundle, 
col2Coders map[string]PCol
        // Clear out the inprogress elements associated with the completed bundle.
        // Must be done after adding the new pending elements to avoid an incorrect
        // watermark advancement.
-       stage.mu.Lock()
-       completed := stage.inprogress[rb.BundleID]
-       em.addPending(-len(completed.es))
-       delete(stage.inprogress, rb.BundleID)
-       for k := range stage.inprogressKeysByBundle[rb.BundleID] {
-               delete(stage.inprogressKeys, k)
-       }
-       delete(stage.inprogressKeysByBundle, rb.BundleID)
-
-       // Adjust holds as needed.
-       for h, c := range newHolds {
-               if c > 0 {
-                       stage.watermarkHolds.Add(h, c)
-               } else if c < 0 {
-                       stage.watermarkHolds.Drop(h, -c)
-               }
-       }
-       for hold, v := range stage.inprogressHoldsByBundle[rb.BundleID] {
-               stage.watermarkHolds.Drop(hold, v)
-       }
-       delete(stage.inprogressHoldsByBundle, rb.BundleID)
-
-       // Clean up OnWindowExpiration bundle accounting, so window state
-       // may be garbage collected.
-       if stage.expiryWindowsByBundles != nil {
-               win, ok := stage.expiryWindowsByBundles[rb.BundleID]
-               if ok {
-                       stage.inProgressExpiredWindows[win] -= 1
-                       if stage.inProgressExpiredWindows[win] == 0 {
-                               delete(stage.inProgressExpiredWindows, win)
+       func() {
+               stage.mu.Lock()
+               // Defer unlocking the mutex within an anonymous function to ensure it's released
+               // even if a panic occurs during `em.addPending`. This prevents potential deadlocks
+               // if the waitgroup unexpectedly drops below zero due to a runner bug.
+               defer stage.mu.Unlock()
+               completed := stage.inprogress[rb.BundleID]
+               em.addPending(-len(completed.es))

Review Comment:
   This is where a panic can happen.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to