Github user tdas commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16947#discussion_r101869570
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala ---
    @@ -250,53 +249,56 @@ class StreamExecution(
           updateStatusMessage("Initializing sources")
           // force initialization of the logical plan so that the sources can be created
           logicalPlan
    -      state = ACTIVE
    -      // Unblock `awaitInitialization`
    -      initializationLatch.countDown()
    -
    -      triggerExecutor.execute(() => {
    -        startTrigger()
    -
    -        val isTerminated =
    -          if (isActive) {
    -            reportTimeTaken("triggerExecution") {
    -              if (currentBatchId < 0) {
    -                // We'll do this initialization only once
    -                populateStartOffsets()
    -                logDebug(s"Stream running from $committedOffsets to 
$availableOffsets")
    -              } else {
    -                constructNextBatch()
    +      if (state.compareAndSet(INITIALIZING, ACTIVE)) {
    +        // Unblock `awaitInitialization`
    +        initializationLatch.countDown()
    +
    +        triggerExecutor.execute(() => {
    +          startTrigger()
    +
    +          val continueToRun =
    +            if (isActive) {
    +              reportTimeTaken("triggerExecution") {
    +                if (currentBatchId < 0) {
    +                  // We'll do this initialization only once
    +                  populateStartOffsets()
    +                  logDebug(s"Stream running from $committedOffsets to 
$availableOffsets")
    +                } else {
    +                  constructNextBatch()
    +                }
    +                if (dataAvailable) {
    +                  currentStatus = currentStatus.copy(isDataAvailable = 
true)
    +                  updateStatusMessage("Processing new data")
    +                  runBatch()
    +                }
                   }
    +
    +              // Report trigger as finished and construct progress object.
    +              finishTrigger(dataAvailable)
                   if (dataAvailable) {
    -                currentStatus = currentStatus.copy(isDataAvailable = true)
    -                updateStatusMessage("Processing new data")
    -                runBatch()
    +                // We'll increase currentBatchId after we complete processing current batch's data
    +                currentBatchId += 1
    +              } else {
    +                currentStatus = currentStatus.copy(isDataAvailable = false)
    +                updateStatusMessage("Waiting for data to arrive")
    +                Thread.sleep(pollingDelayMs)
                   }
    -            }
    -
    -            // Report trigger as finished and construct progress object.
    -            finishTrigger(dataAvailable)
    -            if (dataAvailable) {
    -              // We'll increase currentBatchId after we complete processing current batch's data
    -              currentBatchId += 1
    +              true
                 } else {
    -              currentStatus = currentStatus.copy(isDataAvailable = false)
    -              updateStatusMessage("Waiting for data to arrive")
    -              Thread.sleep(pollingDelayMs)
    +              false
                 }
    -            true
    -          } else {
    -            false
    -          }
     
    -        // Update committed offsets.
    -        committedOffsets ++= availableOffsets
    -        updateStatusMessage("Waiting for next trigger")
    -        isTerminated
    -      })
    -      updateStatusMessage("Stopped")
    +          // Update committed offsets.
    +          committedOffsets ++= availableOffsets
    +          updateStatusMessage("Waiting for next trigger")
    +          continueToRun
    +        })
    +        updateStatusMessage("Stopped")
    +      } else {
    +        // `stop()` is already called. Let `finally` finish the rest work.
    --- End diff --
    
    Nit: reword "finish the rest work" in the comment above to "finish the cleanup".


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to