jrmccluskey commented on code in PR #17386:
URL: https://github.com/apache/beam/pull/17386#discussion_r853152635


##########
sdks/go/pkg/beam/core/runtime/exec/datasource.go:
##########
@@ -320,6 +321,72 @@ func (n *DataSource) Progress() ProgressReportSnapshot {
        return ProgressReportSnapshot{ID: n.SID.PtransformID, Name: n.Name, Count: c, pcol: pcol}
 }
 
+func (n *DataSource) getProcessContinuation() sdf.ProcessContinuation {
+       if u, ok := n.Out.(*ProcessSizedElementsAndRestrictions); ok {
+               return u.continuation
+       }
+       return nil
+}
+
+// Checkpoint attempts to split an SDF that has self-checkpointed (e.g. returned a
+// ProcessContinuation) and needs to be resumed later. If the underlying DoFn is not
+// splittable or has not returned a resuming continuation, the function returns an empty
+// SplitResult, a negative resumption time, and a false boolean to indicate that no split
+// occurred.
+func (n *DataSource) Checkpoint() (SplitResult, time.Duration, bool, error) {
+       n.mu.Lock()
+       defer n.mu.Unlock()
+
+       pc := n.getProcessContinuation()
+       if pc == nil {
+               return SplitResult{}, -1 * time.Minute, false, nil
+       }
+       if !pc.ShouldResume() {
+               return SplitResult{}, -1 * time.Minute, false, nil
+       }
+
+       su := SplittableUnit(n.Out.(*ProcessSizedElementsAndRestrictions))
+
+       // Get the output watermark before splitting to avoid accidentally overestimating
+       ow := su.GetOutputWatermark()
+
+       // Always split at fraction 0.0, should have no primaries left.
+       ps, rs, err := su.Split(0.0)
+       if err != nil {
+               return SplitResult{}, -1 * time.Minute, false, err
+       }
+       if ps != nil {
+               return SplitResult{}, -1 * time.Minute, false, fmt.Errorf("failed to checkpoint: got %v primary roots, want nil", ps)
+       }
+
+       wc := MakeWindowEncoder(n.Coder.Window)
+       ec := MakeElementEncoder(coder.SkipW(n.Coder))
+       encodeElms := func(fvs []*FullValue) ([][]byte, error) {
+               encElms := make([][]byte, len(fvs))
+               for i, fv := range fvs {
+                       enc, err := encodeElm(fv, wc, ec)
+                       if err != nil {
+                               return nil, err
+                       }
+                       encElms[i] = enc
+               }
+               return encElms, nil
+       }
+
+       rsEnc, err := encodeElms(rs)
+       if err != nil {
+               return SplitResult{}, -1 * time.Minute, false, err
+       }

Review Comment:
   I started by pulling out the code that made the encodeElms function and 
turning that into a helper. We could go a step further and move the full 
encoding step too, but I like the somewhat explicit handling here since we'd 
only really be saving three lines or so. 



##########
sdks/go/pkg/beam/core/runtime/exec/datasource.go:
##########
@@ -320,6 +321,72 @@ func (n *DataSource) Progress() ProgressReportSnapshot {
        return ProgressReportSnapshot{ID: n.SID.PtransformID, Name: n.Name, Count: c, pcol: pcol}
 }
 
+func (n *DataSource) getProcessContinuation() sdf.ProcessContinuation {
+       if u, ok := n.Out.(*ProcessSizedElementsAndRestrictions); ok {
+               return u.continuation
+       }
+       return nil
+}
+
+// Checkpoint attempts to split an SDF that has self-checkpointed (e.g. returned a
+// ProcessContinuation) and needs to be resumed later. If the underlying DoFn is not
+// splittable or has not returned a resuming continuation, the function returns an empty
+// SplitResult, a negative resumption time, and a false boolean to indicate that no split
+// occurred.
+func (n *DataSource) Checkpoint() (SplitResult, time.Duration, bool, error) {
+       n.mu.Lock()
+       defer n.mu.Unlock()
+
+       pc := n.getProcessContinuation()
+       if pc == nil {
+               return SplitResult{}, -1 * time.Minute, false, nil
+       }
+       if !pc.ShouldResume() {

Review Comment:
   Good catch! Fixed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to