damccorm commented on code in PR #17386:
URL: https://github.com/apache/beam/pull/17386#discussion_r853175887


##########
sdks/go/pkg/beam/core/runtime/exec/datasource.go:
##########
@@ -320,6 +321,72 @@ func (n *DataSource) Progress() ProgressReportSnapshot {
        return ProgressReportSnapshot{ID: n.SID.PtransformID, Name: n.Name, 
Count: c, pcol: pcol}
 }
 
+func (n *DataSource) getProcessContinuation() sdf.ProcessContinuation {
+       if u, ok := n.Out.(*ProcessSizedElementsAndRestrictions); ok {
+               return u.continuation
+       }
+       return nil
+}
+
+// Checkpoint attempts to split an SDF that has self-checkpointed (e.g. returned a
+// ProcessContinuation) and needs to be resumed later. If the underlying DoFn is not
+// splittable or has not returned a resuming continuation, the function returns an empty
+// SplitResult, a negative resumption time, and a false boolean to indicate that no split
+// occurred.
+func (n *DataSource) Checkpoint() (SplitResult, time.Duration, bool, error) {
+       n.mu.Lock()
+       defer n.mu.Unlock()
+
+       pc := n.getProcessContinuation()
+       if pc == nil {
+               return SplitResult{}, -1 * time.Minute, false, nil
+       }
+       if !pc.ShouldResume() {
+               return SplitResult{}, -1 * time.Minute, false, nil
+       }
+
+       su := SplittableUnit(n.Out.(*ProcessSizedElementsAndRestrictions))
+
+       // Get the output watermark before splitting to avoid accidentally overestimating
+       ow := su.GetOutputWatermark()
+
+       // Always split at fraction 0.0, should have no primaries left.
+       ps, rs, err := su.Split(0.0)
+       if err != nil {
+               return SplitResult{}, -1 * time.Minute, false, err
+       }
+       if ps != nil {
+               return SplitResult{}, -1 * time.Minute, false, 
fmt.Errorf("failed to checkpoint: got %v primary roots, want nil", ps)
+       }
+
+       wc := MakeWindowEncoder(n.Coder.Window)
+       ec := MakeElementEncoder(coder.SkipW(n.Coder))
+       encodeElms := func(fvs []*FullValue) ([][]byte, error) {
+               encElms := make([][]byte, len(fvs))
+               for i, fv := range fvs {
+                       enc, err := encodeElm(fv, wc, ec)
+                       if err != nil {
+                               return nil, err
+                       }
+                       encElms[i] = enc
+               }
+               return encElms, nil
+       }
+
+       rsEnc, err := encodeElms(rs)
+       if err != nil {
+               return SplitResult{}, -1 * time.Minute, false, err
+       }

Review Comment:
   That works for me. I was actually considering that originally and thought
it wouldn't be as clean, but looking at it in practice, I like it more.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to