damccorm commented on code in PR #17386:
URL: https://github.com/apache/beam/pull/17386#discussion_r853175887
##########
sdks/go/pkg/beam/core/runtime/exec/datasource.go:
##########
@@ -320,6 +321,72 @@ func (n *DataSource) Progress() ProgressReportSnapshot {
return ProgressReportSnapshot{ID: n.SID.PtransformID, Name: n.Name,
Count: c, pcol: pcol}
}
+func (n *DataSource) getProcessContinuation() sdf.ProcessContinuation {
+ if u, ok := n.Out.(*ProcessSizedElementsAndRestrictions); ok {
+ return u.continuation
+ }
+ return nil
+}
+
+// Checkpoint attempts to split an SDF that has self-checkpointed (e.g. returned a
+// ProcessContinuation) and needs to be resumed later. If the underlying DoFn is not
+// splittable or has not returned a resuming continuation, the function returns an empty
+// SplitResult, a negative resumption time, and a false boolean to indicate that no split
+// occurred.
+func (n *DataSource) Checkpoint() (SplitResult, time.Duration, bool, error) {
+ n.mu.Lock()
+ defer n.mu.Unlock()
+
+ pc := n.getProcessContinuation()
+ if pc == nil {
+ return SplitResult{}, -1 * time.Minute, false, nil
+ }
+ if !pc.ShouldResume() {
+ return SplitResult{}, -1 * time.Minute, false, nil
+ }
+
+ su := SplittableUnit(n.Out.(*ProcessSizedElementsAndRestrictions))
+
+ // Get the output watermark before splitting to avoid accidentally overestimating
+ ow := su.GetOutputWatermark()
+
+ // Always split at fraction 0.0, should have no primaries left.
+ ps, rs, err := su.Split(0.0)
+ if err != nil {
+ return SplitResult{}, -1 * time.Minute, false, err
+ }
+ if ps != nil {
+ return SplitResult{}, -1 * time.Minute, false,
fmt.Errorf("failed to checkpoint: got %v primary roots, want nil", ps)
+ }
+
+ wc := MakeWindowEncoder(n.Coder.Window)
+ ec := MakeElementEncoder(coder.SkipW(n.Coder))
+ encodeElms := func(fvs []*FullValue) ([][]byte, error) {
+ encElms := make([][]byte, len(fvs))
+ for i, fv := range fvs {
+ enc, err := encodeElm(fv, wc, ec)
+ if err != nil {
+ return nil, err
+ }
+ encElms[i] = enc
+ }
+ return encElms, nil
+ }
+
+ rsEnc, err := encodeElms(rs)
+ if err != nil {
+ return SplitResult{}, -1 * time.Minute, false, err
+ }
Review Comment:
That works for me. I was actually considering that originally and thought
it wouldn't be as clean, but looking at it in practice, I like it more.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]