damccorm commented on code in PR #17716:
URL: https://github.com/apache/beam/pull/17716#discussion_r877526830


##########
sdks/go/pkg/beam/core/runtime/exec/datasource.go:
##########
@@ -348,16 +350,25 @@ func (n *DataSource) makeEncodeElms() func([]*FullValue) 
([][]byte, error) {
        return encodeElms
 }
 
-func getRTrackerFromRoot(root *FullValue) (sdf.BoundableRTracker, float64, 
bool) {
-       tracker, ok := 
root.Elm.(*FullValue).Elm2.(*FullValue).Elm.(sdf.BoundableRTracker)
+func getBoundedRTrackerFromRoot(root *FullValue) (sdf.BoundableRTracker, 
float64, bool) {
+       tElm := root.Elm.(*FullValue).Elm2.(*FullValue).Elm
+       tracker, ok := tElm.(sdf.RTracker)
        if !ok {
+               log.Warnf(context.Background(), "expected type sdf.RTracker, 
got type %T", tElm)
                return nil, -1.0, false
        }
+       boundTracker, ok := tracker.(sdf.BoundableRTracker)
+       if !ok {
+               log.Warn(context.Background(), "expected type 
sdf.BoundableRTracker; ensure that the RTracker implements IsBounded()")
+               // Assume an RTracker that does not implement IsBounded() will 
always be bounded, wrap so it can be used.
+               boundTracker = wrappedbounded.NewTracker(tracker)
+       }
        size, ok := root.Elm2.(float64)
        if !ok {
+               log.Warnf(context.Background(), "expected size to be type 
float64, got type %T", root.Elm2)

Review Comment:
   ```suggestion
                log.Warnf(context.Background(), "expected restriction size to 
be type float64, got type %T", root.Elm2)
   ```



##########
sdks/go/pkg/beam/core/runtime/exec/datasource.go:
##########
@@ -385,11 +396,16 @@ func (n *DataSource) Checkpoint() (SplitResult, 
time.Duration, bool, error) {
        if err != nil {
                return SplitResult{}, -1 * time.Minute, false, err
        }
+       if len(rs) == 0 {
+               return SplitResult{}, -1 * time.Minute, false, nil

Review Comment:
   I'm trying to wrap my head around when we would ever expect this case - I 
have 2 related questions:
   
   1) If the user has checkpointed but then returns an empty residual, they 
shouldn't have checkpointed, right? I'd probably expect us to at least log a 
warning in that case.
   2) Even if there are no residuals, don't we still want to validate that they 
haven't set any primaries? That's still an error waiting to happen.



##########
sdks/go/pkg/beam/io/rtrackers/wrappedbounded/wrappedbounded.go:
##########
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package offsetrange defines a restriction and restriction tracker for offset
+// ranges. An offset range is just a range, with a start and end, that can
+// begin at an offset, and is commonly used to represent byte ranges for files
+// or indices for iterable containers.
+
+package wrappedbounded
+
+import "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf"
+
+// Tracker wraps an implementation of an RTracker and adds an IsBounded() 
function
+// that returns true in order to allow RTrackers to be handled as bounded 
BoundableRTrackers
+// if necessary (like in self-checkpointing evaluation.)
+type Tracker struct {
+       baseTracker sdf.RTracker
+}
+
+// TryClaim attempts to claim a block of work from the underlying RTracker's 
restriction.
+func (t *Tracker) TryClaim(pos interface{}) (ok bool) {
+       return t.baseTracker.TryClaim(pos)
+}
+
+// GetError returns an error from the underlying RTracker if it has stopped 
executing. Returns nil
+// if none has occurred.
+func (t *Tracker) GetError() error {
+       return t.baseTracker.GetError()
+}
+
+// TrySplit splits the underlying RTracker's restriction into a primary (work 
that is currently executing)
+// and a residual (work that will be split off and resumed later.)
+func (t *Tracker) TrySplit(fraction float64) (primary, residual interface{}, 
err error) {
+       return t.baseTracker.TrySplit(fraction)
+}
+
+// GetProgress returns two abstract scalars representing the amount of work 
done and the remaining work
+// left in the underlying RTracker. These are unitless values, only used to 
estimate work in relation to
+// each other.
+func (t *Tracker) GetProgress() (done float64, remaining float64) {
+       return t.baseTracker.GetProgress()
+}
+
+// IsDone() returns a boolean indicating if the work represented by the 
underlying RTracker has

Review Comment:
   ```suggestion
   // IsDone returns a boolean indicating if the work represented by the 
underlying RTracker has
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to