lostluck commented on a change in pull request #17173:
URL: https://github.com/apache/beam/pull/17173#discussion_r834684360



##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore_test.go
##########
@@ -16,11 +16,164 @@
 package datastoreio
 
 import (
+       "context"
+       "errors"
+       "reflect"
+       "strings"
        "testing"
 
        "cloud.google.com/go/datastore"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+       "google.golang.org/api/option"
 )
 
+// fake client type implements datastoreio.clientType
+type fakeClient struct {
+       runFn   func()
+       closeFn func()
+}
+
+func (client *fakeClient) Run(context.Context, *datastore.Query) 
*datastore.Iterator {
+       client.runFn()
+       // return an empty iterator
+       return new(datastore.Iterator)
+}
+
+func (client *fakeClient) Close() error {
+       client.closeFn()
+       return nil
+}
+
+// mock type for query
+type Foo struct {
+}
+
+type Bar struct {
+}
+
+func TestRead(t *testing.T) {
+       runCounter := 0
+       closeCounter := 0
+
+       // setup a fake newClient caller
+       originalClient := newClient
+       newClient = func(ctx context.Context,
+               projectID string,
+               opts ...option.ClientOption) (clientType, error) {
+               client := fakeClient{
+                       runFn: func() {
+                               runCounter += 1
+                       },
+                       closeFn: func() {
+                               closeCounter += 1
+                       },
+               }
+               return &client, nil
+       }
+
+       testCases := []struct {
+               v           interface{}
+               shard       int
+               expectRun   int
+               expectClose int
+       }{
+               // case 1: shard=1, without split query
+               {Foo{}, 1, 1, 1},
+               // case 2: shard=2 (>1), with split query
+               {Bar{}, 2, 2, 2},
+       }
+       for _, tc := range testCases {
+               itemType := reflect.TypeOf(tc.v)
+               itemKey := runtime.RegisterType(itemType)
+
+               p, s := beam.NewPipelineWithRoot()
+               Read(s, "project", "Item", tc.shard, itemType, itemKey)
+
+               ptest.RunAndValidate(t, p)
+
+               if runCounter != tc.expectRun {
+                       t.Errorf("got number of datastore.Client.Run call: %v, 
wanted %v",
+                               runCounter, tc.expectRun)
+               }
+               if runCounter != tc.expectRun {
+                       t.Errorf("got number of datastore.Client.Run call: %v, 
wanted %v",
+                               closeCounter, tc.expectClose)
+               }
+
+               // reset counter
+               runCounter = 0
+               closeCounter = 0
+       }
+
+       // tear down: recover original newClient caller
+       newClient = originalClient

Review comment:
       Use `t.Cleanup(func() { newClient = originalClient })` at the top of the 
test to ensure that this executes at test completion, and to colocate the reset.
   
   Same in the other tests.

##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore_test.go
##########
@@ -16,11 +16,164 @@
 package datastoreio
 
 import (
+       "context"
+       "errors"
+       "reflect"
+       "strings"
        "testing"
 
        "cloud.google.com/go/datastore"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+       "google.golang.org/api/option"
 )
 
+// fake client type implements datastoreio.clientType
+type fakeClient struct {
+       runFn   func()
+       closeFn func()
+}
+
+func (client *fakeClient) Run(context.Context, *datastore.Query) 
*datastore.Iterator {
+       client.runFn()
+       // return an empty iterator
+       return new(datastore.Iterator)
+}
+
+func (client *fakeClient) Close() error {
+       client.closeFn()
+       return nil
+}
+
+// mock type for query
+type Foo struct {
+}
+
+type Bar struct {
+}
+
+func TestRead(t *testing.T) {
+       runCounter := 0
+       closeCounter := 0
+
+       // setup a fake newClient caller
+       originalClient := newClient
+       newClient = func(ctx context.Context,
+               projectID string,
+               opts ...option.ClientOption) (clientType, error) {
+               client := fakeClient{
+                       runFn: func() {
+                               runCounter += 1
+                       },
+                       closeFn: func() {
+                               closeCounter += 1
+                       },
+               }
+               return &client, nil
+       }
+
+       testCases := []struct {
+               v           interface{}
+               shard       int
+               expectRun   int
+               expectClose int
+       }{
+               // case 1: shard=1, without split query
+               {Foo{}, 1, 1, 1},
+               // case 2: shard=2 (>1), with split query
+               {Bar{}, 2, 2, 2},
+       }
+       for _, tc := range testCases {
+               itemType := reflect.TypeOf(tc.v)
+               itemKey := runtime.RegisterType(itemType)

Review comment:
       Prefer to call 'beam.RegisterType` here instead of runtime directly. 
Here and below.

##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore_test.go
##########
@@ -16,11 +16,164 @@
 package datastoreio
 
 import (
+       "context"
+       "errors"
+       "reflect"
+       "strings"
        "testing"
 
        "cloud.google.com/go/datastore"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
+       "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+       "google.golang.org/api/option"
 )
 
+// fake client type implements datastoreio.clientType
+type fakeClient struct {
+       runFn   func()
+       closeFn func()
+}
+
+func (client *fakeClient) Run(context.Context, *datastore.Query) 
*datastore.Iterator {
+       client.runFn()
+       // return an empty iterator
+       return new(datastore.Iterator)
+}
+
+func (client *fakeClient) Close() error {
+       client.closeFn()
+       return nil
+}
+
+// mock type for query
+type Foo struct {
+}
+
+type Bar struct {
+}
+
+func TestRead(t *testing.T) {
+       runCounter := 0
+       closeCounter := 0
+
+       // setup a fake newClient caller
+       originalClient := newClient
+       newClient = func(ctx context.Context,
+               projectID string,
+               opts ...option.ClientOption) (clientType, error) {
+               client := fakeClient{
+                       runFn: func() {
+                               runCounter += 1
+                       },
+                       closeFn: func() {
+                               closeCounter += 1
+                       },
+               }
+               return &client, nil
+       }

Review comment:
       Consider consolidating this and the other code into a struct with a 
method for handling the newClient. Closures are convenient in a pinch, but can 
be harder to follow, and much of this is duplicated in the other tests. 
Ideally, something new is created per test run, so that it can't be forgotten 
to be reset, and that each run is independent, since a new instance is created 
each time.
   
   In combination with the change to query above, lets us pass in a specific 
fake instance in per test run, and use that for validating if the calls happen. 
No need to reset anything at the end of the loop or test run.

##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore.go
##########
@@ -32,8 +32,20 @@ import (
        "github.com/apache/beam/sdks/v2/go/pkg/beam/internal/errors"
        "github.com/apache/beam/sdks/v2/go/pkg/beam/log"
        "google.golang.org/api/iterator"
+       "google.golang.org/api/option"
 )
 
+type clientType interface {
+       Run(context.Context, *datastore.Query) *datastore.Iterator
+       Close() error
+}
+
+// call to newClient returns an instance of datastore.Client
+// redirect this call for unit test usage
+var newClient = func(ctx context.Context, projectID string, opts 
...option.ClientOption) (clientType, error) {

Review comment:
       The problem with this approach is that it affects a global/package level 
value, preventing parallel test execution if that's something we'd like to do 
in the future. 
   
   I would recommend the following approach instead:
   * modifying the unexported `query` function to take in a function parameter 
of the signature of this newClient.
   * In the production calls (`Read`) pass a nil in that position.
   * In the DoFns that require the newClient call add an unexported field of 
the newClient function type
   * Populate that field with the parameter.
   * To the affected DoFns, add a Setup() function, that looks at that 
newClient field, and if it's nil, set it to the datastore calling version of 
the function (preferably defined somewhere as a named function, not as a 
variable). Otherwise, don't touch it.
   * in the ProcessElement methods, call the function field.
   
   This will let us override the newClient function in tests, at least when 
using the direct runner. It will also avoid the package level override, and 
won't interrupt parallel execution of the tests at a later time. No package 
level override also avoids needing to set and reset the field.
   
   It won't fix things for portablee runners, as that would require a bit more 
effort. These tests are currently only run on the direct runner, so that's 
fine. This is for unit testing.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to