lostluck commented on a change in pull request #17173:
URL: https://github.com/apache/beam/pull/17173#discussion_r834684360
##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore_test.go
##########
@@ -16,11 +16,164 @@
package datastoreio
import (
+ "context"
+ "errors"
+ "reflect"
+ "strings"
"testing"
"cloud.google.com/go/datastore"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+ "google.golang.org/api/option"
)
+// fake client type implements datastoreio.clientType
+type fakeClient struct {
+ runFn func()
+ closeFn func()
+}
+
+func (client *fakeClient) Run(context.Context, *datastore.Query)
*datastore.Iterator {
+ client.runFn()
+ // return an empty iterator
+ return new(datastore.Iterator)
+}
+
+func (client *fakeClient) Close() error {
+ client.closeFn()
+ return nil
+}
+
+// mock type for query
+type Foo struct {
+}
+
+type Bar struct {
+}
+
+func TestRead(t *testing.T) {
+ runCounter := 0
+ closeCounter := 0
+
+ // setup a fake newClient caller
+ originalClient := newClient
+ newClient = func(ctx context.Context,
+ projectID string,
+ opts ...option.ClientOption) (clientType, error) {
+ client := fakeClient{
+ runFn: func() {
+ runCounter += 1
+ },
+ closeFn: func() {
+ closeCounter += 1
+ },
+ }
+ return &client, nil
+ }
+
+ testCases := []struct {
+ v interface{}
+ shard int
+ expectRun int
+ expectClose int
+ }{
+ // case 1: shard=1, without split query
+ {Foo{}, 1, 1, 1},
+ // case 2: shard=2 (>1), with split query
+ {Bar{}, 2, 2, 2},
+ }
+ for _, tc := range testCases {
+ itemType := reflect.TypeOf(tc.v)
+ itemKey := runtime.RegisterType(itemType)
+
+ p, s := beam.NewPipelineWithRoot()
+ Read(s, "project", "Item", tc.shard, itemType, itemKey)
+
+ ptest.RunAndValidate(t, p)
+
+ if runCounter != tc.expectRun {
+ t.Errorf("got number of datastore.Client.Run call: %v,
wanted %v",
+ runCounter, tc.expectRun)
+ }
+ if runCounter != tc.expectRun {
+ t.Errorf("got number of datastore.Client.Run call: %v,
wanted %v",
+ closeCounter, tc.expectClose)
+ }
+
+ // reset counter
+ runCounter = 0
+ closeCounter = 0
+ }
+
+ // tear down: recover original newClient caller
+ newClient = originalClient
Review comment:
Use `t.Cleanup(func() { newClient = originalClient })` at the top of the
test to ensure that this executes at test completion, and to colocate the reset.
Same in the other tests.
##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore_test.go
##########
@@ -16,11 +16,164 @@
package datastoreio
import (
+ "context"
+ "errors"
+ "reflect"
+ "strings"
"testing"
"cloud.google.com/go/datastore"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+ "google.golang.org/api/option"
)
+// fake client type implements datastoreio.clientType
+type fakeClient struct {
+ runFn func()
+ closeFn func()
+}
+
+func (client *fakeClient) Run(context.Context, *datastore.Query)
*datastore.Iterator {
+ client.runFn()
+ // return an empty iterator
+ return new(datastore.Iterator)
+}
+
+func (client *fakeClient) Close() error {
+ client.closeFn()
+ return nil
+}
+
+// mock type for query
+type Foo struct {
+}
+
+type Bar struct {
+}
+
+func TestRead(t *testing.T) {
+ runCounter := 0
+ closeCounter := 0
+
+ // setup a fake newClient caller
+ originalClient := newClient
+ newClient = func(ctx context.Context,
+ projectID string,
+ opts ...option.ClientOption) (clientType, error) {
+ client := fakeClient{
+ runFn: func() {
+ runCounter += 1
+ },
+ closeFn: func() {
+ closeCounter += 1
+ },
+ }
+ return &client, nil
+ }
+
+ testCases := []struct {
+ v interface{}
+ shard int
+ expectRun int
+ expectClose int
+ }{
+ // case 1: shard=1, without split query
+ {Foo{}, 1, 1, 1},
+ // case 2: shard=2 (>1), with split query
+ {Bar{}, 2, 2, 2},
+ }
+ for _, tc := range testCases {
+ itemType := reflect.TypeOf(tc.v)
+ itemKey := runtime.RegisterType(itemType)
Review comment:
Prefer to call 'beam.RegisterType` here instead of runtime directly.
Here and below.
##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore_test.go
##########
@@ -16,11 +16,164 @@
package datastoreio
import (
+ "context"
+ "errors"
+ "reflect"
+ "strings"
"testing"
"cloud.google.com/go/datastore"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
+ "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest"
+ "google.golang.org/api/option"
)
+// fake client type implements datastoreio.clientType
+type fakeClient struct {
+ runFn func()
+ closeFn func()
+}
+
+func (client *fakeClient) Run(context.Context, *datastore.Query)
*datastore.Iterator {
+ client.runFn()
+ // return an empty iterator
+ return new(datastore.Iterator)
+}
+
+func (client *fakeClient) Close() error {
+ client.closeFn()
+ return nil
+}
+
+// mock type for query
+type Foo struct {
+}
+
+type Bar struct {
+}
+
+func TestRead(t *testing.T) {
+ runCounter := 0
+ closeCounter := 0
+
+ // setup a fake newClient caller
+ originalClient := newClient
+ newClient = func(ctx context.Context,
+ projectID string,
+ opts ...option.ClientOption) (clientType, error) {
+ client := fakeClient{
+ runFn: func() {
+ runCounter += 1
+ },
+ closeFn: func() {
+ closeCounter += 1
+ },
+ }
+ return &client, nil
+ }
Review comment:
Consider consolidating this and the other code into a struct with a
method for handling the newClient. Closures are convenient in a pinch, but can
be harder to follow, and much of this is duplicated in the other tests.
Ideally, something new is created per test run, so that it can't be forgotten
to be reset, and that each run is independent, since a new instance is created
each time.
In combination with the change to query above, lets us pass in a specific
fake instance in per test run, and use that for validating if the calls happen.
No need to reset anything at the end of the loop or test run.
##########
File path: sdks/go/pkg/beam/io/datastoreio/datastore.go
##########
@@ -32,8 +32,20 @@ import (
"github.com/apache/beam/sdks/v2/go/pkg/beam/internal/errors"
"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
"google.golang.org/api/iterator"
+ "google.golang.org/api/option"
)
+type clientType interface {
+ Run(context.Context, *datastore.Query) *datastore.Iterator
+ Close() error
+}
+
+// call to newClient returns an instance of datastore.Client
+// redirect this call for unit test usage
+var newClient = func(ctx context.Context, projectID string, opts
...option.ClientOption) (clientType, error) {
Review comment:
The problem with this approach is that it affects a global/package level
value, preventing parallel test execution if that's something we'd like to do
in the future.
I would recommend the following approach instead:
* modifying the unexported `query` function to take in a function parameter
of the signature of this newClient.
* In the production calls (`Read`) pass a nil in that position.
* In the DoFns that require the newClient call add an unexported field of
the newClient function type
* Populate that field with the parameter.
* To the affected DoFns, add a Setup() function, that looks at that
newClient field, and if it's nil, set it to the datastore calling version of
the function (preferably defined somewhere as a named function, not as a
variable). Otherwise, don't touch it.
* in the ProcessElement methods, call the function field.
This will let us override the newClient function in tests, at least when
using the direct runner. It will also avoid the package level override, and
won't interrupt parallel execution of the tests at a later time. No package
level override also avoids needing to set and reset the field.
It won't fix things for portablee runners, as that would require a bit more
effort. These tests are currently only run on the direct runner, so that's
fine. This is for unit testing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]