lostluck commented on a change in pull request #16818: URL: https://github.com/apache/beam/pull/16818#discussion_r806360303
########## File path: sdks/go/test/integration/io/xlang/bigquery/bigquery_test.go ########## @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bigquery + +import ( + "math/rand" + "reflect" + "strings" + "testing" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/bigqueryio" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" + "github.com/apache/beam/sdks/v2/go/test/integration" +) + +func init() { + beam.RegisterFunction(createTestRows) + beam.RegisterType(reflect.TypeOf((*TestRow)(nil))) + beam.RegisterType(reflect.TypeOf((*RandData)(nil))) +} + +func checkFlags(t *testing.T) { + if *integration.GCPIoExpansionAddr == "" { + t.Skip("No GCP IO expansion address provided.") + } + if *integration.BigQueryDataset == "" { + t.Skip("No BigQuery dataset provided.") + } +} + +const ( + // A text to shuffle to get random words. + text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas eget nulla nec velit hendrerit placerat. Donec eu odio ultricies, fermentum arcu at, mollis lectus. 
Vestibulum porttitor pharetra sem vitae feugiat. Mauris facilisis neque in mauris feugiat rhoncus. Donec eu ipsum at nibh lobortis euismod. Nam at hendrerit felis. Vivamus et orci ex. Nam dui nisl, rutrum ac pretium eget, vehicula in tortor. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Phasellus ante lorem, pharetra blandit dapibus et, tempus nec purus. Maecenas in posuere sem, vel pharetra nisl. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec nec facilisis ex. Praesent euismod commodo efficitur. Fusce in nisi nunc." + // Number of random elements to create for test. Must be less than number of words in text. + inputSize = 50 +) + +func shuffleText() []string { + words := strings.Fields(text) + rand.Shuffle(len(words), func(i, j int) { words[i], words[j] = words[j], words[i] }) + return words +} + +// TestRow is a sample row to write and read from that is expected to contain enough deterministic +// and random data in different data types to provide a reasonable signal that reading and writing +// works at a basic level. +type TestRow struct { + Counter int64 `beam:"counter"` // A deterministic counter, increments for each row generated. + Rand_data RandData `beam:"rand_data"` // An inner struct containing randomized data. +} + +// RandData is a struct of various types of random data. +type RandData struct { + Flip bool `beam:"flip"` // Flip is a bool with a random chance of either result (a coin flip). + Num int64 `beam:"num"` // Num is a random int64. + Word string `beam:"word"` // Word is a randomly selected word from a sample text. +} + +// ddlSchema is a string for BigQuery data definition language that corresponds to TestRow. 
+const ddlTestRowSchema = "counter INT64 NOT NULL, " + + "rand_data STRUCT<" + + "flip BOOL NOT NULL," + + "num INT64 NOT NULL," + + "word STRING NOT NULL" + + "> NOT NULL" + +// createTestRows creates a number of TestRows, populating the randomized data. +func createTestRows(_ []byte, emit func(TestRow)) { + rand.Seed(time.Now().UnixNano()) + words := shuffleText() + for i := 0; i < inputSize; i++ { + emit(TestRow{ + Counter: int64(i), + Rand_data: RandData{ + Flip: rand.Int63n(2) != 0, + Num: rand.Int63(), + Word: words[i], + }, + }) + } +} + +// TestBigQueryIO_BasicWriteRead runs a pipeline that generates semi-randomized elements, writes +// them to a BigQuery table and then reads from that table, and checks that the result matches the +// original inputs. This requires a pre-existing table to be created. +func TestBigQueryIO_BasicWriteRead(t *testing.T) { + integration.CheckFilters(t) + checkFlags(t) + + // Create a table before running the pipeline + table, err := newTempTable(*integration.BigQueryDataset, "go_bqio_it", ddlTestRowSchema) Review comment: consider printing the table name using `t.Log`, to make it clearer which resources map to this test. ########## File path: sdks/go/test/integration/io/xlang/bigquery/bigquery_test.go ########## @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package bigquery + +import ( + "math/rand" + "reflect" + "strings" + "testing" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/bigqueryio" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" + "github.com/apache/beam/sdks/v2/go/test/integration" +) + +func init() { + beam.RegisterFunction(createTestRows) + beam.RegisterType(reflect.TypeOf((*TestRow)(nil))) + beam.RegisterType(reflect.TypeOf((*RandData)(nil))) +} + +func checkFlags(t *testing.T) { + if *integration.GCPIoExpansionAddr == "" { + t.Skip("No GCP IO expansion address provided.") + } + if *integration.BigQueryDataset == "" { + t.Skip("No BigQuery dataset provided.") + } +} + +const ( + // A text to shuffle to get random words. + text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas eget nulla nec velit hendrerit placerat. Donec eu odio ultricies, fermentum arcu at, mollis lectus. Vestibulum porttitor pharetra sem vitae feugiat. Mauris facilisis neque in mauris feugiat rhoncus. Donec eu ipsum at nibh lobortis euismod. Nam at hendrerit felis. Vivamus et orci ex. Nam dui nisl, rutrum ac pretium eget, vehicula in tortor. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Phasellus ante lorem, pharetra blandit dapibus et, tempus nec purus. Maecenas in posuere sem, vel pharetra nisl. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec nec facilisis ex. Praesent euismod commodo efficitur. Fusce in nisi nunc." + // Number of random elements to create for test. Must be less than number of words in text. 
+ inputSize = 50 +) + +func shuffleText() []string { + words := strings.Fields(text) + rand.Shuffle(len(words), func(i, j int) { words[i], words[j] = words[j], words[i] }) + return words +} + +// TestRow is a sample row to write and read from that is expected to contain enough deterministic +// and random data in different data types to provide a reasonable signal that reading and writing +// works at a basic level. +type TestRow struct { + Counter int64 `beam:"counter"` // A deterministic counter, increments for each row generated. + Rand_data RandData `beam:"rand_data"` // An inner struct containing randomized data. +} + +// RandData is a struct of various types of random data. +type RandData struct { + Flip bool `beam:"flip"` // Flip is a bool with a random chance of either result (a coin flip). + Num int64 `beam:"num"` // Num is a random int64. + Word string `beam:"word"` // Word is a randomly selected word from a sample text. +} + +// ddlSchema is a string for BigQuery data definition language that corresponds to TestRow. +const ddlTestRowSchema = "counter INT64 NOT NULL, " + + "rand_data STRUCT<" + + "flip BOOL NOT NULL," + + "num INT64 NOT NULL," + + "word STRING NOT NULL" + + "> NOT NULL" + +// createTestRows creates a number of TestRows, populating the randomized data. +func createTestRows(_ []byte, emit func(TestRow)) { + rand.Seed(time.Now().UnixNano()) + words := shuffleText() + for i := 0; i < inputSize; i++ { + emit(TestRow{ + Counter: int64(i), + Rand_data: RandData{ + Flip: rand.Int63n(2) != 0, + Num: rand.Int63(), + Word: words[i], + }, + }) + } +} + +// TestBigQueryIO_BasicWriteRead runs a pipeline that generates semi-randomized elements, writes +// them to a BigQuery table and then reads from that table, and checks that the result matches the +// original inputs. This requires a pre-existing table to be created. 
+func TestBigQueryIO_BasicWriteRead(t *testing.T) { + integration.CheckFilters(t) + checkFlags(t) + + // Create a table before running the pipeline + table, err := newTempTable(*integration.BigQueryDataset, "go_bqio_it", ddlTestRowSchema) + if err != nil { + t.Fatalf("error creating BigQuery table: %v", err) + } + + p := beam.NewPipeline() + s := p.Root() + + // Generate elements and write to table. + rows := beam.ParDo(s, createTestRows, beam.Impulse(s)) + bigqueryio.Write(s, table, rows, + bigqueryio.CreateDisposition(bigqueryio.CreateNever), + bigqueryio.WriteExpansionAddr(*integration.GCPIoExpansionAddr)) + + // Read from table and compare to generated elements. + inType := reflect.TypeOf((*TestRow)(nil)).Elem() + readRows := bigqueryio.Read(s, inType, + bigqueryio.FromTable(table), + bigqueryio.ReadExpansionAddr(*integration.GCPIoExpansionAddr)) + passert.Equals(s, readRows, rows) + + ptest.RunAndValidate(t, p) Review comment: Assuming the test is successful, could we please also delete the table manually? I see that we have limited its lifespan, but I'd prefer that we don't accrue a rolling pile of these over the course of the 24-hour window. ########## File path: sdks/go/test/integration/io/xlang/bigquery/bigquery_test.go ########## @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package bigquery + +import ( + "math/rand" + "reflect" + "strings" + "testing" + "time" + + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/bigqueryio" + _ "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/dataflow" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/passert" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/ptest" + "github.com/apache/beam/sdks/v2/go/test/integration" +) + +func init() { + beam.RegisterFunction(createTestRows) + beam.RegisterType(reflect.TypeOf((*TestRow)(nil))) + beam.RegisterType(reflect.TypeOf((*RandData)(nil))) +} + +func checkFlags(t *testing.T) { + if *integration.GCPIoExpansionAddr == "" { + t.Skip("No GCP IO expansion address provided.") + } + if *integration.BigQueryDataset == "" { + t.Skip("No BigQuery dataset provided.") + } +} + +const ( + // A text to shuffle to get random words. + text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas eget nulla nec velit hendrerit placerat. Donec eu odio ultricies, fermentum arcu at, mollis lectus. Vestibulum porttitor pharetra sem vitae feugiat. Mauris facilisis neque in mauris feugiat rhoncus. Donec eu ipsum at nibh lobortis euismod. Nam at hendrerit felis. Vivamus et orci ex. Nam dui nisl, rutrum ac pretium eget, vehicula in tortor. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Phasellus ante lorem, pharetra blandit dapibus et, tempus nec purus. Maecenas in posuere sem, vel pharetra nisl. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Donec nec facilisis ex. Praesent euismod commodo efficitur. Fusce in nisi nunc." + // Number of random elements to create for test. Must be less than number of words in text. 
+ inputSize = 50 +) + +func shuffleText() []string { + words := strings.Fields(text) + rand.Shuffle(len(words), func(i, j int) { words[i], words[j] = words[j], words[i] }) + return words +} + +// TestRow is a sample row to write and read from that is expected to contain enough deterministic +// and random data in different data types to provide a reasonable signal that reading and writing +// works at a basic level. +type TestRow struct { + Counter int64 `beam:"counter"` // A deterministic counter, increments for each row generated. + Rand_data RandData `beam:"rand_data"` // An inner struct containing randomized data. +} + +// RandData is a struct of various types of random data. +type RandData struct { + Flip bool `beam:"flip"` // Flip is a bool with a random chance of either result (a coin flip). + Num int64 `beam:"num"` // Num is a random int64. + Word string `beam:"word"` // Word is a randomly selected word from a sample text. +} + +// ddlSchema is a string for BigQuery data definition language that corresponds to TestRow. +const ddlTestRowSchema = "counter INT64 NOT NULL, " + + "rand_data STRUCT<" + + "flip BOOL NOT NULL," + + "num INT64 NOT NULL," + + "word STRING NOT NULL" + + "> NOT NULL" + +// createTestRows creates a number of TestRows, populating the randomized data. +func createTestRows(_ []byte, emit func(TestRow)) { + rand.Seed(time.Now().UnixNano()) + words := shuffleText() + for i := 0; i < inputSize; i++ { + emit(TestRow{ + Counter: int64(i), + Rand_data: RandData{ + Flip: rand.Int63n(2) != 0, + Num: rand.Int63(), + Word: words[i], + }, + }) + } +} + +// TestBigQueryIO_BasicWriteRead runs a pipeline that generates semi-randomized elements, writes +// them to a BigQuery table and then reads from that table, and checks that the result matches the +// original inputs. This requires a pre-existing table to be created. 
+func TestBigQueryIO_BasicWriteRead(t *testing.T) { + integration.CheckFilters(t) + checkFlags(t) + + // Create a table before running the pipeline + table, err := newTempTable(*integration.BigQueryDataset, "go_bqio_it", ddlTestRowSchema) + if err != nil { + t.Fatalf("error creating BigQuery table: %v", err) + } + + p := beam.NewPipeline() + s := p.Root() + + // Generate elements and write to table. + rows := beam.ParDo(s, createTestRows, beam.Impulse(s)) + bigqueryio.Write(s, table, rows, + bigqueryio.CreateDisposition(bigqueryio.CreateNever), + bigqueryio.WriteExpansionAddr(*integration.GCPIoExpansionAddr)) Review comment: Does `bigqueryio.Write` not return a PCollection that we can have the read block on? As it stands, I think the write and the read will both happen at the same time. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
