[
https://issues.apache.org/jira/browse/BEAM-14035?focusedWorklogId=747878&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-747878
]
ASF GitHub Bot logged work on BEAM-14035:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 25/Mar/22 17:10
Start Date: 25/Mar/22 17:10
Worklog Time Spent: 10m
Work Description: laraschmidt commented on a change in pull request
#17181:
URL: https://github.com/apache/beam/pull/17181#discussion_r835454475
##########
File path:
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadConfiguration.java
##########
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+import org.apache.beam.sdk.schemas.io.InvalidConfigurationException;
+
+/**
+ * Configuration for reading from BigQuery.
+ *
+ * <p>This class is meant to be used with {@link
BigQuerySchemaTransformReadProvider}.
+ *
+ * <p><b>Internal only:</b> This class is actively being worked on, and it
will likely change. We
+ * provide no backwards compatibility guarantees, and it should not be
implemented outside the Beam
+ * repository.
+ */
+@DefaultSchema(AutoValueSchema.class)
+@AutoValue
+public abstract class BigQuerySchemaTransformReadConfiguration {
+
+ private static final boolean DEFAULT_USE_STANDARD_SQL = true;
+
+ /**
+ * Instantiates a {@link BigQuerySchemaTransformReadConfiguration.Builder}
from the SQL query.
+ *
+ * <p>The configuration defaults to useStandardSql=true.
+ */
+ public static Builder createQueryBuilder(String query) {
+ return defaultBuilder().setQuery(query).setJobType(JobType.QUERY);
+ }
+
+ /**
+ * Instantiates a {@link BigQuerySchemaTransformReadConfiguration.Builder}
to support BigQuery
+ * extract jobs. See the getTableSpec() getter for details.
+ */
+ public static Builder createExtractBuilder(String tableSpec) {
+ return
defaultBuilder().setTableSpec(tableSpec).setJobType(JobType.EXTRACT);
+ }
+
+ /**
+ * Instantiates a {@link BigQuerySchemaTransformReadConfiguration.Builder}
to support BigQuery
+ * extract jobs.
+ */
+ public static Builder createExtractBuilder(TableReference tableSpec) {
+ if (tableSpec.getProjectId().isEmpty()) {
+ return createExtractBuilder(
+ String.format("%s.%s", tableSpec.getDatasetId(),
tableSpec.getTableId()));
+ }
+ return createExtractBuilder(
+ String.format(
+ "%s:%s.%s",
Review comment:
This doesn't actually work if table has a ':' in it but maybe it's fine?
##########
File path:
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadConfiguration.java
##########
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+import org.apache.beam.sdk.schemas.io.InvalidConfigurationException;
+
+/**
+ * Configuration for reading from BigQuery.
+ *
+ * <p>This class is meant to be used with {@link
BigQuerySchemaTransformReadProvider}.
+ *
+ * <p><b>Internal only:</b> This class is actively being worked on, and it
will likely change. We
+ * provide no backwards compatibility guarantees, and it should not be
implemented outside the Beam
+ * repository.
+ */
+@DefaultSchema(AutoValueSchema.class)
+@AutoValue
+public abstract class BigQuerySchemaTransformReadConfiguration {
+
+ private static final boolean DEFAULT_USE_STANDARD_SQL = true;
+
+ /**
+ * Instantiates a {@link BigQuerySchemaTransformReadConfiguration.Builder}
from the SQL query.
+ *
+ * <p>The configuration defaults to useStandardSql=true.
+ */
+ public static Builder createQueryBuilder(String query) {
+ return defaultBuilder().setQuery(query).setJobType(JobType.QUERY);
+ }
+
+ /**
+ * Instantiates a {@link BigQuerySchemaTransformReadConfiguration.Builder}
to support BigQuery
+ * extract jobs. See the getTableSpec() getter for details.
+ */
+ public static Builder createExtractBuilder(String tableSpec) {
+ return
defaultBuilder().setTableSpec(tableSpec).setJobType(JobType.EXTRACT);
+ }
+
+ /**
+ * Instantiates a {@link BigQuerySchemaTransformReadConfiguration.Builder}
to support BigQuery
+ * extract jobs.
+ */
+ public static Builder createExtractBuilder(TableReference tableSpec) {
+ if (tableSpec.getProjectId().isEmpty()) {
+ return createExtractBuilder(
+ String.format("%s.%s", tableSpec.getDatasetId(),
tableSpec.getTableId()));
+ }
+ return createExtractBuilder(
+ String.format(
+ "%s:%s.%s",
+ tableSpec.getProjectId(), tableSpec.getDatasetId(),
tableSpec.getTableId()));
+ }
+
+ private static Builder defaultBuilder() {
+ return new AutoValue_BigQuerySchemaTransformReadConfiguration.Builder()
+ .setJobType(JobType.UNSPECIFIED)
+ .setQuery("")
Review comment:
can we make these nullable if they should be null? The schema inference
will handle that.
##########
File path:
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProvider.java
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.auto.value.AutoValue;
+import java.util.Collections;
+import java.util.List;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.transforms.SchemaTransform;
+import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionRowTuple;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+
+/**
+ * An implementation of {@link TypedSchemaTransformProvider} for BigQuery read
jobs configured using
+ * {@link BigQuerySchemaTransformReadConfiguration}.
+ *
+ * <p><b>Internal only:</b> This class is actively being worked on, and it
will likely change. We
+ * provide no backwards compatibility guarantees, and it should not be
implemented outside the Beam
+ * repository.
+ */
+@Internal
+@Experimental(Kind.SCHEMAS)
+@AutoValue
+public class BigQuerySchemaTransformReadProvider
+ extends
TypedSchemaTransformProvider<BigQuerySchemaTransformReadConfiguration> {
+
+ private static final String API = "bigquery";
+ private static final String VERSION = "v2";
+ private static final String TAG = "ToRows";
+
+ /** Returns the expected class of the configuration. */
+ @Override
+ protected Class<BigQuerySchemaTransformReadConfiguration>
configurationClass() {
+ return BigQuerySchemaTransformReadConfiguration.class;
+ }
+
+ /** Returns the expected {@link SchemaTransform} of the configuration. */
+ @Override
+ protected SchemaTransform from(BigQuerySchemaTransformReadConfiguration
configuration) {
+ return new BigQuerySchemaTransformRead(configuration);
+ }
+
+ /**
+ * An implementation of {@link SchemaTransform} for BigQuery read jobs
configured using {@link
+ * BigQuerySchemaTransformReadConfiguration}.
+ */
+ static class BigQuerySchemaTransformRead implements SchemaTransform {
+ private final BigQuerySchemaTransformReadConfiguration configuration;
+
+ BigQuerySchemaTransformRead(BigQuerySchemaTransformReadConfiguration
configuration) {
+ this.configuration = configuration;
+ }
+
+ BigQuerySchemaTransformReadConfiguration getConfiguration() {
+ return configuration;
+ }
+
+ /** Implements {@link SchemaTransform} buildTransform method. */
+ @Override
+ public PTransform<PCollectionRowTuple, PCollectionRowTuple>
buildTransform() {
+ return new BigQuerySchemaTransformReadTransform(configuration);
+ }
+ }
+
+ /**
+ * An implementation of {@link PTransform} for BigQuery read jobs configured
using {@link
+ * BigQuerySchemaTransformReadConfiguration}.
+ */
+ static class BigQuerySchemaTransformReadTransform
+ extends PTransform<PCollectionRowTuple, PCollectionRowTuple> {
+ private final BigQuerySchemaTransformReadConfiguration configuration;
+
+
BigQuerySchemaTransformReadTransform(BigQuerySchemaTransformReadConfiguration
configuration) {
+ this.configuration = configuration;
+ }
+
+ @Override
+ public PCollectionRowTuple expand(PCollectionRowTuple input) {
+ if (!input.getAll().isEmpty()) {
+ throw new IllegalArgumentException("PCollectionRowTuple input is
expected to be empty");
Review comment:
Mention BigQuery here so it's more understandable on its own?
##########
File path:
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProviderTest.java
##########
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.junit.Assert.assertEquals;
+
+import
org.apache.beam.sdk.io.gcp.bigquery.BigQuerySchemaTransformReadProvider.BigQuerySchemaTransformRead;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Test for {@link BigQuerySchemaTransformReadProvider}. */
+@RunWith(JUnit4.class)
+public class BigQuerySchemaTransformReadProviderTest {
+ private static final AutoValueSchema AUTO_VALUE_SCHEMA = new
AutoValueSchema();
+ private static final
TypeDescriptor<BigQuerySchemaTransformReadConfiguration> TYPE_DESCRIPTOR =
+ TypeDescriptor.of(BigQuerySchemaTransformReadConfiguration.class);
+ private static final Schema SCHEMA =
AUTO_VALUE_SCHEMA.schemaFor(TYPE_DESCRIPTOR);
+ private static final
SerializableFunction<BigQuerySchemaTransformReadConfiguration, Row>
+ ROW_SERIALIZABLE_FUNCTION =
AUTO_VALUE_SCHEMA.toRowFunction(TYPE_DESCRIPTOR);
+
+ @Test
+ public void testFromExtractConfiguration() {
+ BigQuerySchemaTransformReadConfiguration configuration =
+
BigQuerySchemaTransformReadConfiguration.createExtractBuilder("dataset.table").build();
+ Row configurationRow = ROW_SERIALIZABLE_FUNCTION.apply(configuration);
+ SchemaTransformProvider provider = new
BigQuerySchemaTransformReadProvider();
+ BigQuerySchemaTransformRead transform =
+ (BigQuerySchemaTransformRead) provider.from(configurationRow);
+ assertEquals(configuration.getTableSpec(),
transform.getConfiguration().getTableSpec());
+ }
+
+ @Test
+ public void testFromQueryConfiguration() {
+ BigQuerySchemaTransformReadConfiguration want =
+ BigQuerySchemaTransformReadConfiguration.createQueryBuilder("select *
from example")
+ .build();
+ Row configurationRow = ROW_SERIALIZABLE_FUNCTION.apply(want);
+ SchemaTransformProvider provider = new
BigQuerySchemaTransformReadProvider();
+ BigQuerySchemaTransformRead transform =
Review comment:
Is this testing your transform code at all though? The most complex part
of the code is expand so it would be good to have a simple test. But if we
can't get anything with bigquery working then that may not be an option. I know
you said that it was hard to get the test system running. Are there no good
examples of bigquery tests? I was thinking you were able to test the transform
code without it, but if not it would be good to add something if possible.
##########
File path:
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProvider.java
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.auto.value.AutoValue;
+import java.util.Collections;
+import java.util.List;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.transforms.SchemaTransform;
+import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionRowTuple;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+
+/**
+ * An implementation of {@link TypedSchemaTransformProvider} for BigQuery read
jobs configured using
+ * {@link BigQuerySchemaTransformReadConfiguration}.
+ *
+ * <p><b>Internal only:</b> This class is actively being worked on, and it
will likely change. We
+ * provide no backwards compatibility guarantees, and it should not be
implemented outside the Beam
+ * repository.
+ */
+@Internal
+@Experimental(Kind.SCHEMAS)
+@AutoValue
+public class BigQuerySchemaTransformReadProvider
+ extends
TypedSchemaTransformProvider<BigQuerySchemaTransformReadConfiguration> {
+
+ private static final String API = "bigquery";
+ private static final String VERSION = "v2";
+ private static final String TAG = "ToRows";
+
+ /** Returns the expected class of the configuration. */
+ @Override
+ protected Class<BigQuerySchemaTransformReadConfiguration>
configurationClass() {
+ return BigQuerySchemaTransformReadConfiguration.class;
+ }
+
+ /** Returns the expected {@link SchemaTransform} of the configuration. */
+ @Override
+ protected SchemaTransform from(BigQuerySchemaTransformReadConfiguration
configuration) {
+ return new BigQuerySchemaTransformRead(configuration);
+ }
+
+ /**
+ * An implementation of {@link SchemaTransform} for BigQuery read jobs
configured using {@link
+ * BigQuerySchemaTransformReadConfiguration}.
+ */
+ static class BigQuerySchemaTransformRead implements SchemaTransform {
+ private final BigQuerySchemaTransformReadConfiguration configuration;
+
+ BigQuerySchemaTransformRead(BigQuerySchemaTransformReadConfiguration
configuration) {
+ this.configuration = configuration;
+ }
+
+ BigQuerySchemaTransformReadConfiguration getConfiguration() {
+ return configuration;
+ }
+
+ /** Implements {@link SchemaTransform} buildTransform method. */
+ @Override
+ public PTransform<PCollectionRowTuple, PCollectionRowTuple>
buildTransform() {
+ return new BigQuerySchemaTransformReadTransform(configuration);
+ }
+ }
+
+ /**
+ * An implementation of {@link PTransform} for BigQuery read jobs configured
using {@link
+ * BigQuerySchemaTransformReadConfiguration}.
+ */
+ static class BigQuerySchemaTransformReadTransform
Review comment:
Can we put this at the end of the class so it's not in the middle? It's
kind of hard to follow in the middle.
##########
File path:
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProvider.java
##########
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.auto.value.AutoValue;
+import java.util.Collections;
+import java.util.List;
+import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.transforms.SchemaTransform;
+import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionRowTuple;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+
+/**
+ * An implementation of {@link TypedSchemaTransformProvider} for BigQuery read
jobs configured using
+ * {@link BigQuerySchemaTransformReadConfiguration}.
+ *
+ * <p><b>Internal only:</b> This class is actively being worked on, and it
will likely change. We
+ * provide no backwards compatibility guarantees, and it should not be
implemented outside the Beam
+ * repository.
+ */
+@Internal
+@Experimental(Kind.SCHEMAS)
+@AutoValue
+public class BigQuerySchemaTransformReadProvider
+ extends
TypedSchemaTransformProvider<BigQuerySchemaTransformReadConfiguration> {
+
+ private static final String API = "bigquery";
+ private static final String VERSION = "v2";
+ private static final String TAG = "ToRows";
Review comment:
READ_TAG or OUTPUT_TAG?
Also, I don't think ToRows really makes sense as an identifier for an output.
Would recommend a different name here.
##########
File path:
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProviderTest.java
##########
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.junit.Assert.assertEquals;
+
+import
org.apache.beam.sdk.io.gcp.bigquery.BigQuerySchemaTransformReadProvider.BigQuerySchemaTransformRead;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Test for {@link BigQuerySchemaTransformReadProvider}. */
+@RunWith(JUnit4.class)
+public class BigQuerySchemaTransformReadProviderTest {
+ private static final AutoValueSchema AUTO_VALUE_SCHEMA = new
AutoValueSchema();
+ private static final
TypeDescriptor<BigQuerySchemaTransformReadConfiguration> TYPE_DESCRIPTOR =
+ TypeDescriptor.of(BigQuerySchemaTransformReadConfiguration.class);
+ private static final Schema SCHEMA =
AUTO_VALUE_SCHEMA.schemaFor(TYPE_DESCRIPTOR);
+ private static final
SerializableFunction<BigQuerySchemaTransformReadConfiguration, Row>
+ ROW_SERIALIZABLE_FUNCTION =
AUTO_VALUE_SCHEMA.toRowFunction(TYPE_DESCRIPTOR);
+
+ @Test
+ public void testFromExtractConfiguration() {
+ BigQuerySchemaTransformReadConfiguration configuration =
+
BigQuerySchemaTransformReadConfiguration.createExtractBuilder("dataset.table").build();
+ Row configurationRow = ROW_SERIALIZABLE_FUNCTION.apply(configuration);
+ SchemaTransformProvider provider = new
BigQuerySchemaTransformReadProvider();
+ BigQuerySchemaTransformRead transform =
+ (BigQuerySchemaTransformRead) provider.from(configurationRow);
+ assertEquals(configuration.getTableSpec(),
transform.getConfiguration().getTableSpec());
+ }
+
+ @Test
+ public void testFromQueryConfiguration() {
+ BigQuerySchemaTransformReadConfiguration want =
+ BigQuerySchemaTransformReadConfiguration.createQueryBuilder("select *
from example")
+ .build();
+ Row configurationRow = ROW_SERIALIZABLE_FUNCTION.apply(want);
Review comment:
Just use your own API here, no reason to test the surrounding class.
##########
File path:
sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProviderTest.java
##########
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.junit.Assert.assertEquals;
+
+import
org.apache.beam.sdk.io.gcp.bigquery.BigQuerySchemaTransformReadProvider.BigQuerySchemaTransformRead;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Test for {@link BigQuerySchemaTransformReadProvider}. */
+@RunWith(JUnit4.class)
+public class BigQuerySchemaTransformReadProviderTest {
+ private static final AutoValueSchema AUTO_VALUE_SCHEMA = new
AutoValueSchema();
+ private static final
TypeDescriptor<BigQuerySchemaTransformReadConfiguration> TYPE_DESCRIPTOR =
+ TypeDescriptor.of(BigQuerySchemaTransformReadConfiguration.class);
+ private static final Schema SCHEMA =
AUTO_VALUE_SCHEMA.schemaFor(TYPE_DESCRIPTOR);
+ private static final
SerializableFunction<BigQuerySchemaTransformReadConfiguration, Row>
+ ROW_SERIALIZABLE_FUNCTION =
AUTO_VALUE_SCHEMA.toRowFunction(TYPE_DESCRIPTOR);
+
+ @Test
+ public void testFromExtractConfiguration() {
+ BigQuerySchemaTransformReadConfiguration configuration =
+
BigQuerySchemaTransformReadConfiguration.createExtractBuilder("dataset.table").build();
+ Row configurationRow = ROW_SERIALIZABLE_FUNCTION.apply(configuration);
+ SchemaTransformProvider provider = new
BigQuerySchemaTransformReadProvider();
+ BigQuerySchemaTransformRead transform =
+ (BigQuerySchemaTransformRead) provider.from(configurationRow);
+ assertEquals(configuration.getTableSpec(),
transform.getConfiguration().getTableSpec());
+ }
+
+ @Test
+ public void testFromQueryConfiguration() {
+ BigQuerySchemaTransformReadConfiguration want =
+ BigQuerySchemaTransformReadConfiguration.createQueryBuilder("select *
from example")
+ .build();
+ Row configurationRow = ROW_SERIALIZABLE_FUNCTION.apply(want);
+ SchemaTransformProvider provider = new
BigQuerySchemaTransformReadProvider();
+ BigQuerySchemaTransformRead transform =
+ (BigQuerySchemaTransformRead) provider.from(configurationRow);
+ BigQuerySchemaTransformReadConfiguration got =
transform.getConfiguration();
+ assertEquals(want.getQuery(), got.getQuery());
+ assertEquals(want.getUseStandardSql(), got.getUseStandardSql());
+ }
+
+ @Test
+ public void getConfiguration() {
Review comment:
What are you trying to test here? The TypedSchemaTransform class has
tests so we may not need this one if we are just testing in general that it
works.
Also, let's move schema generation to this test? The rest don't need it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 747878)
Time Spent: 1h 20m (was: 1h 10m)
> Convert BigQuery SchemaIO to SchemaTransform
> --------------------------------------------
>
> Key: BEAM-14035
> URL: https://issues.apache.org/jira/browse/BEAM-14035
> Project: Beam
> Issue Type: Sub-task
> Components: sdk-java-core
> Reporter: Lara Schmidt
> Assignee: Damon Douglas
> Priority: P2
> Time Spent: 1h 20m
> Remaining Estimate: 0h
>
> The output of this task is to refactor
> org.apache.beam.sdk.io.gcp.bigquery.BigQuerySchemaIOProvider [1] to implement
> SchemaTransform [2] and SchemaTransformProvider interfaces [3].
> Please see https://issues.apache.org/jira/browse/BEAM-14168 for more
> information on additional work remaining after this task is complete.
> As a result of this task there will be:
> 1. one class deletion and four classes created for the PR reflecting whether
> we are reading from or writing to BigQuery, via BigQueryIO.Read and
> BigQueryIO.Write, respectively.
> * delete:
> sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaIOProvider.java
> * create:
> sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadConfiguration.java
> * create:
> sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformReadProvider.java
> * create:
> sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformWriteConfiguration.java
> * create:
> sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaTransformWriteProvider.java
> 2. The testing strategy will leverage the use of the
> org.apache.beam.sdk.transforms.display.DisplayData class instead of using the
> org.apache.beam.sdk.io.gcp.testing.FakeBigQueryServices.
> We will test whether the input of BigQuerySchemaTransformReadConfiguration
> yields a BigQueryIO.Read with correct query, tableSpec, useLegacySQL
> (defaulted to false), and queryLocation values.
> We will test whether the input of BigQuerySchemaTransformWriteConfiguration
> yields a BigQueryIO.Write with correct tableReference, createDisposition, and
> writeDisposition values.
> References:
> 1 -
> [sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaIOProvider.java|https://github.com/apache/beam/blob/fc00b9697a0dfb73a03981f4d6c2c8dd1e316d5e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySchemaIOProvider.java]
> 2 -
> [SchemaTransform|https://github.com/apache/beam/blob/a47a725863fc8c37a9b8520cebeef83677fe531d/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/SchemaTransform.java]
> 3 -
> [SchemaTransformProvider|https://github.com/apache/beam/blob/a47a725863fc8c37a9b8520cebeef83677fe531d/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/transforms/SchemaTransformProvider.java]
--
This message was sent by Atlassian Jira
(v8.20.1#820001)