riteshghorse commented on code in PR #31295:
URL: https://github.com/apache/beam/pull/31295#discussion_r1605054186


##########
sdks/python/apache_beam/transforms/enrichment_handlers/bigquery.py:
##########
@@ -0,0 +1,265 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+from typing import Mapping
+from typing import Optional
+from typing import Union
+
+from google.api_core.exceptions import BadRequest
+from google.cloud import bigquery
+
+import apache_beam as beam
+from apache_beam.pvalue import Row
+from apache_beam.transforms.enrichment import EnrichmentSourceHandler
+
+QueryFn = Callable[[beam.Row], str]
+ConditionValueFn = Callable[[beam.Row], List[Any]]
+
+
+def _validate_batch_query_fn(query_fn, min_batch_size, max_batch_size):
+  if query_fn and min_batch_size and max_batch_size:
+    raise ValueError(
+        "Please provide exactly one of `query_fn` or "
+        "(`min_batch_size` and `max_batch_size`)")
+
+
+def _validate_bigquery_metadata(
+    table_name, row_restriction_template, fields, condition_value_fn, query_fn):
+  if query_fn and bool(table_name or row_restriction_template or fields or
+                       condition_value_fn):
+    raise ValueError(
+        "Please provide either `query_fn` or the parameters "
+        "`table_name`, `row_restriction_template`, and `fields` "
+        "together.")
+  elif not query_fn and not (table_name and row_restriction_template and
+                             (fields or condition_value_fn)):
+    raise ValueError(
+        "Please provide either `query_fn` or the parameters "
+        "`table_name`, `row_restriction_template`, and"
+        "`fields/condition_value_fn` together.")
+  if not query_fn and ((fields and condition_value_fn) or
+                       (not fields and not condition_value_fn)):
+    raise ValueError(
+        "Please provide exactly one of `fields` or "
+        "`condition_value_fn`")
+
+
+class BigQueryEnrichmentHandler(EnrichmentSourceHandler[Union[Row, List[Row]],
+                                                        Union[Row, List[Row]]]):
+  """Enrichment handler for Google Cloud BigQuery.
+
+  Use this handler with :class:`apache_beam.transforms.enrichment.Enrichment`
+  transform.
+
+  To use this handler you need either of the following combinations:
+    * `table_name`, `row_restriction_template`, `fields`
+    * `table_name`, `row_restriction_template`, `condition_value_fn`
+    * `query_fn`
+
+  By default, the handler pulls all columns from the BigQuery table.
+  To override this, use the `column_names` parameter to specify a list of
+  column names to fetch.
+
+  This handler pulls data from BigQuery per element by default. To change this
+  behavior, set the `min_batch_size` and `max_batch_size` parameters.
+  These min and max values for batch size are sent to the
+  :class:`apache_beam.transforms.utils.BatchElements` transform.
+
+  NOTE: Elements cannot be batched when using the `query_fn` parameter.
+  """
+  def __init__(
+      self,
+      project: str,
+      *,
+      table_name: str = "",
+      row_restriction_template: str = "",
+      fields: Optional[List[str]] = None,
+      column_names: Optional[List[str]] = None,
+      condition_value_fn: Optional[ConditionValueFn] = None,
+      query_fn: Optional[QueryFn] = None,
+      min_batch_size: Optional[int] = None,
+      max_batch_size: Optional[int] = None,
+      **kwargs,
+  ):
+    """
+    Example Usage:
+      handler = BigQueryEnrichmentHandler(project=project_name,
+                                          row_restriction_template="id='{}'",
+                                          table_name='project.dataset.table',
+                                          fields=fields,
+                                          min_batch_size=2,
+                                          max_batch_size=100)
+
+    Args:
+      project: Google Cloud project ID for the BigQuery table.
+      table_name (str): Fully qualified BigQuery table name
+        in the format `project.dataset.table`.
+      row_restriction_template (str): A template string for the `WHERE` clause
+        in the BigQuery query with placeholders (`{}`) to dynamically filter
+        rows based on input data.
+      fields: (Optional[List[str]]) List of field names present in the input
+        `beam.Row`. These are used to construct the WHERE clause
+        (if `condition_value_fn` is not provided).
+      column_names: (Optional[List[str]]) Names of columns to select from the
+        BigQuery table. If not provided, all columns (`*`) are selected.
+      condition_value_fn: (Optional[Callable[[beam.Row], List[Any]]]) A
+        function that takes a `beam.Row` and returns a list of values to
+        populate the `{}` placeholders of the `WHERE` clause in the query.
+      query_fn: (Optional[Callable[[beam.Row], str]]) A function that takes a
+        `beam.Row` and returns a complete BigQuery SQL query string.
+      min_batch_size: (Optional[int]) Minimum number of rows to batch together
+        when querying BigQuery.
+      max_batch_size: (Optional[int]) Maximum number of rows to batch together.
+      **kwargs: Additional keyword arguments to pass to `bigquery.Client`.
+
+    Note:
+      * `min_batch_size` and `max_batch_size` won't have any effect if the

Review Comment:
   good catch



##########
sdks/python/apache_beam/transforms/enrichment_handlers/bigquery.py:
##########
@@ -0,0 +1,265 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+from typing import Mapping
+from typing import Optional
+from typing import Union
+
+from google.api_core.exceptions import BadRequest
+from google.cloud import bigquery
+
+import apache_beam as beam
+from apache_beam.pvalue import Row
+from apache_beam.transforms.enrichment import EnrichmentSourceHandler
+
+QueryFn = Callable[[beam.Row], str]
+ConditionValueFn = Callable[[beam.Row], List[Any]]
+
+
+def _validate_batch_query_fn(query_fn, min_batch_size, max_batch_size):
+  if query_fn and min_batch_size and max_batch_size:
+    raise ValueError(
+        "Please provide exactly one of `query_fn` or "
+        "(`min_batch_size` and `max_batch_size`)")
+
+
+def _validate_bigquery_metadata(
+    table_name, row_restriction_template, fields, condition_value_fn, query_fn):
+  if query_fn and bool(table_name or row_restriction_template or fields or
+                       condition_value_fn):
+    raise ValueError(
+        "Please provide either `query_fn` or the parameters "
+        "`table_name`, `row_restriction_template`, and `fields` "
+        "together.")
+  elif not query_fn and not (table_name and row_restriction_template and
+                             (fields or condition_value_fn)):
+    raise ValueError(
+        "Please provide either `query_fn` or the parameters "
+        "`table_name`, `row_restriction_template`, and"
+        "`fields/condition_value_fn` together.")
+  if not query_fn and ((fields and condition_value_fn) or
+                       (not fields and not condition_value_fn)):
+    raise ValueError(
+        "Please provide exactly one of `fields` or "
+        "`condition_value_fn`")
+
+
+class BigQueryEnrichmentHandler(EnrichmentSourceHandler[Union[Row, List[Row]],
+                                                        Union[Row, List[Row]]]):
+  """Enrichment handler for Google Cloud BigQuery.
+
+  Use this handler with :class:`apache_beam.transforms.enrichment.Enrichment`
+  transform.
+
+  To use this handler you need either of the following combinations:
+    * `table_name`, `row_restriction_template`, `fields`
+    * `table_name`, `row_restriction_template`, `condition_value_fn`
+    * `query_fn`
+
+  By default, the handler pulls all columns from the BigQuery table.
+  To override this, use the `column_names` parameter to specify a list of
+  column names to fetch.
+
+  This handler pulls data from BigQuery per element by default. To change this
+  behavior, set the `min_batch_size` and `max_batch_size` parameters.
+  These min and max values for batch size are sent to the
+  :class:`apache_beam.transforms.utils.BatchElements` transform.
+
+  NOTE: Elements cannot be batched when using the `query_fn` parameter.
+  """
+  def __init__(
+      self,
+      project: str,
+      *,
+      table_name: str = "",
+      row_restriction_template: str = "",
+      fields: Optional[List[str]] = None,
+      column_names: Optional[List[str]] = None,
+      condition_value_fn: Optional[ConditionValueFn] = None,
+      query_fn: Optional[QueryFn] = None,
+      min_batch_size: Optional[int] = None,
+      max_batch_size: Optional[int] = None,

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to