damccorm commented on code in PR #37672:
URL: https://github.com/apache/beam/pull/37672#discussion_r2842408001
##########
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_attr_expr.py:
##########
@@ -43,22 +43,23 @@
beam.Row(recipe='pie', fruit='blueberry', quantity=1, unit_price=2.00),
beam.Row(recipe='muffin', fruit='blueberry', quantity=2, unit_price=2.00),
beam.Row(recipe='muffin', fruit='banana', quantity=3, unit_price=1.00),
+ beam.Row(recipe='pie', fruit='strawberry', quantity=3, unit_price=1.50),
]
# [END groupby_table]
def groupby_attr_expr(test=None):
with beam.Pipeline() as p:
- # [START groupby_attr_expr]
Review Comment:
We should not remove these attributes
##########
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_simple_aggregate.py:
##########
@@ -36,27 +36,49 @@
import apache_beam as beam
-# [START groupby_table]
GROCERY_LIST = [
beam.Row(recipe='pie', fruit='raspberry', quantity=1, unit_price=3.50),
beam.Row(recipe='pie', fruit='blackberry', quantity=1, unit_price=4.00),
beam.Row(recipe='pie', fruit='blueberry', quantity=1, unit_price=2.00),
beam.Row(recipe='muffin', fruit='blueberry', quantity=2, unit_price=2.00),
beam.Row(recipe='muffin', fruit='banana', quantity=3, unit_price=1.00),
+ beam.Row(recipe='pie', fruit='strawberry', quantity=3, unit_price=1.50),
]
-# [END groupby_table]
def simple_aggregate(test=None):
+ def to_grocery_row(x):
+ # If it's already a Beam Row / schema object, keep it
+ if hasattr(x, 'recipe') and hasattr(x, 'fruit') and hasattr(
+ x, 'quantity') and hasattr(x, 'unit_price'):
+ return beam.Row(
+ recipe=x.recipe,
+ fruit=x.fruit,
+ quantity=x.quantity,
+ unit_price=x.unit_price)
+
+ # If dict
+ if isinstance(x, dict):
+ return beam.Row(
+ recipe=x['recipe'],
+ fruit=x['fruit'],
+ quantity=x['quantity'],
+ unit_price=x['unit_price'],
+ )
+
+ # If tuple/list (recipe, fruit, quantity, unit_price)
+ return beam.Row(recipe=x[0], fruit=x[1], quantity=x[2], unit_price=x[3])
+
with beam.Pipeline() as p:
# [START simple_aggregate]
grouped = (
p
| beam.Create(GROCERY_LIST)
+ | 'ToGroceryRows' >> beam.Map(to_grocery_row)
Review Comment:
As mentioned above, this is redundant, I don't think we need it
##########
sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_attr.py:
##########
@@ -43,22 +43,24 @@
beam.Row(recipe='pie', fruit='blueberry', quantity=1, unit_price=2.00),
beam.Row(recipe='muffin', fruit='blueberry', quantity=2, unit_price=2.00),
beam.Row(recipe='muffin', fruit='banana', quantity=3, unit_price=1.00),
+ beam.Row(recipe='pie', fruit='strawberry', quantity=3, unit_price=1.50),
]
# [END groupby_table]
-
def groupby_attr(test=None):
with beam.Pipeline() as p:
# [START groupby_attr]
grouped = (
p
| beam.Create(GROCERY_LIST)
| beam.GroupBy('recipe')
- | beam.Map(print))
+ )
# [END groupby_attr]
- if test:
- test(grouped)
+ if test:
+ test(grouped)
+ else:
+ grouped | beam.Map(print)
Review Comment:
If we've fixed this, we should
https://github.com/apache/beam/blob/4fe61733f7786dcccc214f2095feb61fc713e0ac/sdks/python/apache_beam/examples/snippets/transforms/aggregation/groupby_test.py#L44
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]