felipecrv commented on code in PR #15083:
URL: https://github.com/apache/arrow/pull/15083#discussion_r1080656608
##########
python/pyarrow/table.pxi:
##########
@@ -5358,36 +5358,45 @@ list[tuple(str, str, FunctionOptions)]
----
values_sum: [[3,7,5]]
keys: [["a","b","c"]]
+ >>> t.group_by("keys").aggregate([([], "count_all")])
+ pyarrow.Table
+ _count_all: int64
+ keys: string
+ ----
+ _count_all: [[2,2,1]]
+ keys: [["a","b","c"]]
>>> t.group_by("keys").aggregate([])
pyarrow.Table
keys: string
----
keys: [["a","b","c"]]
"""
- columns = [a[0] for a in aggregations]
+ target_cols = [a[0] if isinstance(a[0], (list, tuple)) else [
+ a[0]] for a in aggregations]
aggrfuncs = [
- (a[1], a[2]) if len(a) > 2 else (a[1], None)
- for a in aggregations
+ (target, a[1], a[2]) if len(a) > 2 else (target, a[1], None)
+ for (target, a) in zip(target_cols, aggregations)
]
group_by_aggrs = []
for aggr in aggrfuncs:
- if not aggr[0].startswith("hash_"):
- aggr = ("hash_" + aggr[0], aggr[1])
+ if not aggr[1].startswith("hash_"):
+ aggr = (aggr[0], "hash_" + aggr[1], aggr[2])
group_by_aggrs.append(aggr)
# Build unique names for aggregation result columns
# so that it's obvious what they refer to.
- column_names = [
- aggr_name.replace("hash", col_name)
- for col_name, (aggr_name, _) in zip(columns, group_by_aggrs)
+ out_column_names = [
+ aggr_name.replace("hash", "_".join(target))
+ for target, aggr_name, _ in group_by_aggrs
Review Comment:
I did this on purpose to keep the naming rule more general, but I understand
that the result might seem weird (e.g. the leading underscore in `_count_all`
when the target column list is empty). I will special-case nullary aggregations.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]