Yicong-Huang commented on code in PR #53043:
URL: https://github.com/apache/spark/pull/53043#discussion_r2562237208


##########
python/pyspark/worker.py:
##########
@@ -3003,24 +3009,50 @@ def mapper(a):
         )
         parsed_offsets = extract_key_value_indexes(arg_offsets)
 
-        # Create mapper similar to Arrow iterator:
-        # `a` is an iterator of Series lists (one list per batch, containing all columns)
-        # Materialize first batch to get keys, then create generator for value batches
-        def mapper(a):
-            import itertools
+        def series_from_offset(series_list, offsets):
+            return [series_list[o] for o in offsets]
 
-            series_iter = iter(a)
-            # Need to materialize the first series list to get the keys
+        def mapper(series_lists_iter):
+            # `series_lists_iter` is an iterator of Series lists (one list per batch)
+            # Materialize first batch to extract keys (guaranteed to exist for grouped operations)
+            series_iter = iter(series_lists_iter)
             first_series_list = next(series_iter)
 
-            keys = [first_series_list[o] for o in parsed_offsets[0][0]]
+            keys = series_from_offset(first_series_list, parsed_offsets[0][0])
+            # Create generator for value series from all batches
             value_series_gen = (
-                [series_list[o] for o in parsed_offsets[0][1]]
+                series_from_offset(series_list, parsed_offsets[0][1])
                 for series_list in itertools.chain((first_series_list,), series_iter)
             )
-
+            # Call wrapped function which returns (generator, arrow_type)
             return f(keys, value_series_gen)
 
+    elif eval_type in (
+        PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF,

Review Comment:
   removed!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to