rtpsw commented on code in PR #14682:
URL: https://github.com/apache/arrow/pull/14682#discussion_r1029367885
##########
python/pyarrow/tests/test_udf.py:
##########
@@ -504,3 +504,49 @@ def test_input_lifetime(unary_func_fixture):
# Calling a UDF should not have kept `v` alive longer than required
v = None
assert proxy_pool.bytes_allocated() == 0
+
+
+def _record_batch_from_iters(schema, *iters):
+ arrays = [pa.array(list(v), type=schema[i].type)
+ for i, v in enumerate(iters)]
+ return pa.RecordBatch.from_arrays(arrays=arrays, schema=schema)
+
+
+def _record_batch_for_range(schema, n):
+ return _record_batch_from_iters(schema,
+ range(n, n + 10),
+ range(n + 1, n + 11))
+
+
+def datasource1(ctx):
+ """A short dataset"""
+ import pyarrow as pa
+ schema = pa.schema([('', pa.int32()), ('', pa.int32())])
+
+ class Generator:
+ def __init__(self):
+ self.n = 3
+
+ def __call__(self, ctx):
+ if self.n == 0:
+ batch = _record_batch_from_iters(schema, [], [])
+ else:
+ self.n -= 1
+ batch = _record_batch_for_range(schema, self.n)
+ return pc.udf_result_from_record_batch(batch)
+ return Generator()
Review Comment:
I was able to make this work in a quick-and-dirty way. Please see if the
following code seems reasonable from both a library and a user point of view.
If so, I'll organize it in a cleaner way.
I added this as a library function:
```
def make_udt_func(schema, batch_gen):
    """Wrap a batch-yielding generator function as a tabular UDF factory.

    Parameters
    ----------
    schema
        Schema used to build the empty batch that is returned once the
        generator is exhausted (one empty column per schema entry).
    batch_gen
        Callable taking ``ctx`` and returning a generator that yields
        record batches.

    Returns
    -------
    A function ``udf_func(ctx)`` that returns a fresh callable; each call
    of that callable returns the next batch converted with
    ``to_struct_array()``.
    """
    def udf_func(ctx):
        class UDT:
            """Callable that pulls one batch from the generator per call."""

            def __init__(self):
                # Bound ``send`` of the generator; created on first call.
                self.caller = None

            def __call__(self, ctx):
                try:
                    if self.caller is None:
                        # A just-started generator only accepts
                        # ``send(None)``, so the first call's ctx goes to
                        # batch_gen itself and None is sent to prime it.
                        self.caller, ctx = batch_gen(ctx).send, None
                    batch = self.caller(ctx)
                except StopIteration:
                    # Generator exhausted: return an empty batch built
                    # from the schema instead of propagating the error.
                    batch = _record_batch_from_iters(
                        schema, *[[] for _ in schema])
                return batch.to_struct_array()
        return UDT()
    return udf_func
```
and replaced `func = datasource1` with this user code:
```
def batch_gen(ctx):
    """Yield three record batches, for range starts 2, 1 and 0.

    The value sent back into the generator after each yield is bound to
    ``ctx``.
    """
    for i in range(3, 0, -1):
        # i runs 3, 2, 1, so i - 1 gives the same 2, 1, 0 sequence that
        # datasource1 produces by decrementing its counter before use.
        ctx = yield _record_batch_for_range(schema, i - 1)
func = make_udt_func(schema, batch_gen)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]