pitrou commented on PR #39245:
URL: https://github.com/apache/arrow/pull/39245#issuecomment-1883448603
I think this PR produces incorrect results sometimes:
```python
>>> pc.binary_slice(pa.array([b"ab\xf2de"], pa.binary(5)), -5, -6, -3)
Traceback (most recent call last):
Cell In[16], line 1
pc.binary_slice(pa.array([b"ab\xf2de"], pa.binary(5)), -5, -6, -3)
File ~/arrow/dev/python/pyarrow/compute.py:263 in wrapper
return func.call(args, options, memory_pool)
File pyarrow/_compute.pyx:385 in pyarrow._compute.Function.call
result = GetResultValue(
File pyarrow/error.pxi:154 in pyarrow.lib.pyarrow_internal_check_status
return check_status(status)
File pyarrow/error.pxi:91 in pyarrow.lib.check_status
raise convert_status(status)
ArrowInvalid: Invalid UTF8 sequence in input
```
This happens when adding the following validation test in Python:
```diff
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 7c5a134d33..d1eb605c71 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -561,7 +561,8 @@ def test_slice_compatibility():
def test_binary_slice_compatibility():
- arr = pa.array([b"", b"a", b"a\xff", b"ab\x00", b"abc\xfb", b"ab\xf2de"])
+ data = [b"", b"a", b"a\xff", b"ab\x00", b"abc\xfb", b"ab\xf2de"]
+ arr = pa.array(data)
for start, stop, step in itertools.product(range(-6, 6),
range(-6, 6),
range(-3, 4)):
@@ -574,6 +575,13 @@ def test_binary_slice_compatibility():
assert expected.equals(result)
# Positional options
assert pc.binary_slice(arr, start, stop, step) == result
+ # Fixed size binary input / output
+ for item in data:
+ fsb_scalar = pa.scalar(item, type=pa.binary(len(item)))
+ expected = item[start:stop:step]
+ actual = pc.binary_slice(fsb_scalar, start, stop, step)
+ assert actual.type == pa.binary(len(expected))
+ assert actual.as_py() == expected
def test_split_pattern():
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]