milesgranger commented on code in PR #13793:
URL: https://github.com/apache/arrow/pull/13793#discussion_r937457037
##########
python/pyarrow/tests/test_io.py:
##########
@@ -126,6 +126,44 @@ def test_python_file_read():
pa.PythonFile(StringIO(), mode='r')
[email protected]("nbytes", (0, 1, 5, 100))
[email protected]("file_offset", (0, 5, 100))
+def test_python_file_get_stream(nbytes, file_offset):
+
+ data = b'data1data2data3data4data5'
+
+ f = pa.PythonFile(BytesIO(data), mode='r')
+ stream = f.get_stream(file_offset=file_offset, nbytes=nbytes)
+
+ # Subsequent calls to 'read' should match behavior if same
+ # data passed to BytesIO where get_stream should handle if
+ # nbytes/file_offset results in no bytes b/c out of bounds.
+ start = min(file_offset, len(data))
+ end = min(file_offset + nbytes, len(data))
+ buf = BytesIO(data[start:end])
+
+ # read some chunks
+ assert stream.read(nbytes=4) == buf.read(4)
+ assert stream.read(nbytes=6) == buf.read(6)
+
+ # Read to end of each stream
+ assert stream.read() == buf.read()
+
+ # Try reading passed the stream
+ n = len(data) * 2
+ assert stream.read(n) == buf.read(n)
+
+ # NativeFile[CInputStream] is not seekable
+ with pytest.raises(OSError) as e:
+ stream.seek(0)
+
+ # some error about not being seekable
+ assert e.match("seekable")
+
+ stream.close()
+ assert stream.closed
+
+
Review Comment:
It seems like the Python `get_stream` should raise a `ValueError` if either of
these is negative, or at least raise right away if `file_offset` is negative
instead of waiting until `read()`. What do you think?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]