pitrou commented on code in PR #13793:
URL: https://github.com/apache/arrow/pull/13793#discussion_r937424893


##########
python/pyarrow/tests/test_io.py:
##########
@@ -126,6 +126,44 @@ def test_python_file_read():
         pa.PythonFile(StringIO(), mode='r')
 
 
+@pytest.mark.parametrize("nbytes", (0, 1, 5, 100))
+@pytest.mark.parametrize("file_offset", (0, 5, 100))
+def test_python_file_get_stream(nbytes, file_offset):
+
+    data = b'data1data2data3data4data5'
+
+    f = pa.PythonFile(BytesIO(data), mode='r')
+    stream = f.get_stream(file_offset=file_offset, nbytes=nbytes)
+
+    # Subsequent calls to 'read' should match behavior if same
+    # data passed to BytesIO where get_stream should handle if
+    # nbytes/file_offset results in no bytes b/c out of bounds.
+    start = min(file_offset, len(data))
+    end = min(file_offset + nbytes, len(data))
+    buf = BytesIO(data[start:end])
+
+    # read some chunks
+    assert stream.read(nbytes=4) == buf.read(4)
+    assert stream.read(nbytes=6) == buf.read(6)
+
+    # Read to end of each stream
+    assert stream.read() == buf.read()
+
+    # Try reading passed the stream

Review Comment:
   I think there is a misspelling? (not a native English speaker though...)
   ```suggestion
       # Try reading past the stream
   ```



##########
python/pyarrow/tests/test_io.py:
##########
@@ -126,6 +126,44 @@ def test_python_file_read():
         pa.PythonFile(StringIO(), mode='r')
 
 
+@pytest.mark.parametrize("nbytes", (0, 1, 5, 100))
+@pytest.mark.parametrize("file_offset", (0, 5, 100))
+def test_python_file_get_stream(nbytes, file_offset):
+
+    data = b'data1data2data3data4data5'
+
+    f = pa.PythonFile(BytesIO(data), mode='r')
+    stream = f.get_stream(file_offset=file_offset, nbytes=nbytes)
+
+    # Subsequent calls to 'read' should match behavior if same
+    # data passed to BytesIO where get_stream should handle if
+    # nbytes/file_offset results in no bytes b/c out of bounds.
+    start = min(file_offset, len(data))
+    end = min(file_offset + nbytes, len(data))
+    buf = BytesIO(data[start:end])
+
+    # read some chunks
+    assert stream.read(nbytes=4) == buf.read(4)
+    assert stream.read(nbytes=6) == buf.read(6)
+
+    # Read to end of each stream
+    assert stream.read() == buf.read()
+
+    # Try reading passed the stream
+    n = len(data) * 2
+    assert stream.read(n) == buf.read(n)
+
+    # NativeFile[CInputStream] is not seekable
+    with pytest.raises(OSError) as e:
+        stream.seek(0)
+
+    # some error about not being seekable
+    assert e.match("seekable")
+
+    stream.close()
+    assert stream.closed
+
+

Review Comment:
   A couple more questions: what happens if either the offset or the length is 
negative?



##########
python/pyarrow/tests/test_io.py:
##########
@@ -126,6 +126,44 @@ def test_python_file_read():
         pa.PythonFile(StringIO(), mode='r')
 
 
+@pytest.mark.parametrize("nbytes", (0, 1, 5, 100))
+@pytest.mark.parametrize("file_offset", (0, 5, 100))
+def test_python_file_get_stream(nbytes, file_offset):
+
+    data = b'data1data2data3data4data5'
+
+    f = pa.PythonFile(BytesIO(data), mode='r')
+    stream = f.get_stream(file_offset=file_offset, nbytes=nbytes)
+
+    # Subsequent calls to 'read' should match behavior if same
+    # data passed to BytesIO where get_stream should handle if
+    # nbytes/file_offset results in no bytes b/c out of bounds.
+    start = min(file_offset, len(data))
+    end = min(file_offset + nbytes, len(data))
+    buf = BytesIO(data[start:end])
+
+    # read some chunks
+    assert stream.read(nbytes=4) == buf.read(4)
+    assert stream.read(nbytes=6) == buf.read(6)
+
+    # Read to end of each stream
+    assert stream.read() == buf.read()
+
+    # Try reading passed the stream
+    n = len(data) * 2
+    assert stream.read(n) == buf.read(n)
+
+    # NativeFile[CInputStream] is not seekable
+    with pytest.raises(OSError) as e:
+        stream.seek(0)
+
+    # some error about not being seekable
+    assert e.match("seekable")

Review Comment:
   This can be written a bit more succinctly:
   ```suggestion
       # NativeFile[CInputStream] is not seekable
       with pytest.raises(OSError, match="seekable") as e:
           stream.seek(0)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to