This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 671b53c ARROW-2046: [Python] Support path-like objects
671b53c is described below
commit 671b53c36bbf9d2cced39d5c8ca291adee859d68
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Feb 28 14:49:45 2018 -0500
ARROW-2046: [Python] Support path-like objects
IO functions accepting string filenames should also accept PEP 519 path
objects such as pathlib.Path (on Python 3.6 and later).
Author: Antoine Pitrou <[email protected]>
Closes #1675 from pitrou/ARROW-2046-path-like-objects and squashes the
following commits:
c449bfe7 <Antoine Pitrou> ARROW-2046: Support path-like objects
---
python/pyarrow/_parquet.pyx | 12 +++++----
python/pyarrow/io.pxi | 42 +++++++++++++++++++++---------
python/pyarrow/tests/test_parquet.py | 14 ++++++++++
python/pyarrow/tests/test_serialization.py | 17 ++++++++----
4 files changed, 63 insertions(+), 22 deletions(-)
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 9061ed5..e513e1d 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -31,7 +31,7 @@ from pyarrow.lib cimport (Array, Schema,
NativeFile, get_reader, get_writer)
from pyarrow.compat import tobytes, frombytes
-from pyarrow.lib import ArrowException, NativeFile
+from pyarrow.lib import ArrowException, NativeFile, _stringify_path
import six
@@ -825,15 +825,17 @@ cdef class ParquetWriter:
c_string c_where
CMemoryPool* pool
- if isinstance(where, six.string_types):
+ try:
+ where = _stringify_path(where)
+ except TypeError:
+ get_writer(where, &self.sink)
+ self.own_sink = False
+ else:
c_where = tobytes(where)
with nogil:
check_status(FileOutputStream.Open(c_where,
&self.sink))
self.own_sink = True
- else:
- get_writer(where, &self.sink)
- self.own_sink = False
self.use_dictionary = use_dictionary
self.compression = compression
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 0b444cd..325c582 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -38,6 +38,18 @@ cdef extern from "Python.h":
char *v, Py_ssize_t len) except NULL
+def _stringify_path(path):
+ """
+ Convert *path* to a string or unicode path if possible.
+ """
+ if isinstance(path, six.string_types):
+ return path
+ try:
+ return path.__fspath__()
+ except AttributeError:
+ raise TypeError("not a path-like object")
+
+
cdef class NativeFile:
def __cinit__(self):
self.closed = True
@@ -834,13 +846,16 @@ def frombuffer(object obj):
cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
cdef NativeFile nf
- if isinstance(source, six.string_types):
- source = memory_map(source, mode='r')
- elif isinstance(source, Buffer):
- source = BufferReader(source)
- elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
- # Optimistically hope this is file-like
- source = PythonFile(source, mode='r')
+ try:
+ source_path = _stringify_path(source)
+ except TypeError:
+ if isinstance(source, Buffer):
+ source = BufferReader(source)
+ elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
+ # Optimistically hope this is file-like
+ source = PythonFile(source, mode='r')
+ else:
+ source = memory_map(source_path, mode='r')
if isinstance(source, NativeFile):
nf = source
@@ -858,11 +873,14 @@ cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
cdef get_writer(object source, shared_ptr[OutputStream]* writer):
cdef NativeFile nf
- if isinstance(source, six.string_types):
- source = OSFile(source, mode='w')
- elif not isinstance(source, NativeFile) and hasattr(source, 'write'):
- # Optimistically hope this is file-like
- source = PythonFile(source, mode='w')
+ try:
+ source_path = _stringify_path(source)
+ except TypeError:
+ if not isinstance(source, NativeFile) and hasattr(source, 'write'):
+ # Optimistically hope this is file-like
+ source = PythonFile(source, mode='w')
+ else:
+ source = OSFile(source_path, mode='w')
if isinstance(source, NativeFile):
nf = source
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index bd76feb..187971f 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -22,6 +22,7 @@ import decimal
import io
import json
import os
+import sys
import pytest
@@ -265,6 +266,19 @@ def test_pandas_parquet_1_0_rountrip(tmpdir):
@parquet
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="need Python 3.6")
+def test_path_objects(tmpdir):
+ # Test compatibility with PEP 519 path-like objects
+ import pathlib
+ p = pathlib.Path(tmpdir) / 'zzz.parquet'
+ df = pd.DataFrame({'x': np.arange(10, dtype=np.int64)})
+ _write_table(df, p)
+ table_read = _read_table(p)
+ df_read = table_read.to_pandas()
+ tm.assert_frame_equal(df, df_read)
+
+
+@parquet
def test_pandas_column_selection(tmpdir):
size = 10000
np.random.seed(0)
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index feccebb..3ee02cb 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -584,15 +584,11 @@ def _get_modified_env_with_pythonpath():
# Prepend pyarrow root directory to PYTHONPATH
env = os.environ.copy()
existing_pythonpath = env.get('PYTHONPATH', '')
- if sys.platform == 'win32':
- sep = ';'
- else:
- sep = ':'
module_path = os.path.abspath(
os.path.dirname(os.path.dirname(pa.__file__)))
- env['PYTHONPATH'] = sep.join((module_path, existing_pythonpath))
+ env['PYTHONPATH'] = os.pathsep.join((module_path, existing_pythonpath))
return env
@@ -650,3 +646,14 @@ def test_set_pickle():
serialized = pa.serialize(test_object, context=context).to_buffer()
deserialized = pa.deserialize(serialized.to_pybytes(), context=context)
assert deserialized == b'custom serialization 2'
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="need Python 3.6")
+def test_path_objects(tmpdir):
+ # Test compatibility with PEP 519 path-like objects
+ import pathlib
+ p = pathlib.Path(tmpdir) / 'zzz.bin'
+ obj = 1234
+ pa.serialize_to(obj, p)
+ res = pa.deserialize_from(p, None)
+ assert res == obj
--
To stop receiving notification emails like this one, please contact
[email protected].