This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 671b53c  ARROW-2046: [Python] Support path-like objects
671b53c is described below

commit 671b53c36bbf9d2cced39d5c8ca291adee859d68
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Feb 28 14:49:45 2018 -0500

    ARROW-2046: [Python] Support path-like objects
    
    IO functions accepting string filenames should also accept PEP 519 path objects such as pathlib.Path (on Python 3.6 and later).
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1675 from pitrou/ARROW-2046-path-like-objects and squashes the following commits:
    
    c449bfe7 <Antoine Pitrou> ARROW-2046:  Support path-like objects
---
 python/pyarrow/_parquet.pyx                | 12 +++++----
 python/pyarrow/io.pxi                      | 42 +++++++++++++++++++++---------
 python/pyarrow/tests/test_parquet.py       | 14 ++++++++++
 python/pyarrow/tests/test_serialization.py | 17 ++++++++----
 4 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 9061ed5..e513e1d 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -31,7 +31,7 @@ from pyarrow.lib cimport (Array, Schema,
                           NativeFile, get_reader, get_writer)
 
 from pyarrow.compat import tobytes, frombytes
-from pyarrow.lib import ArrowException, NativeFile
+from pyarrow.lib import ArrowException, NativeFile, _stringify_path
 
 import six
 
@@ -825,15 +825,17 @@ cdef class ParquetWriter:
             c_string c_where
             CMemoryPool* pool
 
-        if isinstance(where, six.string_types):
+        try:
+            where = _stringify_path(where)
+        except TypeError:
+            get_writer(where, &self.sink)
+            self.own_sink = False
+        else:
             c_where = tobytes(where)
             with nogil:
                 check_status(FileOutputStream.Open(c_where,
                                                    &self.sink))
             self.own_sink = True
-        else:
-            get_writer(where, &self.sink)
-            self.own_sink = False
 
         self.use_dictionary = use_dictionary
         self.compression = compression
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 0b444cd..325c582 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -38,6 +38,18 @@ cdef extern from "Python.h":
         char *v, Py_ssize_t len) except NULL
 
 
+def _stringify_path(path):
+    """
+    Convert *path* to a string or unicode path if possible.
+    """
+    if isinstance(path, six.string_types):
+        return path
+    try:
+        return path.__fspath__()
+    except AttributeError:
+        raise TypeError("not a path-like object")
+
+
 cdef class NativeFile:
     def __cinit__(self):
         self.closed = True
@@ -834,13 +846,16 @@ def frombuffer(object obj):
 cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
     cdef NativeFile nf
 
-    if isinstance(source, six.string_types):
-        source = memory_map(source, mode='r')
-    elif isinstance(source, Buffer):
-        source = BufferReader(source)
-    elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
-        # Optimistically hope this is file-like
-        source = PythonFile(source, mode='r')
+    try:
+        source_path = _stringify_path(source)
+    except TypeError:
+        if isinstance(source, Buffer):
+            source = BufferReader(source)
+        elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
+            # Optimistically hope this is file-like
+            source = PythonFile(source, mode='r')
+    else:
+        source = memory_map(source_path, mode='r')
 
     if isinstance(source, NativeFile):
         nf = source
@@ -858,11 +873,14 @@ cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
 cdef get_writer(object source, shared_ptr[OutputStream]* writer):
     cdef NativeFile nf
 
-    if isinstance(source, six.string_types):
-        source = OSFile(source, mode='w')
-    elif not isinstance(source, NativeFile) and hasattr(source, 'write'):
-        # Optimistically hope this is file-like
-        source = PythonFile(source, mode='w')
+    try:
+        source_path = _stringify_path(source)
+    except TypeError:
+        if not isinstance(source, NativeFile) and hasattr(source, 'write'):
+            # Optimistically hope this is file-like
+            source = PythonFile(source, mode='w')
+    else:
+        source = OSFile(source_path, mode='w')
 
     if isinstance(source, NativeFile):
         nf = source
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index bd76feb..187971f 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -22,6 +22,7 @@ import decimal
 import io
 import json
 import os
+import sys
 
 import pytest
 
@@ -265,6 +266,19 @@ def test_pandas_parquet_1_0_rountrip(tmpdir):
 
 
 @parquet
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="need Python 3.6")
+def test_path_objects(tmpdir):
+    # Test compatibility with PEP 519 path-like objects
+    import pathlib
+    p = pathlib.Path(tmpdir) / 'zzz.parquet'
+    df = pd.DataFrame({'x': np.arange(10, dtype=np.int64)})
+    _write_table(df, p)
+    table_read = _read_table(p)
+    df_read = table_read.to_pandas()
+    tm.assert_frame_equal(df, df_read)
+
+
+@parquet
 def test_pandas_column_selection(tmpdir):
     size = 10000
     np.random.seed(0)
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index feccebb..3ee02cb 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -584,15 +584,11 @@ def _get_modified_env_with_pythonpath():
     # Prepend pyarrow root directory to PYTHONPATH
     env = os.environ.copy()
     existing_pythonpath = env.get('PYTHONPATH', '')
-    if sys.platform == 'win32':
-        sep = ';'
-    else:
-        sep = ':'
 
     module_path = os.path.abspath(
         os.path.dirname(os.path.dirname(pa.__file__)))
 
-    env['PYTHONPATH'] = sep.join((module_path, existing_pythonpath))
+    env['PYTHONPATH'] = os.pathsep.join((module_path, existing_pythonpath))
     return env
 
 
@@ -650,3 +646,14 @@ def test_set_pickle():
     serialized = pa.serialize(test_object, context=context).to_buffer()
     deserialized = pa.deserialize(serialized.to_pybytes(), context=context)
     assert deserialized == b'custom serialization 2'
+
+
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="need Python 3.6")
+def test_path_objects(tmpdir):
+    # Test compatibility with PEP 519 path-like objects
+    import pathlib
+    p = pathlib.Path(tmpdir) / 'zzz.bin'
+    obj = 1234
+    pa.serialize_to(obj, p)
+    res = pa.deserialize_from(p, None)
+    assert res == obj

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to