This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9391951  ARROW-12506: [Python] Improve modularity of pyarrow codebase: _hdfsio module
9391951 is described below

commit 939195183657daa2060970b6fcd1938eab53d44b
Author: Alessandro Molina <[email protected]>
AuthorDate: Thu Apr 29 10:14:29 2021 +0200

    ARROW-12506: [Python] Improve modularity of pyarrow codebase: _hdfsio module
    
    Second batch of changes related to making pyarrow build more modular. `hdfs-io` is no longer included in `pyarrow.lib` but has been separated to its own module.
    
    This PR is based on https://github.com/apache/arrow/pull/10131
    
    Closes #10159 from amol-/ARROW-12506-2
    
    Authored-by: Alessandro Molina <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 python/CMakeLists.txt                       |  3 ++-
 python/pyarrow/__init__.py                  |  8 +++++---
 python/pyarrow/{io-hdfs.pxi => _hdfsio.pyx} | 10 ++++++++++
 python/pyarrow/hdfs.py                      |  4 ++--
 python/pyarrow/io.pxi                       |  1 +
 python/pyarrow/lib.pyx                      |  1 -
 python/setup.py                             |  1 +
 7 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3058431..3ed518d 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -387,10 +387,11 @@ endif()
 
 set(CYTHON_EXTENSIONS
     lib
-    _fs
     _compute
     _csv
     _feather
+    _fs
+    _hdfsio
     _json)
 
 set(LINK_LIBS arrow_shared arrow_python_shared)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index adfd69c..1488f5c 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -164,15 +164,17 @@ from pyarrow.lib import (MemoryPool, LoggingMemoryPool, ProxyMemoryPool,
                          log_memory_allocations, jemalloc_set_decay_ms)
 
 # I/O
-from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
+from pyarrow.lib import (NativeFile, PythonFile,
                          BufferedInputStream, BufferedOutputStream,
                          CompressedInputStream, CompressedOutputStream,
                          TransformInputStream, transcoding_input_stream,
                          FixedSizeBufferWriter,
                          BufferReader, BufferOutputStream,
                          OSFile, MemoryMappedFile, memory_map,
-                         create_memory_map, have_libhdfs,
-                         MockOutputStream, input_stream, output_stream)
+                         create_memory_map, MockOutputStream,
+                         input_stream, output_stream)
+
+from pyarrow._hdfsio import HdfsFile, have_libhdfs
 
 from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
                          concat_arrays, concat_tables)
diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/_hdfsio.pyx
similarity index 97%
rename from python/pyarrow/io-hdfs.pxi
rename to python/pyarrow/_hdfsio.pyx
index 2cdb1b7..b864f8a 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/_hdfsio.pyx
@@ -18,6 +18,16 @@
 # ----------------------------------------------------------------------
 # HDFS IO implementation
 
+# cython: language_level = 3
+
+import re
+
+from pyarrow.lib cimport check_status, _Weakrefable, NativeFile
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_fs cimport *
+from pyarrow.lib import frombytes, tobytes, ArrowIOError
+
 from queue import Queue, Empty as QueueEmpty, Full as QueueFull
 
 
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index c4daac9..56667bd 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -23,10 +23,10 @@ import warnings
 
 from pyarrow.util import implements, _DEPR_MSG
 from pyarrow.filesystem import FileSystem
-import pyarrow.lib as lib
+import pyarrow._hdfsio as _hdfsio
 
 
-class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
+class HadoopFileSystem(_hdfsio.HadoopFileSystem, FileSystem):
     """
     DEPRECATED: FileSystem interface for HDFS cluster.
 
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 3fc0984..9c501ad 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -27,6 +27,7 @@ import threading
 import time
 import warnings
 from io import BufferedIOBase, IOBase, TextIOBase, UnsupportedOperation
+from queue import Queue, Empty as QueueEmpty
 
 from pyarrow.util import _is_path_like, _stringify_path
 
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 1866d07..191250b 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -140,7 +140,6 @@ include "tensor.pxi"
 
 # File IO
 include "io.pxi"
-include "io-hdfs.pxi"
 
 # IPC / Messaging
 include "ipc.pxi"
diff --git a/python/setup.py b/python/setup.py
index b4de579..24d5480 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -203,6 +203,7 @@ class build_ext(_build_ext):
         '_plasma',
         '_s3fs',
         '_hdfs',
+        '_hdfsio',
         'gandiva']
 
     def _run_cmake(self):

Reply via email to