This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9391951 ARROW-12506: [Python] Improve modularity of pyarrow codebase:
_hdfsio module
9391951 is described below
commit 939195183657daa2060970b6fcd1938eab53d44b
Author: Alessandro Molina <[email protected]>
AuthorDate: Thu Apr 29 10:14:29 2021 +0200
ARROW-12506: [Python] Improve modularity of pyarrow codebase: _hdfsio module
Second batch of changes related to making the pyarrow build more modular.
The HDFS I/O code (`io-hdfs.pxi`) is no longer included in `pyarrow.lib`; it has
been split out into its own module, `pyarrow._hdfsio`.
This PR is based on https://github.com/apache/arrow/pull/10131
Closes #10159 from amol-/ARROW-12506-2
Authored-by: Alessandro Molina <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
python/CMakeLists.txt | 3 ++-
python/pyarrow/__init__.py | 8 +++++---
python/pyarrow/{io-hdfs.pxi => _hdfsio.pyx} | 10 ++++++++++
python/pyarrow/hdfs.py | 4 ++--
python/pyarrow/io.pxi | 1 +
python/pyarrow/lib.pyx | 1 -
python/setup.py | 1 +
7 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3058431..3ed518d 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -387,10 +387,11 @@ endif()
set(CYTHON_EXTENSIONS
lib
- _fs
_compute
_csv
_feather
+ _fs
+ _hdfsio
_json)
set(LINK_LIBS arrow_shared arrow_python_shared)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index adfd69c..1488f5c 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -164,15 +164,17 @@ from pyarrow.lib import (MemoryPool, LoggingMemoryPool,
ProxyMemoryPool,
log_memory_allocations, jemalloc_set_decay_ms)
# I/O
-from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
+from pyarrow.lib import (NativeFile, PythonFile,
BufferedInputStream, BufferedOutputStream,
CompressedInputStream, CompressedOutputStream,
TransformInputStream, transcoding_input_stream,
FixedSizeBufferWriter,
BufferReader, BufferOutputStream,
OSFile, MemoryMappedFile, memory_map,
- create_memory_map, have_libhdfs,
- MockOutputStream, input_stream, output_stream)
+ create_memory_map, MockOutputStream,
+ input_stream, output_stream)
+
+from pyarrow._hdfsio import HdfsFile, have_libhdfs
from pyarrow.lib import (ChunkedArray, RecordBatch, Table, table,
concat_arrays, concat_tables)
diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/_hdfsio.pyx
similarity index 97%
rename from python/pyarrow/io-hdfs.pxi
rename to python/pyarrow/_hdfsio.pyx
index 2cdb1b7..b864f8a 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/_hdfsio.pyx
@@ -18,6 +18,16 @@
# ----------------------------------------------------------------------
# HDFS IO implementation
+# cython: language_level = 3
+
+import re
+
+from pyarrow.lib cimport check_status, _Weakrefable, NativeFile
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.libarrow_fs cimport *
+from pyarrow.lib import frombytes, tobytes, ArrowIOError
+
from queue import Queue, Empty as QueueEmpty, Full as QueueFull
diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py
index c4daac9..56667bd 100644
--- a/python/pyarrow/hdfs.py
+++ b/python/pyarrow/hdfs.py
@@ -23,10 +23,10 @@ import warnings
from pyarrow.util import implements, _DEPR_MSG
from pyarrow.filesystem import FileSystem
-import pyarrow.lib as lib
+import pyarrow._hdfsio as _hdfsio
-class HadoopFileSystem(lib.HadoopFileSystem, FileSystem):
+class HadoopFileSystem(_hdfsio.HadoopFileSystem, FileSystem):
"""
DEPRECATED: FileSystem interface for HDFS cluster.
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 3fc0984..9c501ad 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -27,6 +27,7 @@ import threading
import time
import warnings
from io import BufferedIOBase, IOBase, TextIOBase, UnsupportedOperation
+from queue import Queue, Empty as QueueEmpty
from pyarrow.util import _is_path_like, _stringify_path
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 1866d07..191250b 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -140,7 +140,6 @@ include "tensor.pxi"
# File IO
include "io.pxi"
-include "io-hdfs.pxi"
# IPC / Messaging
include "ipc.pxi"
diff --git a/python/setup.py b/python/setup.py
index b4de579..24d5480 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -203,6 +203,7 @@ class build_ext(_build_ext):
'_plasma',
'_s3fs',
'_hdfs',
+ '_hdfsio',
'gandiva']
def _run_cmake(self):