[ https://issues.apache.org/jira/browse/ARROW-2292?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16395627#comment-16395627 ]
ASF GitHub Bot commented on ARROW-2292: --------------------------------------- wesm closed pull request #1736: ARROW-2292: [Python] Rename frombuffer() to py_buffer() URL: https://github.com/apache/arrow/pull/1736 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst index 3db1a04b6d..cb9993302a 100644 --- a/python/doc/source/api.rst +++ b/python/doc/source/api.rst @@ -213,7 +213,7 @@ Input / Output and Shared Memory allocate_buffer compress decompress - frombuffer + py_buffer foreign_buffer Buffer ResizableBuffer diff --git a/python/doc/source/ipc.rst b/python/doc/source/ipc.rst index bce8b1ed1e..c77888ab90 100644 --- a/python/doc/source/ipc.rst +++ b/python/doc/source/ipc.rst @@ -296,12 +296,13 @@ which are zero-copy convertible to Python ``memoryview`` objects: memoryview(components['data'][0]) -A memoryview can be converted back to a ``Buffer`` with ``pyarrow.frombuffer``: +A memoryview can be converted back to a Arrow ``Buffer`` with +``pyarrow.py_buffer``: .. ipython:: python mv = memoryview(components['data'][0]) - buf = pa.frombuffer(mv) + buf = pa.py_buffer(mv) An object can be reconstructed from its component-based representation using ``deserialize_components``: diff --git a/python/doc/source/memory.rst b/python/doc/source/memory.rst index 4806bbb857..34664b898f 100644 --- a/python/doc/source/memory.rst +++ b/python/doc/source/memory.rst @@ -50,7 +50,7 @@ implements the buffer protocol. 
Let's consider a bytes object: import pyarrow as pa data = b'abcdefghijklmnopqrstuvwxyz' - buf = pa.frombuffer(data) + buf = pa.py_buffer(data) buf buf.size diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 965a37b4b9..bfd7d4db98 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -28,7 +28,8 @@ def parse_version(root): from setuptools_scm import version_from_scm import setuptools_scm.git - describe = setuptools_scm.git.DEFAULT_DESCRIBE + " --match 'apache-arrow-[0-9]*'" + describe = (setuptools_scm.git.DEFAULT_DESCRIBE + + " --match 'apache-arrow-[0-9]*'") # Strip catchall from the commandline describe = describe.replace("--match *.*", "") version = setuptools_scm.git.parse(root, describe) @@ -86,8 +87,8 @@ def parse_version(root): from pyarrow.lib import TimestampType # Buffers, allocation -from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, compress, - decompress, allocate_buffer, frombuffer) +from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, + compress, decompress, allocate_buffer) from pyarrow.lib import (MemoryPool, total_allocated_bytes, set_memory_pool, default_memory_pool, @@ -163,7 +164,9 @@ def _plasma_store_entry_point(): # ---------------------------------------------------------------------- # Deprecations -from pyarrow.util import _deprecate_class # noqa +from pyarrow.util import _deprecate_api # noqa + +frombuffer = _deprecate_api('frombuffer', 'py_buffer', py_buffer, '0.9.0') # ---------------------------------------------------------------------- # Returning absolute path to the pyarrow include directory (if bundled, e.g. 
in diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 15ecd0164e..3947323233 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -26,6 +26,7 @@ import six import sys import threading import time +import warnings # 64K @@ -211,7 +212,7 @@ cdef class NativeFile: if isinstance(data, six.string_types): data = tobytes(data) - cdef Buffer arrow_buffer = frombuffer(data) + cdef Buffer arrow_buffer = py_buffer(data) cdef const uint8_t* buf = arrow_buffer.buffer.get().data() cdef int64_t bufsize = len(arrow_buffer) @@ -833,14 +834,14 @@ cdef class BufferReader(NativeFile): if isinstance(obj, Buffer): self.buffer = obj else: - self.buffer = frombuffer(obj) + self.buffer = py_buffer(obj) self.rd_file.reset(new CBufferReader(self.buffer.buffer)) self.is_readable = True self.closed = False -def frombuffer(object obj): +def py_buffer(object obj): """ Construct an Arrow buffer from a Python bytes object """ @@ -966,7 +967,7 @@ def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): check_status(CCodec.Create(c_codec, &compressor)) if not isinstance(buf, Buffer): - buf = frombuffer(buf) + buf = py_buffer(buf) c_buf = (<Buffer> buf).buffer.get() @@ -1031,7 +1032,7 @@ def decompress(object buf, decompressed_size=None, codec='lz4', check_status(CCodec.Create(c_codec, &compressor)) if not isinstance(buf, Buffer): - buf = frombuffer(buf) + buf = py_buffer(buf) c_buf = (<Buffer> buf).buffer.get() diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py index bdf7535796..6c8df350bf 100644 --- a/python/pyarrow/serialization.py +++ b/python/pyarrow/serialization.py @@ -23,7 +23,7 @@ from pyarrow.compat import builtin_pickle from pyarrow.lib import (SerializationContext, _default_serialization_context, - frombuffer) + py_buffer) try: import cloudpickle @@ -46,7 +46,7 @@ def _deserialize_numpy_array_list(data): def _pickle_to_buffer(x): pickled = builtin_pickle.dumps(x, protocol=builtin_pickle.HIGHEST_PROTOCOL) - return 
frombuffer(pickled) + return py_buffer(pickled) def _load_pickle_from_buffer(data): diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index fe680133b4..591381085c 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -170,7 +170,7 @@ def test_python_file_closing(): def test_buffer_bytes(): val = b'some data' - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert isinstance(buf, pa.Buffer) assert not buf.is_mutable @@ -182,7 +182,7 @@ def test_buffer_bytes(): def test_buffer_memoryview(): val = b'some data' - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert isinstance(buf, pa.Buffer) assert not buf.is_mutable @@ -194,7 +194,7 @@ def test_buffer_memoryview(): def test_buffer_bytearray(): val = bytearray(b'some data') - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert isinstance(buf, pa.Buffer) assert buf.is_mutable @@ -206,14 +206,14 @@ def test_buffer_bytearray(): def test_buffer_invalid(): with pytest.raises(TypeError, match="(bytes-like object|buffer interface)"): - pa.frombuffer(None) + pa.py_buffer(None) def test_buffer_to_numpy(): # Make sure creating a numpy array from an arrow buffer works byte_array = bytearray(20) byte_array[0] = 42 - buf = pa.frombuffer(byte_array) + buf = pa.py_buffer(byte_array) array = np.frombuffer(buf, dtype="uint8") assert array[0] == byte_array[0] byte_array[0] += 1 @@ -224,14 +224,14 @@ def test_buffer_to_numpy(): def test_buffer_from_numpy(): # C-contiguous arr = np.arange(12, dtype=np.int8).reshape((3, 4)) - buf = pa.frombuffer(arr) + buf = pa.py_buffer(arr) assert buf.to_pybytes() == arr.tobytes() # F-contiguous; note strides informations is lost - buf = pa.frombuffer(arr.T) + buf = pa.py_buffer(arr.T) assert buf.to_pybytes() == arr.tobytes() # Non-contiguous with pytest.raises(ValueError, match="not contiguous"): - buf = pa.frombuffer(arr.T[::2]) + buf = pa.py_buffer(arr.T[::2]) def test_buffer_equals(): @@ -250,11 +250,11 @@ def ne(a, b): b2 = 
bytearray(b1) b3 = bytearray(b1) b3[0] = 42 - buf1 = pa.frombuffer(b1) - buf2 = pa.frombuffer(b2) - buf3 = pa.frombuffer(b2) - buf4 = pa.frombuffer(b3) - buf5 = pa.frombuffer(np.frombuffer(b2, dtype=np.int16)) + buf1 = pa.py_buffer(b1) + buf2 = pa.py_buffer(b2) + buf3 = pa.py_buffer(b2) + buf4 = pa.py_buffer(b3) + buf5 = pa.py_buffer(np.frombuffer(b2, dtype=np.int16)) eq(buf1, buf1) eq(buf1, buf2) eq(buf2, buf3) @@ -266,7 +266,7 @@ def ne(a, b): def test_buffer_hashing(): # Buffers are unhashable with pytest.raises(TypeError, match="unhashable"): - hash(pa.frombuffer(b'123')) + hash(pa.py_buffer(b'123')) def test_foreign_buffer(): @@ -307,7 +307,7 @@ def test_compress_decompress(): test_data = (np.random.randint(0, 255, size=INPUT_SIZE) .astype(np.uint8) .tostring()) - test_buf = pa.frombuffer(test_data) + test_buf = pa.py_buffer(test_data) codecs = ['lz4', 'snappy', 'gzip', 'zstd', 'brotli'] for codec in codecs: @@ -333,7 +333,7 @@ def test_compress_decompress(): def test_buffer_memoryview_is_immutable(): val = b'some data' - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert not buf.is_mutable assert isinstance(buf, pa.Buffer) @@ -368,9 +368,9 @@ def test_uninitialized_buffer(): with check_uninitialized(): memoryview(buf) with check_uninitialized(): - buf.equals(pa.frombuffer(b'')) + buf.equals(pa.py_buffer(b'')) with check_uninitialized(): - pa.frombuffer(b'').equals(buf) + pa.py_buffer(b'').equals(buf) def test_memory_output_stream(): @@ -400,7 +400,7 @@ def test_inmemory_write_after_closed(): def test_buffer_protocol_ref_counting(): def make_buffer(bytes_obj): - return bytearray(pa.frombuffer(bytes_obj)) + return bytearray(pa.py_buffer(bytes_obj)) buf = make_buffer(b'foo') gc.collect() diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index 64aab06712..7ddf3958e5 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -445,7 +445,7 @@ class 
BufferClass(object): pass def serialize_buffer_class(obj): - return pa.frombuffer(b"hello") + return pa.py_buffer(b"hello") def deserialize_buffer_class(serialized_obj): return serialized_obj @@ -581,7 +581,7 @@ def test_serialize_subclasses(): def test_serialize_to_components_invalid_cases(): - buf = pa.frombuffer(b'hello') + buf = pa.py_buffer(b'hello') components = { 'num_tensors': 0, @@ -631,7 +631,7 @@ def test_deserialize_buffer_in_different_process(): import subprocess f = tempfile.NamedTemporaryFile(delete=False) - b = pa.serialize(pa.frombuffer(b'hello')).to_buffer() + b = pa.serialize(pa.py_buffer(b'hello')).to_buffer() f.write(b.to_pybytes()) f.close() diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index d984e19215..b8825658d8 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -27,11 +27,11 @@ def decorator(g): return decorator -def _deprecate_class(old_name, new_name, klass, next_version='0.5.0'): +def _deprecate_api(old_name, new_name, api, next_version): msg = ('pyarrow.{0} is deprecated as of {1}, please use {2} instead' .format(old_name, next_version, new_name)) - def deprecated_factory(*args, **kwargs): + def wrapper(*args, **kwargs): warnings.warn(msg, FutureWarning) - return klass(*args) + return api(*args) - return deprecated_factory + return wrapper ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > [Python] More consistent / intuitive name for pyarrow.frombuffer > ---------------------------------------------------------------- > > Key: ARROW-2292 > URL: https://issues.apache.org/jira/browse/ARROW-2292 > Project: Apache Arrow > Issue Type: Improvement > Components: Python > Reporter: Wes McKinney > Assignee: Antoine Pitrou > Priority: Major > Labels: pull-request-available > Fix For: 0.9.0 > > > Now that we have {{pyarrow.foreign_buffer}}, things are a bit odd. We could > call {{frombuffer}} something like {{py_buffer}} instead? -- This message was sent by Atlassian JIRA (v7.6.3#76005)