This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-opendal.git


The following commit(s) were added to refs/heads/main by this push:
     new 5d0194bff feat(bindings/python): read APIs return `memoryview` instead 
of `bytes` to avoid copy (#3310)
5d0194bff is described below

commit 5d0194bffa8533abba442ff3118f57c8c80f86c4
Author: messense <[email protected]>
AuthorDate: Tue Oct 17 00:44:11 2023 +0800

    feat(bindings/python): read APIs return `memoryview` instead of `bytes` to 
avoid copy (#3310)
    
    Before
    
    ```python
    In [1]: import opendal
    
    In [2]: op = opendal.Operator("memory")
    
    In [3]: op.write("test", bytes(10 * 1024 * 1024))
    
    In [4]: %timeit op.read("test")
    627 µs ± 3.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
    ```
    
    After
    
    ```python
    In [1]: import opendal
    
    In [2]: op = opendal.Operator("memory")
    
    In [3]: op.write("test", bytes(10 * 1024 * 1024))
    
    In [4]: %timeit op.read("test")
    291 µs ± 761 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
    ```
---
 .github/workflows/bindings_python.yml       | 24 ++++++++++---
 bindings/python/Cargo.toml                  |  2 +-
 bindings/python/python/opendal/__init__.pyi |  8 ++---
 bindings/python/src/asyncio.rs              | 25 +++++++++++---
 bindings/python/src/lib.rs                  | 53 ++++++++++++++++++++++++++---
 5 files changed, 95 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/bindings_python.yml 
b/.github/workflows/bindings_python.yml
index a93a72eb3..c3cb29e00 100644
--- a/.github/workflows/bindings_python.yml
+++ b/.github/workflows/bindings_python.yml
@@ -52,13 +52,29 @@ jobs:
         working-directory: "bindings/python"
         run: |
           python -m pip install -e .[test]
-      - name: Run behave
+      - name: Run pytest
         working-directory: "bindings/python"
         env:
           OPENDAL_MEMORY_TEST: on
         run: |
           pytest -vk TestMemory
 
+  sdist:
+    runs-on: ubuntu-latest
+    if: "startsWith(github.ref, 'refs/tags/')"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: PyO3/maturin-action@v1
+        with:
+          working-directory: "bindings/python"
+          command: sdist
+          args: -o dist
+      - name: Upload sdist
+        uses: actions/upload-artifact@v3
+        with:
+          name: wheels
+          path: bindings/python/dist
+
   linux:
     runs-on: ubuntu-latest
     if: "startsWith(github.ref, 'refs/tags/')"
@@ -69,7 +85,7 @@ jobs:
           manylinux: auto
           working-directory: "bindings/python"
           command: build
-          args: --release --sdist -o dist
+          args: --release -o dist --find-interpreter
       - name: Upload wheels
         uses: actions/upload-artifact@v3
         with:
@@ -85,7 +101,7 @@ jobs:
         with:
           working-directory: "bindings/python"
           command: build
-          args: --release -o dist
+          args: --release -o dist --find-interpreter
       - name: Upload wheels
         uses: actions/upload-artifact@v3
         with:
@@ -102,7 +118,7 @@ jobs:
           working-directory: "bindings/python"
           command: build
           target: universal2-apple-darwin
-          args: --release -o dist
+          args: --release -o dist --find-interpreter
       - name: Upload wheels
         uses: actions/upload-artifact@v3
         with:
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 4289c6a12..df1da905d 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -34,6 +34,6 @@ doc = false
 [dependencies]
 futures = "0.3.28"
 opendal.workspace = true
-pyo3 = { version = "0.19", features = ["abi3-py37"] }
+pyo3 = "0.19"
 pyo3-asyncio = { version = "0.19", features = ["tokio-runtime"] }
 tokio = "1"
diff --git a/bindings/python/python/opendal/__init__.pyi 
b/bindings/python/python/opendal/__init__.pyi
index b25d66e4c..2ee1960b9 100644
--- a/bindings/python/python/opendal/__init__.pyi
+++ b/bindings/python/python/opendal/__init__.pyi
@@ -21,7 +21,7 @@ class Error(Exception): ...
 
 class Operator:
     def __init__(self, scheme: str, **kwargs): ...
-    def read(self, path: str) -> bytes: ...
+    def read(self, path: str) -> memoryview: ...
     def open_reader(self, path: str) -> Reader: ...
     def write(
         self,
@@ -41,7 +41,7 @@ class Operator:
 
 class AsyncOperator:
     def __init__(self, scheme: str, **kwargs): ...
-    async def read(self, path: str) -> bytes: ...
+    async def read(self, path: str) -> memoryview: ...
     def open_reader(self, path: str) -> AsyncReader: ...
     async def write(
         self,
@@ -65,14 +65,14 @@ class AsyncOperator:
     ) -> PresignedRequest: ...
 
 class Reader:
-    def read(self, size: Optional[int] = None) -> bytes: ...
+    def read(self, size: Optional[int] = None) -> memoryview: ...
     def seek(self, offset: int, whence: int = 0) -> int: ...
     def tell(self) -> int: ...
     def __enter__(self) -> Reader: ...
     def __exit__(self, exc_type, exc_value, traceback) -> None: ...
 
 class AsyncReader:
-    async def read(self, size: Optional[int] = None) -> bytes: ...
+    async def read(self, size: Optional[int] = None) -> memoryview: ...
     async def seek(self, offset: int, whence: int = 0) -> int: ...
     async def tell(self) -> int: ...
     def __aenter__(self) -> AsyncReader: ...
diff --git a/bindings/python/src/asyncio.rs b/bindings/python/src/asyncio.rs
index 4b71b948c..78693d577 100644
--- a/bindings/python/src/asyncio.rs
+++ b/bindings/python/src/asyncio.rs
@@ -27,9 +27,11 @@ use pyo3::exceptions::PyIOError;
 use pyo3::exceptions::PyNotImplementedError;
 use pyo3::exceptions::PyStopAsyncIteration;
 use pyo3::exceptions::PyValueError;
+use pyo3::ffi;
 use pyo3::prelude::*;
 use pyo3::types::PyBytes;
 use pyo3::types::PyDict;
+use pyo3::AsPyPointer;
 use pyo3_asyncio::tokio::future_into_py;
 use tokio::io::AsyncReadExt;
 use tokio::io::AsyncSeekExt;
@@ -39,6 +41,7 @@ use crate::build_operator;
 use crate::build_opwrite;
 use crate::format_pyerr;
 use crate::layers;
+use crate::Buffer;
 use crate::Entry;
 use crate::Metadata;
 use crate::PresignedRequest;
@@ -74,8 +77,15 @@ impl AsyncOperator {
         let this = self.0.clone();
         future_into_py(py, async move {
             let res: Vec<u8> = this.read(&path).await.map_err(format_pyerr)?;
-            let pybytes: PyObject = Python::with_gil(|py| PyBytes::new(py, 
&res).into());
-            Ok(pybytes)
+            Python::with_gil(|py| {
+                let buffer = Buffer::from(res).into_py(py);
+                unsafe {
+                    PyObject::from_owned_ptr_or_err(
+                        py,
+                        ffi::PyMemoryView_FromObject(buffer.as_ptr()),
+                    )
+                }
+            })
         })
     }
 
@@ -330,8 +340,15 @@ impl AsyncReader {
                     buffer
                 }
             };
-            let pybytes: PyObject = Python::with_gil(|py| PyBytes::new(py, 
&buffer).into());
-            Ok(pybytes)
+            Python::with_gil(|py| {
+                let buffer = Buffer::from(buffer).into_py(py);
+                unsafe {
+                    PyObject::from_owned_ptr_or_err(
+                        py,
+                        ffi::PyMemoryView_FromObject(buffer.as_ptr()),
+                    )
+                }
+            })
         })
     }
 
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
index 0ad5b0e09..83eb4a52c 100644
--- a/bindings/python/src/lib.rs
+++ b/bindings/python/src/lib.rs
@@ -22,6 +22,7 @@ use std::collections::HashMap;
 use std::io::Read;
 use std::io::Seek;
 use std::io::SeekFrom;
+use std::os::raw::c_int;
 use std::str::FromStr;
 
 use ::opendal as od;
@@ -33,9 +34,10 @@ use pyo3::exceptions::PyIOError;
 use pyo3::exceptions::PyNotImplementedError;
 use pyo3::exceptions::PyPermissionError;
 use pyo3::exceptions::PyValueError;
+use pyo3::ffi;
 use pyo3::prelude::*;
-use pyo3::types::PyBytes;
 use pyo3::types::PyDict;
+use pyo3::AsPyPointer;
 
 mod asyncio;
 mod layers;
@@ -44,6 +46,41 @@ use crate::asyncio::*;
 
 create_exception!(opendal, Error, PyException, "OpenDAL related errors");
 
+/// A bytes-like object that implements buffer protocol.
+#[pyclass(module = "opendal")]
+struct Buffer {
+    inner: Vec<u8>,
+}
+
+#[pymethods]
+impl Buffer {
+    unsafe fn __getbuffer__(
+        slf: PyRefMut<Self>,
+        view: *mut ffi::Py_buffer,
+        flags: c_int,
+    ) -> PyResult<()> {
+        let bytes = slf.inner.as_slice();
+        let ret = ffi::PyBuffer_FillInfo(
+            view,
+            slf.as_ptr() as *mut _,
+            bytes.as_ptr() as *mut _,
+            bytes.len().try_into().unwrap(),
+            1, // read only
+            flags,
+        );
+        if ret == -1 {
+            return Err(PyErr::fetch(slf.py()));
+        }
+        Ok(())
+    }
+}
+
+impl From<Vec<u8>> for Buffer {
+    fn from(inner: Vec<u8>) -> Self {
+        Self { inner }
+    }
+}
+
 fn add_layers(mut op: od::Operator, layers: Vec<layers::Layer>) -> 
PyResult<od::Operator> {
     for layer in layers {
         match layer {
@@ -105,10 +142,15 @@ impl Operator {
 
     /// Read the whole path into bytes.
     pub fn read<'p>(&'p self, py: Python<'p>, path: &str) -> PyResult<&'p 
PyAny> {
-        self.0
+        let buffer = self
+            .0
             .read(path)
             .map_err(format_pyerr)
-            .map(|res| PyBytes::new(py, &res).into())
+            .map(Buffer::from)?
+            .into_py(py);
+        let memoryview =
+            unsafe { 
py.from_owned_ptr_or_err(ffi::PyMemoryView_FromObject(buffer.as_ptr()))? };
+        Ok(memoryview)
     }
 
     /// Open a file-like reader for the given path.
@@ -238,7 +280,10 @@ impl Reader {
                 buffer
             }
         };
-        Ok(PyBytes::new(py, &buffer).into())
+        let buffer = Buffer::from(buffer).into_py(py);
+        let memoryview =
+            unsafe { 
py.from_owned_ptr_or_err(ffi::PyMemoryView_FromObject(buffer.as_ptr()))? };
+        Ok(memoryview)
     }
 
     /// `Reader` doesn't support write.

Reply via email to