This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-opendal.git
The following commit(s) were added to refs/heads/main by this push:
new 5d0194bff feat(bindings/python): read APIs return `memoryview` instead
of `bytes` to avoid copy (#3310)
5d0194bff is described below
commit 5d0194bffa8533abba442ff3118f57c8c80f86c4
Author: messense <[email protected]>
AuthorDate: Tue Oct 17 00:44:11 2023 +0800
feat(bindings/python): read APIs return `memoryview` instead of `bytes` to
avoid copy (#3310)
Before
```python
In [1]: import opendal
In [2]: op = opendal.Operator("memory")
In [3]: op.write("test", bytes(10 * 1024 * 1024))
In [4]: %timeit op.read("test")
627 µs ± 3.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
```
After
```python
In [1]: import opendal
In [2]: op = opendal.Operator("memory")
In [3]: op.write("test", bytes(10 * 1024 * 1024))
In [4]: %timeit op.read("test")
291 µs ± 761 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
```
---
.github/workflows/bindings_python.yml | 24 ++++++++++---
bindings/python/Cargo.toml | 2 +-
bindings/python/python/opendal/__init__.pyi | 8 ++---
bindings/python/src/asyncio.rs | 25 +++++++++++---
bindings/python/src/lib.rs | 53 ++++++++++++++++++++++++++---
5 files changed, 95 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/bindings_python.yml
b/.github/workflows/bindings_python.yml
index a93a72eb3..c3cb29e00 100644
--- a/.github/workflows/bindings_python.yml
+++ b/.github/workflows/bindings_python.yml
@@ -52,13 +52,29 @@ jobs:
working-directory: "bindings/python"
run: |
python -m pip install -e .[test]
- - name: Run behave
+ - name: Run pytest
working-directory: "bindings/python"
env:
OPENDAL_MEMORY_TEST: on
run: |
pytest -vk TestMemory
+ sdist:
+ runs-on: ubuntu-latest
+ if: "startsWith(github.ref, 'refs/tags/')"
+ steps:
+ - uses: actions/checkout@v4
+ - uses: PyO3/maturin-action@v1
+ with:
+ working-directory: "bindings/python"
+ command: sdist
+ args: -o dist
+ - name: Upload sdist
+ uses: actions/upload-artifact@v3
+ with:
+ name: wheels
+ path: bindings/python/dist
+
linux:
runs-on: ubuntu-latest
if: "startsWith(github.ref, 'refs/tags/')"
@@ -69,7 +85,7 @@ jobs:
manylinux: auto
working-directory: "bindings/python"
command: build
- args: --release --sdist -o dist
+ args: --release -o dist --find-interpreter
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
@@ -85,7 +101,7 @@ jobs:
with:
working-directory: "bindings/python"
command: build
- args: --release -o dist
+ args: --release -o dist --find-interpreter
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
@@ -102,7 +118,7 @@ jobs:
working-directory: "bindings/python"
command: build
target: universal2-apple-darwin
- args: --release -o dist
+ args: --release -o dist --find-interpreter
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 4289c6a12..df1da905d 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -34,6 +34,6 @@ doc = false
[dependencies]
futures = "0.3.28"
opendal.workspace = true
-pyo3 = { version = "0.19", features = ["abi3-py37"] }
+pyo3 = "0.19"
pyo3-asyncio = { version = "0.19", features = ["tokio-runtime"] }
tokio = "1"
diff --git a/bindings/python/python/opendal/__init__.pyi
b/bindings/python/python/opendal/__init__.pyi
index b25d66e4c..2ee1960b9 100644
--- a/bindings/python/python/opendal/__init__.pyi
+++ b/bindings/python/python/opendal/__init__.pyi
@@ -21,7 +21,7 @@ class Error(Exception): ...
class Operator:
def __init__(self, scheme: str, **kwargs): ...
- def read(self, path: str) -> bytes: ...
+ def read(self, path: str) -> memoryview: ...
def open_reader(self, path: str) -> Reader: ...
def write(
self,
@@ -41,7 +41,7 @@ class Operator:
class AsyncOperator:
def __init__(self, scheme: str, **kwargs): ...
- async def read(self, path: str) -> bytes: ...
+ async def read(self, path: str) -> memoryview: ...
def open_reader(self, path: str) -> AsyncReader: ...
async def write(
self,
@@ -65,14 +65,14 @@ class AsyncOperator:
) -> PresignedRequest: ...
class Reader:
- def read(self, size: Optional[int] = None) -> bytes: ...
+ def read(self, size: Optional[int] = None) -> memoryview: ...
def seek(self, offset: int, whence: int = 0) -> int: ...
def tell(self) -> int: ...
def __enter__(self) -> Reader: ...
def __exit__(self, exc_type, exc_value, traceback) -> None: ...
class AsyncReader:
- async def read(self, size: Optional[int] = None) -> bytes: ...
+ async def read(self, size: Optional[int] = None) -> memoryview: ...
async def seek(self, offset: int, whence: int = 0) -> int: ...
async def tell(self) -> int: ...
def __aenter__(self) -> AsyncReader: ...
diff --git a/bindings/python/src/asyncio.rs b/bindings/python/src/asyncio.rs
index 4b71b948c..78693d577 100644
--- a/bindings/python/src/asyncio.rs
+++ b/bindings/python/src/asyncio.rs
@@ -27,9 +27,11 @@ use pyo3::exceptions::PyIOError;
use pyo3::exceptions::PyNotImplementedError;
use pyo3::exceptions::PyStopAsyncIteration;
use pyo3::exceptions::PyValueError;
+use pyo3::ffi;
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::types::PyDict;
+use pyo3::AsPyPointer;
use pyo3_asyncio::tokio::future_into_py;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
@@ -39,6 +41,7 @@ use crate::build_operator;
use crate::build_opwrite;
use crate::format_pyerr;
use crate::layers;
+use crate::Buffer;
use crate::Entry;
use crate::Metadata;
use crate::PresignedRequest;
@@ -74,8 +77,15 @@ impl AsyncOperator {
let this = self.0.clone();
future_into_py(py, async move {
let res: Vec<u8> = this.read(&path).await.map_err(format_pyerr)?;
- let pybytes: PyObject = Python::with_gil(|py| PyBytes::new(py,
&res).into());
- Ok(pybytes)
+ Python::with_gil(|py| {
+ let buffer = Buffer::from(res).into_py(py);
+ unsafe {
+ PyObject::from_owned_ptr_or_err(
+ py,
+ ffi::PyMemoryView_FromObject(buffer.as_ptr()),
+ )
+ }
+ })
})
}
@@ -330,8 +340,15 @@ impl AsyncReader {
buffer
}
};
- let pybytes: PyObject = Python::with_gil(|py| PyBytes::new(py,
&buffer).into());
- Ok(pybytes)
+ Python::with_gil(|py| {
+ let buffer = Buffer::from(buffer).into_py(py);
+ unsafe {
+ PyObject::from_owned_ptr_or_err(
+ py,
+ ffi::PyMemoryView_FromObject(buffer.as_ptr()),
+ )
+ }
+ })
})
}
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
index 0ad5b0e09..83eb4a52c 100644
--- a/bindings/python/src/lib.rs
+++ b/bindings/python/src/lib.rs
@@ -22,6 +22,7 @@ use std::collections::HashMap;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
+use std::os::raw::c_int;
use std::str::FromStr;
use ::opendal as od;
@@ -33,9 +34,10 @@ use pyo3::exceptions::PyIOError;
use pyo3::exceptions::PyNotImplementedError;
use pyo3::exceptions::PyPermissionError;
use pyo3::exceptions::PyValueError;
+use pyo3::ffi;
use pyo3::prelude::*;
-use pyo3::types::PyBytes;
use pyo3::types::PyDict;
+use pyo3::AsPyPointer;
mod asyncio;
mod layers;
@@ -44,6 +46,41 @@ use crate::asyncio::*;
create_exception!(opendal, Error, PyException, "OpenDAL related errors");
+/// A bytes-like object that implements buffer protocol.
+#[pyclass(module = "opendal")]
+struct Buffer {
+ inner: Vec<u8>,
+}
+
+#[pymethods]
+impl Buffer {
+ unsafe fn __getbuffer__(
+ slf: PyRefMut<Self>,
+ view: *mut ffi::Py_buffer,
+ flags: c_int,
+ ) -> PyResult<()> {
+ let bytes = slf.inner.as_slice();
+ let ret = ffi::PyBuffer_FillInfo(
+ view,
+ slf.as_ptr() as *mut _,
+ bytes.as_ptr() as *mut _,
+ bytes.len().try_into().unwrap(),
+ 1, // read only
+ flags,
+ );
+ if ret == -1 {
+ return Err(PyErr::fetch(slf.py()));
+ }
+ Ok(())
+ }
+}
+
+impl From<Vec<u8>> for Buffer {
+ fn from(inner: Vec<u8>) -> Self {
+ Self { inner }
+ }
+}
+
fn add_layers(mut op: od::Operator, layers: Vec<layers::Layer>) ->
PyResult<od::Operator> {
for layer in layers {
match layer {
@@ -105,10 +142,15 @@ impl Operator {
/// Read the whole path into bytes.
pub fn read<'p>(&'p self, py: Python<'p>, path: &str) -> PyResult<&'p
PyAny> {
- self.0
+ let buffer = self
+ .0
.read(path)
.map_err(format_pyerr)
- .map(|res| PyBytes::new(py, &res).into())
+ .map(Buffer::from)?
+ .into_py(py);
+ let memoryview =
+ unsafe {
py.from_owned_ptr_or_err(ffi::PyMemoryView_FromObject(buffer.as_ptr()))? };
+ Ok(memoryview)
}
/// Open a file-like reader for the given path.
@@ -238,7 +280,10 @@ impl Reader {
buffer
}
};
- Ok(PyBytes::new(py, &buffer).into())
+ let buffer = Buffer::from(buffer).into_py(py);
+ let memoryview =
+ unsafe {
py.from_owned_ptr_or_err(ffi::PyMemoryView_FromObject(buffer.as_ptr()))? };
+ Ok(memoryview)
}
/// `Reader` doesn't support write.