This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opendal.git
The following commit(s) were added to refs/heads/main by this push:
new 86d1b40d7 feat(bindings/python): Enhance Reader and Writer (#6086)
86d1b40d7 is described below
commit 86d1b40d7aaf6a5fce2e012f356d7f464609eb95
Author: Chitral Verma <[email protected]>
AuthorDate: Tue Jun 3 12:18:53 2025 +0530
feat(bindings/python): Enhance Reader and Writer (#6086)
* add reader and writer options to async open
* Update test_write.py
* Add test_async_writer_options test stub
* Enhance capability struct with additional conditional operations
* Implement AsyncFile::write_from
* revert write_from
* update as per options api
* revert Cargo.lock
* add options to blocking open
* fix formatting and lint
* Reset __base.pyi
* Updated `read` and `write` methods to accept `**options`
* Reset __base.pyi
* fix docs for sync/ async read
* fix docs for sync/ async open
* fix docs for sync/ async write
* temp fix for ConditionNotMatch test
* set concurrent to default
* update capabilities
* add remaining read options
* fix formatting
* updated docstrings
---
bindings/python/pyproject.toml | 14 ++
bindings/python/python/opendal/__init__.pyi | 320 ++++++++++++++++++----------
bindings/python/python/opendal/layers.pyi | 2 +-
bindings/python/src/capability.rs | 65 ++++--
bindings/python/src/file.rs | 7 +-
bindings/python/src/lib.rs | 1 +
bindings/python/src/operator.rs | 120 ++++++++---
bindings/python/src/options.rs | 82 ++++++-
bindings/python/tests/test_read.py | 62 +++++-
bindings/python/tests/test_sync_delete.py | 4 +-
bindings/python/tests/test_sync_exists.py | 4 +-
bindings/python/tests/test_sync_list.py | 4 +-
bindings/python/tests/test_write.py | 33 +++
13 files changed, 540 insertions(+), 178 deletions(-)
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
index a650f5c95..bd97dab13 100644
--- a/bindings/python/pyproject.toml
+++ b/bindings/python/pyproject.toml
@@ -70,8 +70,22 @@ cache-keys = [
{ file = "../../core/**/*.rs" },
]
+[tool.ruff]
+line-length = 88
+fix = true
+
[tool.ruff.lint]
ignore = ["E402", "F403", "F405"]
select = ["E", "F", "I"]
+
+[tool.ruff.lint.pycodestyle]
+max-doc-length = 88
+
+[tool.ruff.lint.pydocstyle]
+convention = "numpy"
+
+[tool.ruff.format]
+docstring-code-format = true
+
[tool.ruff.lint.isort]
known-first-party = ["opendal"]
diff --git a/bindings/python/python/opendal/__init__.pyi
b/bindings/python/python/opendal/__init__.pyi
index c6bfaca25..994c91c65 100644
--- a/bindings/python/python/opendal/__init__.pyi
+++ b/bindings/python/python/opendal/__init__.pyi
@@ -16,8 +16,9 @@
# under the License.
import os
+from collections.abc import AsyncIterable, Iterable
from types import TracebackType
-from typing import Any, AsyncIterable, Iterable, Optional, Type, Union, final
+from typing import Any, Union, final
from opendal import exceptions as exceptions
from opendal import layers as layers
@@ -38,12 +39,13 @@ class Operator(_Base):
Example:
```python
import opendal
+
op = opendal.Operator("s3", bucket="bucket", region="us-east-1")
op.write("hello.txt", b"hello world")
```
"""
def __init__(self, scheme: str, **options: Any) -> None: ...
- def layer(self, layer: Layer) -> "Operator":
+ def layer(self, layer: Layer) -> Operator:
"""Add new layers upon the current operator.
Args:
@@ -52,58 +54,93 @@ class Operator(_Base):
Returns:
The new operator with the layer added.
"""
- def open(self, path: PathBuf, mode: str) -> File:
+ def open(self, path: PathBuf, mode: str, **options: Any) -> File:
"""Open a file at the given path for reading or writing.
Args:
- path (str|Path): The path to the file.
- mode (str): The mode to open the file. Can be "rb" or "wb".
+ path (str | Path): The path to the file.
+ mode (str): The mode to open the file. Must be either `"rb"` for
reading or
+ `"wb"` for writing.
+ **options (Any): Additional options passed to the underlying
OpenDAL reader
+ or writer.
+ - If `mode == "rb"`: options match the
+ [OpenDAL
`ReaderOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReaderOptions.html).
+ - If `mode == "wb"`: options match the
+ [OpenDAL
`WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html).
Returns:
- A file-like object that can be used to read or write the file.
+ File: A file-like object that can be used to read or write the
file.
Example:
```python
import opendal
+
op = opendal.Operator("s3", bucket="bucket", region="us-east-1")
with op.open("hello.txt", "wb") as f:
f.write(b"hello world")
```
"""
- def read(self, path: PathBuf) -> bytes:
+ def read(self, path: PathBuf, **options: Any) -> bytes:
"""Read the content of the object at the given path.
Args:
- path (str|Path): The path to the object.
+ path (str | Path): The path to the object.
+ **options (Any): Optional read parameters matching the
+ [OpenDAL
`ReadOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReadOptions.html):
+
+ - offset (int): Byte offset to start reading from. Defaults to 0
+ if not specified.
+ - size (int): Number of bytes to read. If not specified, reads
until
+ the end of the object.
+ Together, `offset` and `size` define the byte range for
reading.
+ - version (str): Specify the version of the object to read, if
+ supported by the backend.
+ - concurrent (int): Level of concurrency for reading. Defaults
to
+ backend-specific value.
+ - chunk (int): Read chunk size in bytes.
+ - gap (int): Minimum gap (in bytes) between chunks to consider
+ them separate.
+ - if_match (str): Read only if the ETag matches the given
value.
+ - if_none_match (str): Read only if the ETag does not match the
+ given value.
+ - if_modified_since (datetime): Only read if the object was
modified
+ since this timestamp. This timestamp must be in UTC.
+ - if_unmodified_since (datetime): Only read if the object was
not
+ modified since this timestamp. This timestamp must be in
UTC.
Returns:
- The content of the object as bytes.
+ bytes: The content of the object as bytes.
"""
- def write(
- self,
- path: PathBuf,
- bs: bytes,
- *,
- append: bool = ...,
- chunk: int = ...,
- content_type: str = ...,
- content_disposition: str = ...,
- cache_control: str = ...,
- ) -> None:
+ def write(self, path: PathBuf, bs: bytes, **options: Any) -> None:
"""Write the content to the object at the given path.
Args:
- path (str|Path): The path to the object.
+ path (str | Path): The path to the object.
bs (bytes): The content to write.
- append (bool): Whether to append the content to the object.
- Defaults to False.
- chunk (int): The chunk size for writing. Defaults to write all.
- content_type (str): The content type of the object.
- Defaults to None.
- content_disposition (str): The content disposition of the object.
- Defaults to None.
- cache_control (str): The cache control of the object.
- Defaults to None.
+ **options (Any): Optional write parameters matching the
+ [OpenDAL
`WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html):
+
+ - append (bool): If True, append to the object instead of
overwriting.
+ - chunk (int): Specify the chunk size in bytes for multipart
uploads.
+ - concurrent (int): Number of concurrent upload parts. Larger
values can
+ improve performance.
+ - cache_control (str): Override the cache-control header for
the object.
+ - content_type (str): Explicitly set the Content-Type header
for
+ the object.
+ - content_disposition (str): Sets how the object should be
presented
+ (e.g., as an attachment).
+ - content_encoding (str): Override the Content-Encoding header.
+ - if_match (str): Perform the write only if the object's
current
+ ETag matches the given one.
+ - if_none_match (str): Perform the write only if the object's
+ current ETag does NOT match the given one.
+ - if_not_exists (bool): Only write the object if it doesn't
+ already exist.
+ - user_metadata (dict[str, str]): Custom user metadata to
associate
+ with the object.
+
+ Returns:
+ None
"""
def stat(self, path: PathBuf) -> Metadata:
"""Get the metadata of the object at the given path.
@@ -175,8 +212,7 @@ class Operator(_Base):
target (str|Path): The target path.
"""
def remove_all(self, path: PathBuf) -> None:
- """Convert into an async operator
- """
+ """Convert into an async operator"""
def to_async_operator(self) -> AsyncOperator: ...
@final
@@ -191,64 +227,101 @@ class AsyncOperator(_Base):
Example:
```python
import opendal
+
op = opendal.AsyncOperator("s3", bucket="bucket", region="us-east-1")
await op.write("hello.txt", b"hello world")
```
"""
def __init__(self, scheme: str, **options: Any) -> None: ...
- def layer(self, layer: Layer) -> "AsyncOperator": ...
- async def open(self, path: PathBuf, mode: str) -> AsyncFile:
+ def layer(self, layer: Layer) -> AsyncOperator: ...
+ async def open(self, path: PathBuf, mode: str, **options: Any) ->
AsyncFile:
"""Open a file at the given path for reading or writing.
Args:
- path (str|Path): The path to the file.
- mode (str): The mode to open the file. Can be "rb" or "wb".
+ path (str | Path): The path to the file.
+ mode (str): The mode to open the file. Must be either `"rb"` for
reading or
+ `"wb"` for writing.
+ **options (Any): Additional options passed to the underlying
OpenDAL reader
+ or writer.
+ - If `mode == "rb"`: options match the
+ [OpenDAL
`ReaderOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReaderOptions.html).
+ - If `mode == "wb"`: options match the
+ [OpenDAL
`WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html).
Returns:
- A file-like object that can be used to read or write the file.
+ AsyncFile: A file-like object that can be used to read or write
the file.
Example:
```python
import opendal
+
op = opendal.AsyncOperator("s3", bucket="bucket",
region="us-east-1")
async with await op.open("hello.txt", "wb") as f:
await f.write(b"hello world")
```
"""
- async def read(self, path: PathBuf) -> bytes:
+ async def read(self, path: PathBuf, **options: Any) -> bytes:
"""Read the content of the object at the given path.
Args:
- path (str|Path): The path to the object.
+ path (str | Path): The path to the object.
+ **options (Any): Optional read parameters matching the
+ [OpenDAL
`ReadOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.ReadOptions.html):
+
+ - offset (int): Byte offset to start reading from. Defaults to 0
+ if not specified.
+ - size (int): Number of bytes to read. If not specified, reads
until
+ the end of the object.
+ Together, `offset` and `size` define the byte range for
reading.
+ - version (str): Specify the version of the object to read, if
+ supported by the backend.
+ - concurrent (int): Level of concurrency for reading. Defaults
to
+ backend-specific value.
+ - chunk (int): Read chunk size in bytes.
+ - gap (int): Minimum gap (in bytes) between chunks to consider
+ them separate.
+ - override_content_type (str): Override the returned content
type.
+ - if_match (str): Read only if the ETag matches the given
value.
+ - if_none_match (str): Read only if the ETag does not match the
+ given value.
+ - if_modified_since (datetime): Only read if the object was
modified
+ since this timestamp. This timestamp must be in UTC.
+ - if_unmodified_since (datetime): Only read if the object was
not
+ modified since this timestamp. This timestamp must be in
UTC.
Returns:
The content of the object as bytes.
"""
- async def write(
- self,
- path: PathBuf,
- bs: bytes,
- *,
- append: bool = ...,
- chunk: int = ...,
- content_type: str = ...,
- content_disposition: str = ...,
- cache_control: str = ...,
- ) -> None:
+ async def write(self, path: PathBuf, bs: bytes, **options: Any) -> None:
"""Write the content to the object at the given path.
Args:
- path (str|Path): The path to the object.
+ path (str | Path): The path to the object.
bs (bytes): The content to write.
- append (bool): Whether to append the content to the object.
- Defaults to False.
- chunk (int): The chunk size for writing. Defaults to write all.
- content_type (str): The content type of the object.
- Defaults to None.
- content_disposition (str): The content disposition of the object.
- Defaults to None.
- cache_control (str): The cache control of the object.
- Defaults to None.
+ **options (Any): Optional write parameters matching the
+ [OpenDAL
`WriteOptions`](https://opendal.apache.org/docs/rust/opendal/options/struct.WriteOptions.html):
+
+ - append (bool): If True, append to the object instead of
overwriting.
+ - chunk (int): Specify the chunk size in bytes for multipart
uploads.
+ - concurrent (int): Number of concurrent upload parts. Larger
values can
+ improve performance.
+ - cache_control (str): Override the cache-control header for
the object.
+ - content_type (str): Explicitly set the Content-Type header
for
+ the object.
+ - content_disposition (str): Sets how the object should be
presented
+ (e.g., as an attachment).
+ - content_encoding (str): Override the Content-Encoding header.
+ - if_match (str): Perform the write only if the object's
current
+ ETag matches the given one.
+ - if_none_match (str): Perform the write only if the object's
+ current ETag does NOT match the given one.
+ - if_not_exists (bool): Only write the object if it doesn't
+ already exist.
+ - user_metadata (dict[str, str]): Custom user metadata to
associate
+ with the object.
+
+ Returns:
+ None
"""
async def stat(self, path: PathBuf) -> Metadata:
"""Get the metadata of the object at the given path.
@@ -375,7 +448,7 @@ class File:
Created by the `open` method of the `Operator` class.
"""
- def read(self, size: Optional[int] = None) -> bytes:
+ def read(self, size: int | None = None) -> bytes:
"""Read the content of the file.
Args:
@@ -384,7 +457,7 @@ class File:
Returns:
The content of the file as bytes.
"""
- def readline(self, size: Optional[int] = None) -> bytes:
+ def readline(self, size: int | None = None) -> bytes:
"""Read a single line from the file.
Args:
@@ -421,9 +494,9 @@ class File:
"""Enter the runtime context related to this object."""
def __exit__(
self,
- exc_type: Optional[Type[BaseException]],
- exc_value: Optional[BaseException],
- traceback: Optional[TracebackType],
+ exc_type: type[BaseException] | None,
+ exc_value: BaseException | None,
+ traceback: TracebackType | None,
) -> None:
"""Exit the runtime context related to this object."""
@property
@@ -433,7 +506,7 @@ class File:
"""Flush the internal buffer."""
def readable(self) -> bool:
"""Check if the file is readable."""
- def readinto(self, buffer: Union[bytes, bytearray]) -> int:
+ def readinto(self, buffer: bytes | bytearray) -> int:
"""Read bytes into a buffer.
Args:
@@ -454,7 +527,7 @@ class AsyncFile:
Created by the `open` method of the `AsyncOperator` class.
"""
- async def read(self, size: Optional[int] = None) -> bytes:
+ async def read(self, size: int | None = None) -> bytes:
"""Read the content of the file.
Args:
@@ -491,9 +564,9 @@ class AsyncFile:
"""Enter the runtime context related to this object."""
def __aexit__(
self,
- exc_type: Optional[Type[BaseException]],
- exc_value: Optional[BaseException],
- traceback: Optional[TracebackType],
+ exc_type: type[BaseException] | None,
+ exc_value: BaseException | None,
+ traceback: TracebackType | None,
) -> None:
"""Exit the runtime context related to this object."""
@property
@@ -516,19 +589,19 @@ class Entry:
@final
class Metadata:
@property
- def content_disposition(self) -> Optional[str]:
+ def content_disposition(self) -> str | None:
"""The content disposition of the object."""
@property
def content_length(self) -> int:
"""The content length of the object."""
@property
- def content_md5(self) -> Optional[str]:
+ def content_md5(self) -> str | None:
"""The MD5 checksum of the object."""
@property
- def content_type(self) -> Optional[str]:
+ def content_type(self) -> str | None:
"""The mime type of the object."""
@property
- def etag(self) -> Optional[str]:
+ def etag(self) -> str | None:
"""The ETag of the object."""
@property
def mode(self) -> EntryMode:
@@ -553,103 +626,124 @@ class Capability:
"""Storage capability information."""
stat: bool
- """If operator supports stat"""
+ """If operator supports stat."""
stat_with_if_match: bool
- """If operator supports stat with if match"""
+ """If operator supports stat with if match."""
stat_with_if_none_match: bool
- """If operator supports stat with if none match"""
+ """If operator supports stat with if none match."""
read: bool
- """If operator supports read"""
+ """Indicates if the operator supports read operations."""
read_with_if_match: bool
- """If operator supports read with if match"""
+ """Indicates if conditional read operations using If-Match are
supported."""
read_with_if_none_match: bool
- """If operator supports read with if none match"""
+ """Indicates if conditional read operations using If-None-Match are
supported."""
+
+ read_with_if_modified_since: bool
+ """If-Modified-Since condition supported for read."""
+
+ read_with_if_unmodified_since: bool
+ """If-Unmodified-Since condition supported for read."""
read_with_override_cache_control: bool
- """If operator supports read with override cache control"""
+ """Cache-Control header override supported for read."""
read_with_override_content_disposition: bool
- """If operator supports read with override content disposition"""
+ """Content-Disposition header override supported for read."""
read_with_override_content_type: bool
- """If operator supports read with override content type"""
+ """Indicates if Content-Type header override is supported during read
operations."""
+
+ read_with_version: bool
+ """Indicates if versions read operations are supported."""
write: bool
- """If operator supports write"""
+ """Indicates if the operator supports write operations."""
write_can_multi: bool
- """If operator supports write can be called in multi times"""
+ """Indicates if multiple write operations can be performed on the same
object."""
write_can_empty: bool
- """If operator supports write with empty content"""
+ """Indicates if writing empty content is supported."""
write_can_append: bool
- """If operator supports write by append"""
+ """Indicates if append operations are supported."""
write_with_content_type: bool
- """If operator supports write with content type"""
+ """Indicates if Content-Type can be specified during write operations."""
write_with_content_disposition: bool
- """If operator supports write with content disposition"""
+ """Indicates if Content-Disposition can be specified during write
operations."""
+
+ write_with_content_encoding: bool
+ """Indicates if Content-Encoding can be specified during write
operations."""
write_with_cache_control: bool
- """If operator supports write with cache control"""
+ """Indicates if Cache-Control can be specified during write operations."""
+
+ write_with_if_match: bool
+ """Indicates if conditional write operations using If-Match are
supported."""
+
+ write_with_if_none_match: bool
+ """Indicates if conditional write operations using If-None-Match are
supported."""
+
+ write_with_if_not_exists: bool
+ """Indicates if write operations can be conditional on object
non-existence."""
+
+ write_with_user_metadata: bool
+ """Indicates if custom user metadata can be attached during write
operations."""
- write_multi_max_size: Optional[int]
- """Write_multi_max_size is the max size that services support in
write_multi.
- For example, AWS S3 supports 5GiB as max in write_multi."""
+ write_multi_max_size: int | None
+ """Maximum part size for multipart uploads (e.g. 5GiB for AWS S3)."""
- write_multi_min_size: Optional[int]
- """Write_multi_min_size is the min size that services support in
write_multi.
- For example, AWS S3 requires at least 5MiB in write_multi expect the last
one."""
+ write_multi_min_size: int | None
+ """Minimum part size for multipart uploads (e.g. 5MiB for AWS S3)."""
- write_total_max_size: Optional[int]
- """Write_total_max_size is the max size that services support in
write_total.
- For example, Cloudflare D1 supports 1MB as max in write_total."""
+ write_total_max_size: int | None
+ """Maximum total size for write operations (e.g. 1MB for Cloudflare D1)."""
create_dir: bool
- """If operator supports create dir"""
+ """If operator supports create dir."""
delete: bool
- """If operator supports delete"""
+ """If operator supports delete."""
copy: bool
- """If operator supports copy"""
+ """If operator supports copy."""
rename: bool
- """If operator supports rename"""
+ """If operator supports rename."""
list: bool
- """If operator supports list"""
+ """If operator supports list."""
list_with_limit: bool
- """If backend supports list with limit"""
+ """If backend supports list with limit."""
list_with_start_after: bool
- """If backend supports list with start after"""
+ """If backend supports list with start after."""
list_with_recursive: bool
- """If backend supports list with recursive"""
+ """If backend supports list with recursive."""
presign: bool
- """If operator supports presign"""
+ """If operator supports presign."""
presign_read: bool
- """If operator supports presign read"""
+ """If operator supports presign read."""
presign_stat: bool
- """If operator supports presign stat"""
+ """If operator supports presign stat."""
presign_write: bool
- """If operator supports presign write"""
+ """If operator supports presign write."""
presign_delete: bool
- """If operator supports presign delete"""
+ """If operator supports presign delete."""
shared: bool
- """If operator supports shared"""
+ """If operator supports shared."""
diff --git a/bindings/python/python/opendal/layers.pyi
b/bindings/python/python/opendal/layers.pyi
index 609928ff0..adcb543f4 100644
--- a/bindings/python/python/opendal/layers.pyi
+++ b/bindings/python/python/opendal/layers.pyi
@@ -37,4 +37,4 @@ class ConcurrentLimitLayer(Layer):
@final
class MimeGuessLayer(Layer):
- def __init__(self) -> None: ...
\ No newline at end of file
+ def __init__(self) -> None: ...
diff --git a/bindings/python/src/capability.rs
b/bindings/python/src/capability.rs
index c828a605d..4edf92fb4 100644
--- a/bindings/python/src/capability.rs
+++ b/bindings/python/src/capability.rs
@@ -28,44 +28,57 @@ pub struct Capability {
/// If operator supports stat with if none match.
pub stat_with_if_none_match: bool,
- /// If operator supports read.
+ /// Indicates if the operator supports read operations.
pub read: bool,
- /// If operator supports read with if match.
+ /// Indicates if conditional read operations using If-Match are supported.
pub read_with_if_match: bool,
- /// If operator supports read with if none match.
+ /// Indicates if conditional read operations using If-None-Match are
supported.
pub read_with_if_none_match: bool,
- /// if operator supports read with override cache control.
+ /// Indicates if conditional read operations using If-Modified-Since are
supported.
+ pub read_with_if_modified_since: bool,
+ /// Indicates if conditional read operations using If-Unmodified-Since are
supported.
+ pub read_with_if_unmodified_since: bool,
+ /// Indicates if Cache-Control header override is supported during read
operations.
pub read_with_override_cache_control: bool,
- /// if operator supports read with override content disposition.
+ /// Indicates if Content-Disposition header override is supported during
read operations.
pub read_with_override_content_disposition: bool,
- /// if operator supports read with override content type.
+ /// Indicates if Content-Type header override is supported during read
operations.
pub read_with_override_content_type: bool,
+ /// Indicates if versioned read operations are supported.
+ pub read_with_version: bool,
- /// If operator supports write.
+ /// Indicates if the operator supports write operations.
pub write: bool,
- /// If operator supports write can be called in multi times.
+ /// Indicates if multiple write operations can be performed on the same
object.
pub write_can_multi: bool,
- /// If operator supports write with empty content.
+ /// Indicates if writing empty content is supported.
pub write_can_empty: bool,
- /// If operator supports write by append.
+ /// Indicates if append operations are supported.
pub write_can_append: bool,
- /// If operator supports write with content type.
+ /// Indicates if Content-Type can be specified during write operations.
pub write_with_content_type: bool,
- /// If operator supports write with content disposition.
+ /// Indicates if Content-Disposition can be specified during write
operations.
pub write_with_content_disposition: bool,
- /// If operator supports write with cache control.
+ /// Indicates if Content-Encoding can be specified during write operations.
+ pub write_with_content_encoding: bool,
+ /// Indicates if Cache-Control can be specified during write operations.
pub write_with_cache_control: bool,
- /// write_multi_max_size is the max size that services support in
write_multi.
- ///
- /// For example, AWS S3 supports 5GiB as max in write_multi.
+ /// Indicates if conditional write operations using If-Match are supported.
+ pub write_with_if_match: bool,
+ /// Indicates if conditional write operations using If-None-Match are
supported.
+ pub write_with_if_none_match: bool,
+ /// Indicates if write operations can be conditional on object
non-existence.
+ pub write_with_if_not_exists: bool,
+ /// Indicates if custom user metadata can be attached during write
operations.
+ pub write_with_user_metadata: bool,
+ /// Maximum size supported for multipart uploads.
+ /// For example, AWS S3 supports up to 5GiB per part in multipart uploads.
pub write_multi_max_size: Option<usize>,
- /// write_multi_min_size is the min size that services support in
write_multi.
- ///
- /// For example, AWS S3 requires at least 5MiB in write_multi expect the
last one.
+ /// Minimum size required for multipart uploads (except for the last part).
+ /// For example, AWS S3 requires at least 5MiB per part.
pub write_multi_min_size: Option<usize>,
- /// write_total_max_size is the max size that services support in
write_total.
- ///
- /// For example, Cloudflare D1 supports 1MB as max in write_total.
+ /// Maximum total size supported for write operations.
+ /// For example, Cloudflare D1 has a 1MB total size limit.
pub write_total_max_size: Option<usize>,
/// If operator supports create dir.
@@ -117,6 +130,9 @@ impl Capability {
read_with_override_content_disposition: capability
.read_with_override_content_disposition,
read_with_override_content_type:
capability.read_with_override_content_type,
+ read_with_if_modified_since:
capability.read_with_if_modified_since,
+ read_with_if_unmodified_since:
capability.read_with_if_unmodified_since,
+ read_with_version: capability.read_with_version,
write: capability.write,
write_can_multi: capability.write_can_multi,
write_can_empty: capability.write_can_empty,
@@ -127,6 +143,11 @@ impl Capability {
write_multi_max_size: capability.write_multi_max_size,
write_multi_min_size: capability.write_multi_min_size,
write_total_max_size: capability.write_total_max_size,
+ write_with_content_encoding:
capability.write_with_content_encoding,
+ write_with_if_match: capability.write_with_if_match,
+ write_with_if_none_match: capability.write_with_if_none_match,
+ write_with_if_not_exists: capability.write_with_if_not_exists,
+ write_with_user_metadata: capability.write_with_user_metadata,
create_dir: capability.create_dir,
delete: capability.delete,
copy: capability.copy,
diff --git a/bindings/python/src/file.rs b/bindings/python/src/file.rs
index 26ec4c606..a84fa8ade 100644
--- a/bindings/python/src/file.rs
+++ b/bindings/python/src/file.rs
@@ -25,6 +25,7 @@ use std::sync::Arc;
use futures::AsyncReadExt;
use futures::AsyncSeekExt;
+use futures::AsyncWriteExt;
use pyo3::buffer::PyBuffer;
use pyo3::exceptions::PyIOError;
use pyo3::exceptions::PyValueError;
@@ -335,7 +336,7 @@ pub struct AsyncFile(Arc<Mutex<AsyncFileState>>);
enum AsyncFileState {
Reader(ocore::FuturesAsyncReader),
- Writer(ocore::Writer),
+ Writer(ocore::FuturesAsyncWriter),
Closed,
}
@@ -344,7 +345,7 @@ impl AsyncFile {
Self(Arc::new(Mutex::new(AsyncFileState::Reader(reader))))
}
- pub fn new_writer(writer: ocore::Writer) -> Self {
+ pub fn new_writer(writer: ocore::FuturesAsyncWriter) -> Self {
Self(Arc::new(Mutex::new(AsyncFileState::Writer(writer))))
}
}
@@ -422,7 +423,7 @@ impl AsyncFile {
let len = bs.len();
writer
- .write(bs)
+ .write_all(&bs)
.await
.map(|_| len)
.map_err(|err| PyIOError::new_err(err.to_string()))
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
index 7e1a99e60..456c3114e 100644
--- a/bindings/python/src/lib.rs
+++ b/bindings/python/src/lib.rs
@@ -54,6 +54,7 @@ fn _opendal(py: Python, m: &Bound<'_, PyModule>) ->
PyResult<()> {
m.add_class::<Capability>()?;
m.add_class::<WriteOptions>()?;
+ m.add_class::<ReadOptions>()?;
// Layer module
let layers_module = PyModule::new(py, "layers")?;
diff --git a/bindings/python/src/operator.rs b/bindings/python/src/operator.rs
index 38d372d8c..166dbe5d9 100644
--- a/bindings/python/src/operator.rs
+++ b/bindings/python/src/operator.rs
@@ -84,7 +84,7 @@ impl Operator {
})
}
- /// Add new layers upon existing operator
+ /// Add new layers upon the existing operator
pub fn layer(&self, layer: &layers::Layer) -> PyResult<Self> {
let op = layer.0.layer(self.core.clone().into());
@@ -99,19 +99,39 @@ impl Operator {
}
/// Open a file-like reader for the given path.
- pub fn open(&self, path: PathBuf, mode: String) -> PyResult<File> {
- let path = path.to_string_lossy().to_string();
+ #[pyo3(signature = (path, mode, *, **kwargs))]
+ pub fn open(
+ &self,
+ path: PathBuf,
+ mode: String,
+ kwargs: Option<&Bound<PyDict>>,
+ ) -> PyResult<File> {
let this = self.core.clone();
+ let path = path.to_string_lossy().to_string();
+
+ let reader_opts = kwargs
+ .map(|v| v.extract::<ReadOptions>())
+ .transpose()?
+ .unwrap_or_default();
+
+ let writer_opts = kwargs
+ .map(|v| v.extract::<WriteOptions>())
+ .transpose()?
+ .unwrap_or_default();
+
if mode == "rb" {
- let r = this
- .reader(&path)
- .map_err(format_pyerr)?
- .into_std_read(..)
+ let range = reader_opts.make_range();
+ let reader = this
+ .reader_options(&path, reader_opts.into())
.map_err(format_pyerr)?;
+
+ let r = reader.into_std_read(range).map_err(format_pyerr)?;
Ok(File::new_reader(r))
} else if mode == "wb" {
- let w = this.writer(&path).map_err(format_pyerr)?;
- Ok(File::new_writer(w))
+ let writer = this
+ .writer_options(&path, writer_opts.into())
+ .map_err(format_pyerr)?;
+ Ok(File::new_writer(writer))
} else {
Err(Unsupported::new_err(format!(
"OpenDAL doesn't support mode: {mode}"
@@ -120,13 +140,25 @@ impl Operator {
}
/// Read the whole path into bytes.
- pub fn read<'p>(&'p self, py: Python<'p>, path: PathBuf) ->
PyResult<Bound<'p, PyAny>> {
+ #[pyo3(signature = (path, **kwargs))]
+ pub fn read<'p>(
+ &'p self,
+ py: Python<'p>,
+ path: PathBuf,
+ kwargs: Option<ReadOptions>,
+ ) -> PyResult<Bound<'p, PyAny>> {
let path = path.to_string_lossy().to_string();
- let buffer = self.core.read(&path).map_err(format_pyerr)?.to_vec();
+ let kwargs = kwargs.unwrap_or_default();
+ let buffer = self
+ .core
+ .read_options(&path, kwargs.into())
+ .map_err(format_pyerr)?
+ .to_vec();
+
Buffer::new(buffer).into_bytes_ref(py)
}
- /// Write bytes into given path.
+ /// Write bytes into a given path.
#[pyo3(signature = (path, bs, **kwargs))]
pub fn write(&self, path: PathBuf, bs: Vec<u8>, kwargs:
Option<WriteOptions>) -> PyResult<()> {
let path = path.to_string_lossy().to_string();
@@ -137,7 +169,7 @@ impl Operator {
.map_err(format_pyerr)
}
- /// Get current path's metadata **without cache** directly.
+ /// Get the current path's metadata **without cache** directly.
pub fn stat(&self, path: PathBuf) -> PyResult<Metadata> {
let path = path.to_string_lossy().to_string();
self.core
@@ -146,7 +178,7 @@ impl Operator {
.map(Metadata::new)
}
- /// Copy source to target.
+ /// Copy the source to the target.
pub fn copy(&self, source: PathBuf, target: PathBuf) -> PyResult<()> {
let source = source.to_string_lossy().to_string();
let target = target.to_string_lossy().to_string();
@@ -160,19 +192,19 @@ impl Operator {
self.core.rename(&source, &target).map_err(format_pyerr)
}
- /// Remove all file
+    /// Recursively remove the given path and every entry under it.
pub fn remove_all(&self, path: PathBuf) -> PyResult<()> {
let path = path.to_string_lossy().to_string();
self.core.remove_all(&path).map_err(format_pyerr)
}
- /// Create a dir at given path.
+ /// Create a dir at the given path.
///
/// # Notes
///
/// To indicate that a path is a directory, it is compulsory to include
/// a trailing / in the path. Failure to do so may result in
- /// `NotADirectory` error being returned by OpenDAL.
+    /// a `NotADirectory` error being returned by OpenDAL.
///
/// # Behavior
///
@@ -193,7 +225,7 @@ impl Operator {
self.core.delete(&path).map_err(format_pyerr)
}
- /// Check given path is exists.
+ /// Checks if the given path exists.
///
/// # Notes
///
@@ -220,7 +252,7 @@ impl Operator {
Ok(BlockingLister::new(l))
}
- /// List dir in flat way.
+ /// List dir in a flat way.
pub fn scan(&self, path: PathBuf) -> PyResult<BlockingLister> {
let path = path.to_string_lossy().to_string();
let l = self
@@ -312,7 +344,7 @@ impl AsyncOperator {
})
}
- /// Add new layers upon existing operator
+ /// Add new layers upon the existing operator
pub fn layer(&self, layer: &layers::Layer) -> PyResult<Self> {
let op = layer.0.layer(self.core.clone());
Ok(Self {
@@ -323,27 +355,46 @@ impl AsyncOperator {
}
/// Open a file-like reader for the given path.
+ #[pyo3(signature = (path, mode, *, **kwargs))]
pub fn open<'p>(
&'p self,
py: Python<'p>,
path: PathBuf,
mode: String,
+ kwargs: Option<&Bound<PyDict>>,
) -> PyResult<Bound<'p, PyAny>> {
let this = self.core.clone();
let path = path.to_string_lossy().to_string();
+ let reader_opts = kwargs
+ .map(|v| v.extract::<ReadOptions>())
+ .transpose()?
+ .unwrap_or_default();
+
+ let writer_opts = kwargs
+ .map(|v| v.extract::<WriteOptions>())
+ .transpose()?
+ .unwrap_or_default();
+
future_into_py(py, async move {
if mode == "rb" {
- let r = this
- .reader(&path)
+ let range = reader_opts.make_range();
+ let reader = this
+ .reader_options(&path, reader_opts.into())
.await
- .map_err(format_pyerr)?
- .into_futures_async_read(..)
+ .map_err(format_pyerr)?;
+
+ let r = reader
+ .into_futures_async_read(range)
.await
.map_err(format_pyerr)?;
Ok(AsyncFile::new_reader(r))
} else if mode == "wb" {
- let w = this.writer(&path).await.map_err(format_pyerr)?;
+ let writer = this
+ .writer_options(&path, writer_opts.into())
+ .await
+ .map_err(format_pyerr)?;
+ let w = writer.into_futures_async_write();
Ok(AsyncFile::new_writer(w))
} else {
Err(Unsupported::new_err(format!(
@@ -354,11 +405,26 @@ impl AsyncOperator {
}
/// Read the whole path into bytes.
- pub fn read<'p>(&'p self, py: Python<'p>, path: PathBuf) ->
PyResult<Bound<'p, PyAny>> {
+ #[pyo3(signature = (path, **kwargs))]
+ pub fn read<'p>(
+ &'p self,
+ py: Python<'p>,
+ path: PathBuf,
+ kwargs: Option<ReadOptions>,
+ ) -> PyResult<Bound<'p, PyAny>> {
let this = self.core.clone();
let path = path.to_string_lossy().to_string();
+ let kwargs = kwargs.unwrap_or_default();
future_into_py(py, async move {
- let res: Vec<u8> =
this.read(&path).await.map_err(format_pyerr)?.to_vec();
+ let range = kwargs.make_range();
+ let res = this
+ .reader_options(&path, kwargs.into())
+ .await
+ .map_err(format_pyerr)?
+ .read(range)
+ .await
+ .map_err(format_pyerr)?
+ .to_vec();
Python::with_gil(|py| Buffer::new(res).into_bytes(py))
})
}
diff --git a/bindings/python/src/options.rs b/bindings/python/src/options.rs
index a1c17b676..f47fdb147 100644
--- a/bindings/python/src/options.rs
+++ b/bindings/python/src/options.rs
@@ -20,27 +20,105 @@ use opendal as ocore;
use pyo3::pyclass;
use std::collections::HashMap;
+use chrono::{DateTime, Utc};
+use std::ops::Bound as RangeBound;
+
+/// Keyword options accepted by the Python read APIs (`read`, `open(..., "rb")`).
+///
+/// Converted into `opendal::options::ReadOptions` / `ReaderOptions` below;
+/// unset fields fall back to the service defaults.
+#[pyclass(module = "opendal")]
+#[derive(FromPyObject, Default)]
+pub struct ReadOptions {
+    // Read a specific object version, where the service supports versioning.
+    pub version: Option<String>,
+    // Number of concurrent fetches issued by the reader.
+    pub concurrent: Option<usize>,
+    // Preferred chunk size (bytes) for each fetch.
+    pub chunk: Option<usize>,
+    // Maximum gap (bytes) tolerated when merging adjacent fetches.
+    pub gap: Option<usize>,
+    // Inclusive starting byte offset of the read.
+    pub offset: Option<usize>,
+    // NOTE(review): `make_range` currently treats this as an absolute end
+    // position, but the name suggests a byte count — confirm intended semantics.
+    pub size: Option<usize>,
+    // Conditional read guards (ETag-based).
+    pub if_match: Option<String>,
+    pub if_none_match: Option<String>,
+    // Conditional read guards on the last-modified timestamp.
+    pub if_modified_since: Option<DateTime<Utc>>,
+    pub if_unmodified_since: Option<DateTime<Utc>>,
+    // Override the returned Content-* / Cache-Control headers, where supported.
+    pub content_type: Option<String>,
+    pub cache_control: Option<String>,
+    pub content_disposition: Option<String>,
+}
+
+impl ReadOptions {
+    /// Build the byte-range bound pair implied by `offset` and `size`.
+    ///
+    /// `offset` is the inclusive starting byte; `size` is the number of
+    /// bytes to read from that offset, so the exclusive end bound is
+    /// `offset + size`. Using `size` alone as the end position would
+    /// silently truncate every read that also sets a non-zero `offset`.
+    pub fn make_range(&self) -> (RangeBound<u64>, RangeBound<u64>) {
+        let start = self.offset.unwrap_or(0) as u64;
+        let start_bound = self
+            .offset
+            .map_or(RangeBound::Unbounded, |s| RangeBound::Included(s as u64));
+        // BUG FIX: `size` is a length, so the end bound is relative to the
+        // start offset, not an absolute file position.
+        let end_bound = self.size.map_or(RangeBound::Unbounded, |len| {
+            RangeBound::Excluded(start + len as u64)
+        });
+
+        (start_bound, end_bound)
+    }
+}
+
+/// Keyword options accepted by the Python write APIs (`write`, `open(..., "wb")`).
#[pyclass(module = "opendal")]
#[derive(FromPyObject, Default)]
pub struct WriteOptions {
    pub append: Option<bool>,
    pub chunk: Option<usize>,
+    // Number of concurrent upload tasks.
+    pub concurrent: Option<usize>,
+    pub cache_control: Option<String>,
    pub content_type: Option<String>,
    pub content_disposition: Option<String>,
-    pub cache_control: Option<String>,
+    pub content_encoding: Option<String>,
+    // Conditional write guards (ETag-based).
+    pub if_match: Option<String>,
+    pub if_none_match: Option<String>,
+    // Fail the write when the object already exists, where supported.
+    pub if_not_exists: Option<bool>,
    pub user_metadata: Option<HashMap<String, String>>,
}
+/// Convert Python read options into `opendal::options::ReadOptions`,
+/// used by the one-shot `read_options` call (byte range included).
+impl From<ReadOptions> for ocore::options::ReadOptions {
+    fn from(opts: ReadOptions) -> Self {
+        // Compute the range first: `make_range` borrows `opts`, whose
+        // fields are moved out of it below.
+        let r = opts.make_range();
+        Self {
+            range: r.into(),
+            version: opts.version,
+            if_match: opts.if_match,
+            if_none_match: opts.if_none_match,
+            if_modified_since: opts.if_modified_since,
+            if_unmodified_since: opts.if_unmodified_since,
+            // NOTE(review): unwrap_or_default() maps "unset" to 0 — confirm
+            // the core interprets 0 as "use default concurrency".
+            concurrent: opts.concurrent.unwrap_or_default(),
+            chunk: opts.chunk,
+            gap: opts.gap,
+            override_content_type: opts.content_type,
+            override_cache_control: opts.cache_control,
+            override_content_disposition: opts.content_disposition,
+        }
+    }
+}
+
+/// Convert Python read options into `opendal::options::ReaderOptions`,
+/// used by `reader_options`. The byte range is deliberately not part of
+/// these options: callers apply `make_range()` when consuming the reader.
+impl From<ReadOptions> for ocore::options::ReaderOptions {
+    fn from(opts: ReadOptions) -> Self {
+        Self {
+            version: opts.version,
+            if_match: opts.if_match,
+            if_none_match: opts.if_none_match,
+            if_modified_since: opts.if_modified_since,
+            if_unmodified_since: opts.if_unmodified_since,
+            // NOTE(review): unwrap_or_default() maps "unset" to 0 — confirm
+            // the core interprets 0 as "use default concurrency".
+            concurrent: opts.concurrent.unwrap_or_default(),
+            chunk: opts.chunk,
+            gap: opts.gap,
+        }
+    }
+}
+
impl From<WriteOptions> for ocore::options::WriteOptions {
    fn from(opts: WriteOptions) -> Self {
        Self {
            append: opts.append.unwrap_or(false),
+            // NOTE(review): unwrap_or_default() maps "unset" to 0 — confirm
+            // the core interprets 0 as "use default concurrency".
+            concurrent: opts.concurrent.unwrap_or_default(),
            chunk: opts.chunk,
            content_type: opts.content_type,
            content_disposition: opts.content_disposition,
            cache_control: opts.cache_control,
+            content_encoding: opts.content_encoding,
            user_metadata: opts.user_metadata,
-            ..Default::default()
+            if_match: opts.if_match,
+            if_none_match: opts.if_none_match,
+            // Conditional create: fail when the target already exists.
+            if_not_exists: opts.if_not_exists.unwrap_or(false),
+            // Initialization is now exhaustive; dropping `..Default::default()`
+            // turns any new core field into a compile error here, on purpose.
        }
    }
}
diff --git a/bindings/python/tests/test_read.py
b/bindings/python/tests/test_read.py
index ef2abb646..5f78a4e1c 100644
--- a/bindings/python/tests/test_read.py
+++ b/bindings/python/tests/test_read.py
@@ -17,13 +17,14 @@
import io
import os
+from datetime import timedelta
from pathlib import Path
from random import choices, randint
from uuid import uuid4
import pytest
-from opendal.exceptions import NotFound
+from opendal.exceptions import ConditionNotMatch, NotFound
@pytest.mark.need_capability("read", "write", "delete")
@@ -73,6 +74,18 @@ def test_sync_reader(service_name, operator, async_operator):
reader.readinto(buf)
assert buf == content[:1]
+ range_start = randint(0, len(content) - 1)
+ range_end = randint(range_start, len(content) - 1)
+
+ with operator.open(filename, "rb", offset=range_start, size=range_end) as
reader:
+ assert reader.readable()
+ assert not reader.writable()
+ assert not reader.closed
+
+ read_content = reader.read()
+ assert read_content is not None
+ assert read_content == content[range_start:range_end]
+
operator.delete(filename)
@@ -142,6 +155,10 @@ async def test_async_reader(service_name, operator,
async_operator):
await async_operator.write(filename, content)
async with await async_operator.open(filename, "rb") as reader:
+ assert await reader.readable()
+ assert not await reader.writable()
+ assert not await reader.closed
+
read_content = await reader.read()
assert read_content is not None
assert read_content == content
@@ -158,6 +175,20 @@ async def test_async_reader(service_name, operator,
async_operator):
assert read_content is not None
assert read_content == content
+ range_start = randint(0, len(content) - 1)
+ range_end = randint(range_start, len(content) - 1)
+
+ async with await async_operator.open(
+ filename, "rb", offset=range_start, size=range_end
+ ) as reader:
+ assert await reader.readable()
+ assert not await reader.writable()
+ assert not await reader.closed
+
+ read_content = await reader.read()
+ assert read_content is not None
+ assert read_content == content[range_start:range_end]
+
await async_operator.delete(filename)
@@ -222,3 +253,32 @@ def test_sync_read_not_exists(service_name, operator,
async_operator):
async def test_async_read_not_exists(service_name, operator, async_operator):
with pytest.raises(NotFound):
await async_operator.read(str(uuid4()))
+
+
[email protected]_capability(
+ "read", "read_with_if_modified_since", "read_with_if_unmodified_since"
+)
+def test_sync_conditional_reads(service_name, operator):
+ path = f"random_file_{str(uuid4())}"
+ content = b"test data"
+ operator.write(path, content)
+
+ metadata = operator.stat(path)
+ mod_time = metadata.last_modified
+ assert mod_time is not None
+
+ # Large delta: 1 minute earlier
+ before = mod_time - timedelta(minutes=1)
+ after = mod_time + timedelta(seconds=10)
+
+ # Should succeed: file was modified after `before`
+ assert operator.read(path, if_modified_since=before) == content
+
+ # Should succeed: file was unmodified since `after`
+ assert operator.read(path, if_unmodified_since=after) == content
+
+ # Should fail: file was modified after `before`
+ with pytest.raises(ConditionNotMatch):
+ operator.read(path, if_unmodified_since=before)
+
+ operator.delete(path)
diff --git a/bindings/python/tests/test_sync_delete.py
b/bindings/python/tests/test_sync_delete.py
index 25e9d77d2..0260bf826 100644
--- a/bindings/python/tests/test_sync_delete.py
+++ b/bindings/python/tests/test_sync_delete.py
@@ -23,9 +23,7 @@ import pytest
from opendal.exceptions import NotFound
[email protected]_capability(
- "read", "write", "delete", "list", "create_dir"
-)
[email protected]_capability("read", "write", "delete", "list", "create_dir")
def test_sync_remove_all(service_name, operator, async_operator):
parent = f"random_dir_{str(uuid4())}"
excepted = [
diff --git a/bindings/python/tests/test_sync_exists.py
b/bindings/python/tests/test_sync_exists.py
index f1e5a6f81..7c591f598 100644
--- a/bindings/python/tests/test_sync_exists.py
+++ b/bindings/python/tests/test_sync_exists.py
@@ -21,9 +21,7 @@ from uuid import uuid4
import pytest
[email protected]_capability(
- "read", "write", "delete", "list", "create_dir"
-)
[email protected]_capability("read", "write", "delete", "list", "create_dir")
def test_sync_exists(service_name, operator, async_operator):
content = os.urandom(1024)
target = f"random_{str(uuid4())}"
diff --git a/bindings/python/tests/test_sync_list.py
b/bindings/python/tests/test_sync_list.py
index 75d416133..aa21ddeab 100644
--- a/bindings/python/tests/test_sync_list.py
+++ b/bindings/python/tests/test_sync_list.py
@@ -44,9 +44,7 @@ def test_sync_list_with_start_after(service_name, operator,
async_operator):
start_after_file = files_to_create[2] # e.g., test_dir/file_2
entries_after = []
# Note: start_after expects the *full path* relative to the operator root
- for entry in operator.list(
- test_dir, start_after=start_after_file
- ):
+ for entry in operator.list(test_dir, start_after=start_after_file):
entries_after.append(entry.path)
entries_after.sort() # Ensure order
diff --git a/bindings/python/tests/test_write.py
b/bindings/python/tests/test_write.py
index ef651c8d1..571d06344 100644
--- a/bindings/python/tests/test_write.py
+++ b/bindings/python/tests/test_write.py
@@ -176,6 +176,23 @@ async def test_async_writer(service_name, operator,
async_operator):
await async_operator.stat(filename)
[email protected]
[email protected]_capability("write", "delete", "write_with_if_not_exists")
+async def test_async_writer_options(service_name, operator, async_operator):
+ size = randint(1, 1024)
+ filename = f"test_file_{str(uuid4())}.txt"
+ content = os.urandom(size)
+ f = await async_operator.open(filename, "wb")
+ written_bytes = await f.write(content)
+ assert written_bytes == size
+ await f.close()
+
+ with pytest.raises(Exception) as excinfo:
+ async with await async_operator.open(filename, "wb",
if_not_exists=True) as w:
+ w.write(content)
+ assert "ConditionNotMatch" in str(excinfo.value)
+
+
@pytest.mark.need_capability("write", "delete")
def test_sync_writer(service_name, operator, async_operator):
size = randint(1, 1024)
@@ -188,3 +205,19 @@ def test_sync_writer(service_name, operator,
async_operator):
operator.delete(filename)
with pytest.raises(NotFound):
operator.stat(filename)
+
+
[email protected]_capability("write", "delete", "write_with_if_not_exists")
+def test_sync_writer_options(service_name, operator, async_operator):
+ size = randint(1, 1024)
+ filename = f"test_file_{str(uuid4())}.txt"
+ content = os.urandom(size)
+ f = operator.open(filename, "wb")
+ written_bytes = f.write(content)
+ assert written_bytes == size
+ f.close()
+
+ with pytest.raises(Exception) as excinfo:
+ with operator.open(filename, "wb", if_not_exists=True) as w:
+ w.write(content)
+ assert "ConditionNotMatch" in str(excinfo.value)