This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 2d0ee12 docs: add python binding docstrings (#169)
2d0ee12 is described below
commit 2d0ee12cd0bcc0ea86c10275271f37518c34eb9b
Author: kazdy <[email protected]>
AuthorDate: Mon Oct 14 19:43:02 2024 +0200
docs: add python binding docstrings (#169)
---
python/hudi/_internal.pyi | 167 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 154 insertions(+), 13 deletions(-)
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index d296821..ccb8d1c 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -23,13 +23,52 @@ __version__: str
@dataclass(init=False)
class HudiFileGroupReader:
- def __init__(self, base_uri: str, options: Optional[Dict[str, str]] = None): ...
+ """
+ A reader for a group of Hudi file slices. Allows reading of records from the base file in a Hudi table.
+
+ Attributes:
+ base_uri (str): The base URI of the Hudi table.
+ options (Optional[Dict[str, str]]): Additional options for reading the file group.
+ """
+ def __init__(self, base_uri: str, options: Optional[Dict[str, str]] = None):
+ """
+ Initializes the HudiFileGroupReader.
+
+ Parameters:
+ base_uri (str): The base URI of the Hudi table.
+ options (Optional[Dict[str, str]]): Additional configuration options (optional).
+ """
+ ...
def read_file_slice_by_base_file_path(
self, relative_path: str
- ) -> "pyarrow.RecordBatch": ...
+ ) -> "pyarrow.RecordBatch":
+ """
+ Reads the data from the base file given a relative path.
+
+ Parameters:
+ relative_path (str): The relative path to the base file.
+
+ Returns:
+ pyarrow.RecordBatch: A batch of records read from the base file.
+ """
+ ...
@dataclass(init=False)
class HudiFileSlice:
+ """
+ Represents a file slice in a Hudi table. A file slice includes information about the base file,
+ the partition it belongs to, and associated metadata.
+
+ Attributes:
+ file_group_id (str): The ID of the file group this slice belongs to.
+ partition_path (str): The path of the partition containing this file slice.
+ commit_time (str): The commit time of this file slice.
+ base_file_name (str): The name of the base file.
+ base_file_size (int): The size of the base file.
+ num_records (int): The number of records in the base file.
+ size_bytes (int): The size of the file slice in bytes.
+ """
+
file_group_id: str
partition_path: str
commit_time: str
@@ -38,31 +77,133 @@ class HudiFileSlice:
num_records: int
size_bytes: int
- def base_file_relative_path(self) -> str: ...
+ def base_file_relative_path(self) -> str:
+ """
+ Returns the relative path of the base file for this file slice.
+
+ Returns:
+ str: The relative path of the base file.
+ """
+ ...
@dataclass(init=False)
class HudiTable:
+ """
+ Represents a Hudi table and provides methods to interact with it.
+
+ Attributes:
+ base_uri (str): The base URI of the Hudi table.
+ options (Optional[Dict[str, str]]): Additional options for table operations.
+ """
+
def __init__(
self,
base_uri: str,
options: Optional[Dict[str, str]] = None,
- ): ...
- def hudi_options(self) -> Dict[str, str]: ...
- def storage_options(self) -> Dict[str, str]: ...
- def get_schema(self) -> "pyarrow.Schema": ...
- def get_partition_schema(self) -> "pyarrow.Schema": ...
+ ):
+ """
+ Initializes the HudiTable.
+
+ Parameters:
+ base_uri (str): The base URI of the Hudi table.
+ options (Optional[Dict[str, str]]): Additional configuration options (optional).
+ """
+ ...
+ def get_schema(self) -> "pyarrow.Schema":
+ """
+ Returns the schema of the Hudi table.
+
+ Returns:
+ pyarrow.Schema: The schema of the table.
+ """
+ ...
+ def get_partition_schema(self) -> "pyarrow.Schema":
+ """
+ Returns the partition schema of the Hudi table.
+
+ Returns:
+ pyarrow.Schema: The schema used for partitioning the table.
+ """
+ ...
+ def hudi_options(self) -> Dict[str, str]:
+ """
+ Get hudi options for table.
+
+ Returns:
+ Dict[str, str]: A dictionary of hudi options.
+ """
+ ...
+ def storage_options(self) -> Dict[str, str]:
+ """
+ Get storage options set for table instance.
+
+ Returns:
+ Dict[str, str]: A dictionary of storage options.
+ """
+ ...
def split_file_slices(
self, n: int, filters: Optional[List[str]]
- ) -> List[List[HudiFileSlice]]: ...
- def get_file_slices(self, filters: Optional[List[str]]) -> List[HudiFileSlice]: ...
- def create_file_group_reader(self) -> HudiFileGroupReader: ...
+ ) -> List[List[HudiFileSlice]]:
+ """
+ Splits the file slices into 'n' parts, optionally filtered by given filters.
+
+ Parameters:
+ n (int): The number of parts to split the file slices into.
+ filters (Optional[List[str]]): Optional filters for selecting file slices.
+
+ Returns:
+ List[List[HudiFileSlice]]: A list of file slice groups, each group being a list of HudiFileSlice objects.
+ """
+ ...
+ def get_file_slices(self, filters: Optional[List[str]]) -> List[HudiFileSlice]:
+ """
+ Retrieves all file slices in the Hudi table, optionally filtered by the provided filters.
+
+ Parameters:
+ filters (Optional[List[str]]): Optional filters for selecting file slices.
+
+ Returns:
+ List[HudiFileSlice]: A list of file slices matching the filters.
+ """
+ ...
+ def create_file_group_reader(self) -> HudiFileGroupReader:
+ """
+ Creates a HudiFileGroupReader for reading records from file groups in the Hudi table.
+
+ Returns:
+ HudiFileGroupReader: A reader object for reading file groups.
+ """
+ ...
def read_snapshot(
self, filters: Optional[List[str]]
- ) -> List["pyarrow.RecordBatch"]: ...
+ ) -> List["pyarrow.RecordBatch"]:
+ """
+ Reads the latest snapshot of the Hudi table, optionally filtered by the provided filters.
+
+ Parameters:
+ filters (Optional[List[str]]): Optional filters for selecting file slices.
+
+ Returns:
+ List[pyarrow.RecordBatch]: A list of record batches from the snapshot of the table.
+ """
+ ...
def build_hudi_table(
base_uri: str,
hudi_options: Optional[Dict[str, str]] = None,
storage_options: Optional[Dict[str, str]] = None,
options: Optional[Dict[str, str]] = None,
-) -> HudiTable: ...
+) -> HudiTable:
+ """
+ Builds hudi table from base_uri and options.
+
+ Parameters:
+ base_uri (str): location of a hudi table.
+ hudi_options (Optional[Dict[str, str]]): hudi options.
+ storage_options (Optional[Dict[str, str]]): storage_options.
+ options (Optional[Dict[str, str]]): hudi or storage options.
+
+ Returns:
+ HudiTable: An instance of hudi table.
+ """
+ ...