This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 2d0ee12  docs: add python binding docstrings (#169)
2d0ee12 is described below

commit 2d0ee12cd0bcc0ea86c10275271f37518c34eb9b
Author: kazdy <[email protected]>
AuthorDate: Mon Oct 14 19:43:02 2024 +0200

    docs: add python binding docstrings (#169)
---
 python/hudi/_internal.pyi | 167 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 154 insertions(+), 13 deletions(-)

diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index d296821..ccb8d1c 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -23,13 +23,52 @@ __version__: str
 
 @dataclass(init=False)
 class HudiFileGroupReader:
-    def __init__(self, base_uri: str, options: Optional[Dict[str, str]] = None): ...
+    """
+    A reader for a group of Hudi file slices. Allows reading of records from the base file in a Hudi table.
+
+    Attributes:
+        base_uri (str): The base URI of the Hudi table.
+        options (Optional[Dict[str, str]]): Additional options for reading the file group.
+    """
+    def __init__(self, base_uri: str, options: Optional[Dict[str, str]] = None):
+        """
+        Initializes the HudiFileGroupReader.
+
+        Parameters:
+            base_uri (str): The base URI of the Hudi table.
+            options (Optional[Dict[str, str]]): Additional configuration options (optional).
+        """
+        ...
     def read_file_slice_by_base_file_path(
         self, relative_path: str
-    ) -> "pyarrow.RecordBatch": ...
+    ) -> "pyarrow.RecordBatch":
+        """
+        Reads the data from the base file given a relative path.
+
+        Parameters:
+            relative_path (str): The relative path to the base file.
+
+        Returns:
+            pyarrow.RecordBatch: A batch of records read from the base file.
+        """
+        ...
 
 @dataclass(init=False)
 class HudiFileSlice:
+    """
+    Represents a file slice in a Hudi table. A file slice includes information about the base file,
+    the partition it belongs to, and associated metadata.
+
+    Attributes:
+        file_group_id (str): The ID of the file group this slice belongs to.
+        partition_path (str): The path of the partition containing this file slice.
+        commit_time (str): The commit time of this file slice.
+        base_file_name (str): The name of the base file.
+        base_file_size (int): The size of the base file.
+        num_records (int): The number of records in the base file.
+        size_bytes (int): The size of the file slice in bytes.
+    """
+
     file_group_id: str
     partition_path: str
     commit_time: str
@@ -38,31 +77,133 @@ class HudiFileSlice:
     num_records: int
     size_bytes: int
 
-    def base_file_relative_path(self) -> str: ...
+    def base_file_relative_path(self) -> str:
+        """
+        Returns the relative path of the base file for this file slice.
+
+        Returns:
+            str: The relative path of the base file.
+        """
+        ...
 
 @dataclass(init=False)
 class HudiTable:
+    """
+    Represents a Hudi table and provides methods to interact with it.
+
+    Attributes:
+        base_uri (str): The base URI of the Hudi table.
+        options (Optional[Dict[str, str]]): Additional options for table operations.
+    """
+
     def __init__(
         self,
         base_uri: str,
         options: Optional[Dict[str, str]] = None,
-    ): ...
-    def hudi_options(self) -> Dict[str, str]: ...
-    def storage_options(self) -> Dict[str, str]: ...
-    def get_schema(self) -> "pyarrow.Schema": ...
-    def get_partition_schema(self) -> "pyarrow.Schema": ...
+    ):
+        """
+        Initializes the HudiTable.
+
+        Parameters:
+            base_uri (str): The base URI of the Hudi table.
+            options (Optional[Dict[str, str]]): Additional configuration options (optional).
+        """
+        ...
+    def get_schema(self) -> "pyarrow.Schema":
+        """
+        Returns the schema of the Hudi table.
+
+        Returns:
+            pyarrow.Schema: The schema of the table.
+        """
+        ...
+    def get_partition_schema(self) -> "pyarrow.Schema":
+        """
+        Returns the partition schema of the Hudi table.
+
+        Returns:
+            pyarrow.Schema: The schema used for partitioning the table.
+        """
+        ...
+    def hudi_options(self) -> Dict[str, str]:
+        """
+        Get hudi options for table.
+
+        Returns:
+            Dict[str, str]: A dictionary of hudi options.
+        """
+        ...
+    def storage_options(self) -> Dict[str, str]:
+        """
+        Get storage options set for table instance.
+
+        Returns:
+            Dict[str, str]: A dictionary of storage options.
+        """
+        ...
     def split_file_slices(
         self, n: int, filters: Optional[List[str]]
-    ) -> List[List[HudiFileSlice]]: ...
-    def get_file_slices(self, filters: Optional[List[str]]) -> List[HudiFileSlice]: ...
-    def create_file_group_reader(self) -> HudiFileGroupReader: ...
+    ) -> List[List[HudiFileSlice]]:
+        """
+        Splits the file slices into 'n' parts, optionally filtered by given filters.
+
+        Parameters:
+            n (int): The number of parts to split the file slices into.
+            filters (Optional[List[str]]): Optional filters for selecting file slices.
+
+        Returns:
+            List[List[HudiFileSlice]]: A list of file slice groups, each group being a list of HudiFileSlice objects.
+        """
+        ...
+    def get_file_slices(self, filters: Optional[List[str]]) -> List[HudiFileSlice]:
+        """
+        Retrieves all file slices in the Hudi table, optionally filtered by the provided filters.
+
+        Parameters:
+            filters (Optional[List[str]]): Optional filters for selecting file slices.
+
+        Returns:
+            List[HudiFileSlice]: A list of file slices matching the filters.
+        """
+        ...
+    def create_file_group_reader(self) -> HudiFileGroupReader:
+        """
+        Creates a HudiFileGroupReader for reading records from file groups in the Hudi table.
+
+        Returns:
+            HudiFileGroupReader: A reader object for reading file groups.
+        """
+        ...
     def read_snapshot(
         self, filters: Optional[List[str]]
-    ) -> List["pyarrow.RecordBatch"]: ...
+    ) -> List["pyarrow.RecordBatch"]:
+        """
+        Reads the latest snapshot of the Hudi table, optionally filtered by the provided filters.
+
+        Parameters:
+            filters (Optional[List[str]]): Optional filters for selecting file slices.
+
+        Returns:
+            List[pyarrow.RecordBatch]: A list of record batches from the snapshot of the table.
+        """
+        ...
 
 def build_hudi_table(
     base_uri: str,
     hudi_options: Optional[Dict[str, str]] = None,
     storage_options: Optional[Dict[str, str]] = None,
     options: Optional[Dict[str, str]] = None,
-) -> HudiTable: ...
+) -> HudiTable:
+    """
+    Builds hudi table from base_uri and options.
+
+    Parameters:
+        base_uri (str): location of a hudi table.
+        hudi_options (Optional[Dict[str, str]]): hudi options.
+        storage_options (Optional[Dict[str, str]]): storage_options.
+        options (Optional[Dict[str, str]]): hudi or storage options.
+
+    Returns:
+        HudiTable: An instance of hudi table.
+    """
+    ...

Reply via email to