This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 0724b7f  docs: add in-code docs to hudi-core APIs (#166)
0724b7f is described below

commit 0724b7fda377ca5166cced9a2e6493346513befe
Author: Shiyan Xu <[email protected]>
AuthorDate: Sat Oct 12 11:03:31 2024 -1000

    docs: add in-code docs to hudi-core APIs (#166)
---
 crates/core/src/config/internal.rs    |  7 ++-----
 crates/core/src/config/mod.rs         |  4 ++++
 crates/core/src/config/read.rs        |  7 ++-----
 crates/core/src/config/table.rs       |  6 ++----
 crates/core/src/config/utils.rs       |  4 ++++
 crates/core/src/file_group/mod.rs     |  2 ++
 crates/core/src/file_group/reader.rs  |  1 +
 crates/core/src/lib.rs                | 11 +++--------
 crates/core/src/storage/file_info.rs  |  1 +
 crates/core/src/storage/file_stats.rs |  1 +
 crates/core/src/storage/utils.rs      |  5 +++++
 crates/core/src/table/fs_view.rs      |  2 ++
 crates/core/src/table/partition.rs    |  7 +++++++
 crates/core/src/table/timeline.rs     |  3 +++
 14 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/crates/core/src/config/internal.rs 
b/crates/core/src/config/internal.rs
index 42641f5..89fd314 100644
--- a/crates/core/src/config/internal.rs
+++ b/crates/core/src/config/internal.rs
@@ -31,14 +31,11 @@ use crate::config::{ConfigParser, HudiConfigValue};
 /// **Example**
 ///
 /// ```rust
-/// use url::Url;
-/// use hudi_core::config::HudiConfigValue;
 /// use hudi_core::config::internal::HudiInternalConfig::SkipConfigValidation;
 /// use hudi_core::table::Table as HudiTable;
 ///
-/// let options = vec![(SkipConfigValidation.as_ref(), 
HudiConfigValue::Boolean(true))];
-/// let base_uri = Url::from_file_path("/tmp/hudi_data").unwrap();
-/// HudiTable::new_with_options(base_uri.as_ref(), options);
+/// let options = [(SkipConfigValidation, "true")];
+/// HudiTable::new_with_options("/tmp/hudi_data", options);
 /// ```
 ///
 #[derive(Clone, Debug, PartialEq, Eq, Hash, EnumIter)]
diff --git a/crates/core/src/config/mod.rs b/crates/core/src/config/mod.rs
index be6136f..7b557b4 100644
--- a/crates/core/src/config/mod.rs
+++ b/crates/core/src/config/mod.rs
@@ -97,6 +97,8 @@ impl HudiConfigValue {
         T::from(self)
     }
 
+    /// A convenience method to convert [HudiConfigValue] to [Url] when the 
value is a [String] and is intended to be a URL.
+    /// Panics if the value is not a [String].
     pub fn to_url(self) -> Result<Url> {
         match self {
             HudiConfigValue::String(v) => parse_uri(&v),
@@ -192,10 +194,12 @@ impl HudiConfigs {
         self.raw_options.as_ref().clone()
     }
 
+    /// Validate the associated config using the given parser by executing the 
[ConfigParser::validate] method.
     pub fn validate(&self, parser: impl ConfigParser<Output = 
HudiConfigValue>) -> Result<()> {
         parser.validate(&self.raw_options)
     }
 
+    /// Check if the given key exists in the configs.
     pub fn contains(&self, key: impl AsRef<str>) -> bool {
         self.raw_options.contains_key(key.as_ref())
     }
diff --git a/crates/core/src/config/read.rs b/crates/core/src/config/read.rs
index 807a0cf..e67617d 100644
--- a/crates/core/src/config/read.rs
+++ b/crates/core/src/config/read.rs
@@ -30,14 +30,11 @@ use strum_macros::EnumIter;
 /// **Example**
 ///
 /// ```rust
-/// use url::Url;
 /// use hudi_core::config::read::HudiReadConfig::{AsOfTimestamp, 
InputPartitions};
 /// use hudi_core::table::Table as HudiTable;
 ///
-/// let options = vec![(InputPartitions.as_ref(), "2"),
-///     (AsOfTimestamp.as_ref(), "20240101010100000")];
-/// let base_uri = Url::from_file_path("/tmp/hudi_data").unwrap();
-/// HudiTable::new_with_options(base_uri.as_ref(), options);
+/// let options = [(InputPartitions, "2"), (AsOfTimestamp, 
"20240101010100000")];
+/// HudiTable::new_with_options("/tmp/hudi_data", options);
 /// ```
 ///
 #[derive(Clone, Debug, PartialEq, Eq, Hash, EnumIter)]
diff --git a/crates/core/src/config/table.rs b/crates/core/src/config/table.rs
index 107e382..98cca7d 100644
--- a/crates/core/src/config/table.rs
+++ b/crates/core/src/config/table.rs
@@ -32,13 +32,11 @@ use crate::config::{ConfigParser, HudiConfigValue};
 /// **Example**
 ///
 /// ```rust
-/// use url::Url;
 /// use hudi_core::config::table::HudiTableConfig::BaseFileFormat;
 /// use hudi_core::table::Table as HudiTable;
 ///
-/// let options = vec![(BaseFileFormat.as_ref(), "parquet")];
-/// let base_uri = Url::from_file_path("/tmp/hudi_data").unwrap();
-/// HudiTable::new_with_options(base_uri.as_ref(), options);
+/// let options = [(BaseFileFormat, "parquet")];
+/// HudiTable::new_with_options("/tmp/hudi_data", options);
 /// ```
 #[derive(Clone, Debug, PartialEq, Eq, Hash, EnumIter)]
 pub enum HudiTableConfig {
diff --git a/crates/core/src/config/utils.rs b/crates/core/src/config/utils.rs
index 98ea6b1..800a81d 100644
--- a/crates/core/src/config/utils.rs
+++ b/crates/core/src/config/utils.rs
@@ -16,16 +16,19 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+//! Config utilities.
 
 use anyhow::{Context, Result};
 use bytes::Bytes;
 use std::collections::HashMap;
 use std::io::{BufRead, BufReader, Cursor};
 
+/// Returns an empty iterator to represent an empty set of options.
 pub fn empty_options<'a>() -> std::iter::Empty<(&'a str, &'a str)> {
     std::iter::empty::<(&str, &str)>()
 }
 
+/// Splits the given options into two maps: one for Hudi options and one for 
all remaining options (for example, storage options).
 pub fn split_hudi_options_from_others<I, K, V>(
     all_options: I,
 ) -> (HashMap<String, String>, HashMap<String, String>)
@@ -47,6 +50,7 @@ where
     (hudi_options, others)
 }
 
+/// Parses the given data into a map of options.
 pub fn parse_data_for_options(data: &Bytes, split_chars: &str) -> 
Result<HashMap<String, String>> {
     let cursor = Cursor::new(data);
     let lines = BufReader::new(cursor).lines();
diff --git a/crates/core/src/file_group/mod.rs 
b/crates/core/src/file_group/mod.rs
index 6cd1248..db96b24 100644
--- a/crates/core/src/file_group/mod.rs
+++ b/crates/core/src/file_group/mod.rs
@@ -114,6 +114,7 @@ impl FileSlice {
         self.base_file = base_file
     }
 
+    /// Load stats from storage layer for the base file if not already loaded.
     pub async fn load_stats(&mut self, storage: &Storage) -> Result<()> {
         if self.base_file.stats.is_none() {
             let parquet_meta = storage
@@ -135,6 +136,7 @@ impl FileSlice {
     }
 }
 
+/// Hudi File Group.
 #[derive(Clone, Debug)]
 pub struct FileGroup {
     pub id: String,
diff --git a/crates/core/src/file_group/reader.rs 
b/crates/core/src/file_group/reader.rs
index 0c97dd5..1c7d748 100644
--- a/crates/core/src/file_group/reader.rs
+++ b/crates/core/src/file_group/reader.rs
@@ -25,6 +25,7 @@ use anyhow::Result;
 use arrow_array::RecordBatch;
 use std::sync::Arc;
 
+/// File group reader handles all read operations against a file group.
 #[derive(Clone, Debug)]
 pub struct FileGroupReader {
     storage: Arc<Storage>,
diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs
index 3369101..3e15dee 100644
--- a/crates/core/src/lib.rs
+++ b/crates/core/src/lib.rs
@@ -23,14 +23,11 @@
 //! **Example**
 //!
 //! ```rust
-//! use url::Url;
 //! use hudi_core::config::read::HudiReadConfig::{AsOfTimestamp, 
InputPartitions};
 //! use hudi_core::table::Table as HudiTable;
 //!
-//! let options = vec![(InputPartitions.as_ref(), "2"),
-//!     (AsOfTimestamp.as_ref(), "20240101010100000")];
-//! let base_uri = Url::from_file_path("/tmp/hudi_data").unwrap();
-//! HudiTable::new_with_options(base_uri.as_ref(), options);
+//! let options = [(InputPartitions, "2"), (AsOfTimestamp, 
"20240101010100000")];
+//! HudiTable::new_with_options("/tmp/hudi_data", options);
 //! ```
 //!
 //! # The [table] module is responsible for managing Hudi tables.
@@ -39,12 +36,10 @@
 //!
 //! create hudi table
 //! ```rust
-//! use url::Url;
 //! use hudi_core::table::Table;
 //!
 //! pub async fn test() {
-//!     let base_uri = Url::from_file_path("/tmp/hudi_data").unwrap();
-//!     let hudi_table = Table::new(base_uri.path()).await.unwrap();
+//!     let hudi_table = Table::new("/tmp/hudi_data").await.unwrap();
 //! }
 //! ```
 
diff --git a/crates/core/src/storage/file_info.rs 
b/crates/core/src/storage/file_info.rs
index 8a77048..a6f1e05 100644
--- a/crates/core/src/storage/file_info.rs
+++ b/crates/core/src/storage/file_info.rs
@@ -17,6 +17,7 @@
  * under the License.
  */
 
+/// File info that can be retrieved by listing operations without reading the 
file.
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct FileInfo {
     pub uri: String,
diff --git a/crates/core/src/storage/file_stats.rs 
b/crates/core/src/storage/file_stats.rs
index b0d2bcb..65fe1c5 100644
--- a/crates/core/src/storage/file_stats.rs
+++ b/crates/core/src/storage/file_stats.rs
@@ -17,6 +17,7 @@
  * under the License.
  */
 
+/// File stats that can be retrieved by reading the file's metadata.
 #[derive(Clone, Debug, Default)]
 pub struct FileStats {
     pub num_records: i64,
diff --git a/crates/core/src/storage/utils.rs b/crates/core/src/storage/utils.rs
index ba670f2..2413613 100644
--- a/crates/core/src/storage/utils.rs
+++ b/crates/core/src/storage/utils.rs
@@ -16,12 +16,14 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+//! Utility functions for storage.
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
 
 use anyhow::{anyhow, Result};
 use url::{ParseError, Url};
 
+/// Splits a filename into a stem and an extension.
 pub fn split_filename(filename: &str) -> Result<(String, String)> {
     let path = Path::new(filename);
 
@@ -40,6 +42,7 @@ pub fn split_filename(filename: &str) -> Result<(String, 
String)> {
     Ok((stem, extension))
 }
 
+/// Parses a URI string into a URL.
 pub fn parse_uri(uri: &str) -> Result<Url> {
     let mut url = Url::parse(uri)
         .or(Url::from_file_path(PathBuf::from_str(uri)?))
@@ -54,10 +57,12 @@ pub fn parse_uri(uri: &str) -> Result<Url> {
     Ok(url)
 }
 
+/// Returns the scheme and authority of a URL in the form of 
`scheme://authority`.
 pub fn get_scheme_authority(url: &Url) -> String {
     format!("{}://{}", url.scheme(), url.authority())
 }
 
+/// Joins a base URL with a list of segments.
 pub fn join_url_segments(base_url: &Url, segments: &[&str]) -> Result<Url> {
     let mut url = base_url.clone();
 
diff --git a/crates/core/src/table/fs_view.rs b/crates/core/src/table/fs_view.rs
index 2278d6c..b67534a 100644
--- a/crates/core/src/table/fs_view.rs
+++ b/crates/core/src/table/fs_view.rs
@@ -29,6 +29,8 @@ use anyhow::Result;
 use dashmap::DashMap;
 use futures::stream::{self, StreamExt, TryStreamExt};
 
+/// A view of the Hudi table's data files (files stored outside the `.hoodie/` 
directory) in the file system. It provides APIs to load and
+/// access the file groups and file slices.
 #[derive(Clone, Debug)]
 #[allow(dead_code)]
 pub struct FileSystemView {
diff --git a/crates/core/src/table/partition.rs 
b/crates/core/src/table/partition.rs
index 7dcf482..16b9a99 100644
--- a/crates/core/src/table/partition.rs
+++ b/crates/core/src/table/partition.rs
@@ -30,6 +30,7 @@ use std::collections::HashMap;
 use std::str::FromStr;
 use std::sync::Arc;
 
+/// A partition pruner that filters partitions based on the partition path and 
its filters.
 #[derive(Debug, Clone)]
 pub struct PartitionPruner {
     schema: Arc<Schema>,
@@ -64,6 +65,7 @@ impl PartitionPruner {
         })
     }
 
+    /// Creates an empty partition pruner that does not filter any partitions.
     pub fn empty() -> Self {
         PartitionPruner {
             schema: Arc::new(Schema::empty()),
@@ -73,10 +75,12 @@ impl PartitionPruner {
         }
     }
 
+    /// Returns `true` if the partition pruner does not have any filters.
     pub fn is_empty(&self) -> bool {
         self.and_filters.is_empty()
     }
 
+    /// Returns `true` if the partition path should be included based on the 
filters.
     pub fn should_include(&self, partition_path: &str) -> bool {
         let segments = match self.parse_segments(partition_path) {
             Ok(s) => s,
@@ -151,6 +155,7 @@ impl PartitionPruner {
     }
 }
 
+/// An operator that represents a comparison operation used in a partition 
filter expression.
 #[derive(Debug, Clone, Copy, PartialEq)]
 enum Operator {
     Eq,
@@ -171,6 +176,7 @@ impl Operator {
         (">=", Operator::Gte),
     ];
 
+    /// Returns the supported operator tokens. Note that the tokens are sorted 
by length in descending order to facilitate parsing.
     fn supported_tokens() -> &'static [&'static str] {
         static TOKENS: Lazy<Vec<&'static str>> = Lazy::new(|| {
             let mut tokens: Vec<&'static str> = Operator::TOKEN_OP_PAIRS
@@ -195,6 +201,7 @@ impl FromStr for Operator {
     }
 }
 
+/// A partition filter that represents a filter expression for partition 
pruning.
 #[derive(Debug, Clone)]
 pub struct PartitionFilter {
     field: Field,
diff --git a/crates/core/src/table/timeline.rs 
b/crates/core/src/table/timeline.rs
index 6dc9df5..2993c95 100644
--- a/crates/core/src/table/timeline.rs
+++ b/crates/core/src/table/timeline.rs
@@ -33,6 +33,7 @@ use crate::file_group::FileGroup;
 use crate::storage::utils::split_filename;
 use crate::storage::Storage;
 
+/// The [State] of an [Instant] represents the status of the action performed 
on the table.
 #[allow(dead_code)]
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum State {
@@ -41,6 +42,7 @@ pub enum State {
     Completed,
 }
 
+/// An [Instant] represents a point in time when an action was performed on 
the table.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Instant {
     state: State,
@@ -87,6 +89,7 @@ impl Instant {
     }
 }
 
+/// A [Timeline] contains transaction logs of all actions performed on the 
table at different [Instant]s of time.
 #[derive(Clone, Debug)]
 #[allow(dead_code)]
 pub struct Timeline {

Reply via email to