Copilot commented on code in PR #499:
URL: https://github.com/apache/hudi-rs/pull/499#discussion_r2638286476
##########
crates/core/src/table/mod.rs:
##########
@@ -213,6 +214,142 @@ impl Table {
self.table_type() == TableTypeValue::MergeOnRead.as_ref()
}
+ /// Check if this table is a metadata table.
+ ///
+ /// Detection is based on the base path ending with `.hoodie/metadata`.
+ pub fn is_metadata_table(&self) -> bool {
+ let base_path: String = self
+ .hudi_configs
+ .get_or_default(HudiTableConfig::BasePath)
+ .into();
+ base_path
+ .trim_end_matches('/')
+ .ends_with(".hoodie/metadata")
+ }
+
+ /// Get the list of available metadata table partitions for this table.
+ ///
+ /// Returns the partitions configured in
`hoodie.table.metadata.partitions`.
+ pub fn get_metadata_table_partitions(&self) -> Vec<String> {
+ self.hudi_configs
+ .get_or_default(MetadataTablePartitions)
+ .into()
+ }
+
+ /// Create a metadata table instance for this data table.
+ ///
+ /// Uses all partitions from `hoodie.table.metadata.partitions`
configuration.
+ /// To filter specific partitions, use
[`new_metadata_table_with_partitions`].
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the metadata table cannot be created or if there
are
+ /// no metadata table partitions configured.
+ pub async fn new_metadata_table(&self) -> Result<Table> {
+ let mdt_partitions = self.get_metadata_table_partitions();
+ if mdt_partitions.is_empty() {
+ return Err(CoreError::MetadataTable(
+ "No metadata table partitions configured".to_string(),
+ ));
+ }
+ self.create_metadata_table_with_partitions(mdt_partitions)
+ .await
+ }
+
+ /// Same as [Table::new_metadata_table], but blocking.
+ pub fn new_metadata_table_blocking(&self) -> Result<Table> {
+ tokio::runtime::Builder::new_current_thread()
+ .enable_all()
+ .build()?
+ .block_on(async { self.new_metadata_table().await })
+ }
+
+ /// Create a metadata table instance with specific partitions.
+ ///
+ /// Only the specified partitions will be available in the returned table
instance.
+ /// This is useful when you only need to read from specific MDT partitions
+ /// (e.g., only "files" partition for file listing).
+ ///
+ /// # Arguments
+ ///
+ /// * `partitions` - Iterator of partition names to include (e.g.,
`["files"]`)
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if any of the specified partitions are not valid
+ /// (i.e., not in `hoodie.table.metadata.partitions`).
+ pub async fn new_metadata_table_with_partitions<I, S>(
+ &self,
+ partitions: I,
+ ) -> Result<Table>
+ where
+ I: IntoIterator<Item = S>,
+ S: AsRef<str>,
+ {
+ let available_partitions: HashSet<String> =
+ self.get_metadata_table_partitions().into_iter().collect();
+
+ if available_partitions.is_empty() {
+ return Err(CoreError::MetadataTable(
+ "No metadata table partitions configured".to_string(),
+ ));
+ }
+
+ let requested: Vec<String> = partitions
+ .into_iter()
+ .map(|s| s.as_ref().to_string())
+ .collect();
+
+ // Validate all requested partitions exist
+ let invalid: Vec<&str> = requested
+ .iter()
+ .filter(|p| !available_partitions.contains(*p))
+ .map(|s| s.as_str())
+ .collect();
+
+ if !invalid.is_empty() {
+ return Err(CoreError::MetadataTable(format!(
+ "Invalid metadata table partitions: {:?}. Available: {:?}",
+ invalid,
+ available_partitions.iter().collect::<Vec<_>>()
+ )));
+ }
Review Comment:
When an empty partitions iterator is passed to
`new_metadata_table_with_partitions`, the method will pass validation (since no
invalid partitions exist) and call `create_metadata_table_with_partitions` with
an empty vector. This will result in creating a metadata table with an empty
partition fields configuration, which may not be the intended behavior.
Consider adding a check to return an error if the requested partitions list is
empty, similar to the check for available_partitions.
##########
crates/core/src/table/mod.rs:
##########
@@ -213,6 +214,142 @@ impl Table {
self.table_type() == TableTypeValue::MergeOnRead.as_ref()
}
+ /// Check if this table is a metadata table.
+ ///
+ /// Detection is based on the base path ending with `.hoodie/metadata`.
+ pub fn is_metadata_table(&self) -> bool {
+ let base_path: String = self
+ .hudi_configs
+ .get_or_default(HudiTableConfig::BasePath)
+ .into();
+ base_path
+ .trim_end_matches('/')
+ .ends_with(".hoodie/metadata")
+ }
+
+ /// Get the list of available metadata table partitions for this table.
+ ///
+ /// Returns the partitions configured in
`hoodie.table.metadata.partitions`.
+ pub fn get_metadata_table_partitions(&self) -> Vec<String> {
+ self.hudi_configs
+ .get_or_default(MetadataTablePartitions)
+ .into()
+ }
+
+ /// Create a metadata table instance for this data table.
+ ///
+ /// Uses all partitions from `hoodie.table.metadata.partitions`
configuration.
+ /// To filter specific partitions, use
[`new_metadata_table_with_partitions`].
Review Comment:
The documentation reference is incorrect. It should be
`Table::new_metadata_table_with_partitions` instead of just
`new_metadata_table_with_partitions` for proper Rust documentation linking.
```suggestion
/// To filter specific partitions, use
[`Table::new_metadata_table_with_partitions`].
```
##########
crates/core/src/config/table.rs:
##########
@@ -128,6 +128,20 @@ pub enum HudiTableConfig {
/// Path for LSM timeline history for layout v2, relative to timeline path
(default: history)
/// The full path will be `.hoodie/{TimelinePath}/{TimelineHistoryPath}`
TimelineHistoryPath,
+
+ /// Enable the internal metadata table which serves table metadata like
level file listings.
Review Comment:
The documentation mentions "level file listings" which appears to be a typo.
It should likely be "table file listings" or just "file listings" to be
consistent with the rest of the codebase terminology.
```suggestion
/// Enable the internal metadata table which serves table metadata like
file listings.
```
##########
crates/core/src/table/validation.rs:
##########
@@ -68,6 +70,25 @@ pub fn validate_configs(hudi_configs: &HudiConfigs) ->
crate::error::Result<()>
)));
}
+ // Validate HFile format is only used for metadata tables
+ if let Ok(base_file_format_str) = hudi_configs.get(BaseFileFormat) {
+ let format_str: String = base_file_format_str.into();
+ if let Ok(format) = BaseFileFormatValue::from_str(&format_str) {
+ if format.is_metadata_table_only() {
+ let base_path: String =
hudi_configs.get_or_default(BasePath).into();
+ let is_mdt = base_path
+ .trim_end_matches('/')
+ .ends_with(".hoodie/metadata");
Review Comment:
The metadata table detection logic is duplicated across multiple locations
(here in validation.rs, in Table::is_metadata_table(), and in
FileGroupReader::is_metadata_table()). Consider extracting this logic into a
shared utility function to maintain consistency and reduce duplication.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]