Copilot commented on code in PR #505:
URL: https://github.com/apache/hudi-rs/pull/505#discussion_r2656955186
##########
crates/core/tests/table_read_tests.rs:
##########
@@ -550,3 +550,140 @@ mod v8_tables {
}
}
}
+
+/// Test module for tables with metadata table (MDT) enabled.
+/// These tests verify MDT-accelerated file listing and partition
normalization.
+mod mdt_enabled_tables {
+ use super::*;
+ use hudi_core::table::partition::PartitionPruner;
+
+ mod snapshot_queries {
+ use super::*;
+
+ /// Test reading a V8 MOR non-partitioned table with MDT enabled.
+ /// Verifies:
+ /// 1. Table can be read correctly via MDT file listing
+ /// 2. MDT partition key normalization ("." -> "") works correctly
+ /// 3. File slices are retrieved correctly from MDT
+ #[test]
+ fn test_v8_nonpartitioned_with_mdt() -> Result<()> {
+ let base_url = SampleTableMdt::V8Nonpartitioned.url_to_mor_avro();
+ let hudi_table = Table::new_blocking(base_url.path())?;
+
+ // Verify MDT is enabled
+ assert!(
+ hudi_table.is_metadata_table_enabled(),
+ "Metadata table should be enabled"
+ );
+
+ // Get file slices - this uses MDT file listing
+ let file_slices =
hudi_table.get_file_slices_blocking(empty_filters())?;
+
+ // Should have file slices for the non-partitioned table
+ assert!(
+ !file_slices.is_empty(),
+ "Should have file slices from MDT listing"
+ );
+
+ // All file slices should be in the root partition (empty string)
+ for fs in &file_slices {
+ assert_eq!(
+ &fs.partition_path, "",
+ "Non-partitioned table should have files in root partition"
+ );
+ }
+
+ Ok(())
+ }
+
+ /// Test MDT partition key normalization for non-partitioned tables.
+ /// The metadata table stores "." as partition key, but external API
should see "".
+ /// For non-partitioned tables, we use a fast path that directly
fetches "." without
+ /// going through __all_partitions__ lookup.
+ #[test]
+ fn test_v8_nonpartitioned_mdt_partition_normalization() -> Result<()> {
+ let base_url = SampleTableMdt::V8Nonpartitioned.url_to_mor_avro();
+ let hudi_table = Table::new_blocking(base_url.path())?;
+
+ // Read MDT files partition records
+ let partition_pruner = PartitionPruner::empty();
+ let records =
+
hudi_table.read_metadata_table_files_partition_blocking(&partition_pruner)?;
+
+ // For non-partitioned tables, the fast path only fetches the
files record.
+ // __all_partitions__ is not fetched to avoid redundant HFile
lookup.
+ assert_eq!(
+ records.len(),
+ 1,
+ "Non-partitioned table fast path should only fetch files
record"
+ );
+
+ // The files record should be keyed by "" (empty string)
+ // not "." (which is the internal MDT representation)
+ assert!(
+ records.contains_key(""),
+ "Non-partitioned table should have files record with empty
string key"
+ );
+ assert!(
+ !records.contains_key("."),
+ "Non-partitioned table should NOT have files record with '.'
key after normalization"
+ );
+
+ // Verify the files record has actual file entries
+ let files_record = records.get("").unwrap();
+ assert!(
+ !files_record.files.is_empty(),
+ "Files record should contain file entries"
+ );
+
+ Ok(())
+ }
+
+ /// Test pruned MDT lookup for non-partitioned tables.
+ /// Verifies that the fast path directly fetches "." for
non-partitioned tables,
+ /// skipping the __all_partitions__ lookup entirely.
+ #[test]
+ fn test_v8_nonpartitioned_mdt_pruned_lookup() -> Result<()> {
+ use hudi_core::expr::filter::from_str_tuples;
+
+ let base_url = SampleTableMdt::V8Nonpartitioned.url_to_mor_avro();
+ let hudi_table = Table::new_blocking(base_url.path())?;
+ let partition_schema = hudi_table.get_partition_schema_blocking()?;
+
+ // Create a partition pruner that would match the non-partitioned
table's root partition.
+ // For non-partitioned tables, the partition path is "" (empty
string).
+ let filters = from_str_tuples([("", "=", "")])?;
+ let partition_pruner = PartitionPruner::new(
+ &filters,
+ &partition_schema,
+ hudi_table.hudi_configs.as_ref(),
+ )?;
+
+ // For non-partitioned tables, the fast path directly fetches "."
(converted to "")
+ // without going through __all_partitions__.
+ let records =
+
hudi_table.read_metadata_table_files_partition_blocking(&partition_pruner)?;
+
+ // Should have 1 record: just the files record for ""
+ // (fast path skips __all_partitions__ for non-partitioned tables)
+ assert_eq!(
+ records.len(),
+ 1,
+ "Non-partitioned table fast path should only fetch files
record"
+ );
+ assert!(
+ records.contains_key(""),
+ "Should contain files record for non-partitioned table"
+ );
+
+ // Verify the files record has actual file entries
+ let files_record = records.get("").unwrap();
+ assert!(
+ !files_record.files.is_empty(),
+ "Files record should contain file entries"
+ );
+
+ Ok(())
+ }
Review Comment:
The filter `[("", "=", "")]` is likely ineffective for a non-partitioned
table. For non-partitioned tables, the partition schema is empty (has no
fields), so when creating a PartitionPruner with this filter, the
`SchemableFilter::try_from` conversion in `PartitionPruner::new` (line 69 of
partition.rs) will fail because there's no field with an empty name in the
schema. This means the resulting pruner will have no filters and `is_empty()`
will return true, making this test case identical to the test at line 604.
The test comment claims it tests "pruned MDT lookup" but the pruner will
actually be empty, so it takes the same code path as the unpruned test.
Consider either:
1. Removing this test as redundant, or
2. Modifying it to test a different scenario that actually exercises pruned
lookup (though that may not be applicable for non-partitioned tables)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]