This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 57f17e0 refactor: move .crc filtering logic from table to storage
layer (#458)
57f17e0 is described below
commit 57f17e06bbdaa985f54a59b149873744292fd351
Author: Yunchi Pang <[email protected]>
AuthorDate: Fri Oct 3 18:20:11 2025 -0700
refactor: move .crc filtering logic from table to storage layer (#458)
Moves the .crc file filtering logic from the table layer to the storage layer
so that all storage consumers automatically exclude .crc files.
---
crates/core/src/storage/mod.rs | 19 +++++++++++++++++++
crates/core/src/table/listing.rs | 2 +-
crates/core/tests/data/timeline/commits_stub/test.crc | 0
3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs
index ce7695b..deb5963 100644
--- a/crates/core/src/storage/mod.rs
+++ b/crates/core/src/storage/mod.rs
@@ -250,6 +250,11 @@ impl Storage {
let name = location
.filename()
.ok_or_else(|| InvalidPath(format!("Failed to get file name
from {location:?}")))?;
+
+ if name.ends_with(".crc") {
+ continue;
+ }
+
file_metadata.push(FileMetadata::new(name.to_string(),
obj_meta.size));
}
Ok(file_metadata)
@@ -402,6 +407,20 @@ mod tests {
assert_eq!(file_info_3, vec![FileMetadata::new("c.parquet", 0)],);
}
+ #[tokio::test]
+ async fn storage_list_files_excludes_crc_files() {
+ let base_url = Url::from_directory_path(
+
canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
+ )
+ .unwrap();
+ let storage = Storage::new_with_base_url(base_url).unwrap();
+
+ let files = storage.list_files(None).await.unwrap();
+
+ assert!(!files.iter().any(|f| f.name.ends_with(".crc")));
+ assert_eq!(files, vec![FileMetadata::new("a.parquet", 0)]);
+ }
+
#[tokio::test]
async fn use_storage_to_get_leaf_dirs() {
let base_url = Url::from_directory_path(
diff --git a/crates/core/src/table/listing.rs b/crates/core/src/table/listing.rs
index dad1c58..0344e10 100644
--- a/crates/core/src/table/listing.rs
+++ b/crates/core/src/table/listing.rs
@@ -57,7 +57,7 @@ impl FileLister {
}
fn should_exclude_for_listing(file_name: &str) -> bool {
- file_name.starts_with(PARTITION_METAFIELD_PREFIX) ||
file_name.ends_with(".crc")
+ file_name.starts_with(PARTITION_METAFIELD_PREFIX)
}
async fn list_file_groups_for_partition(&self, partition_path: &str) ->
Result<Vec<FileGroup>> {
diff --git a/crates/core/tests/data/timeline/commits_stub/test.crc
b/crates/core/tests/data/timeline/commits_stub/test.crc
new file mode 100644
index 0000000..e69de29