This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 57f17e0  refactor: move .crc filtering logic from table to storage layer (#458)
57f17e0 is described below

commit 57f17e06bbdaa985f54a59b149873744292fd351
Author: Yunchi Pang <[email protected]>
AuthorDate: Fri Oct 3 18:20:11 2025 -0700

    refactor: move .crc filtering logic from table to storage layer (#458)
    
    Moves the .crc file filtering logic from the table layer to the storage layer so that all storage consumers automatically exclude .crc files.
---
 crates/core/src/storage/mod.rs                        | 19 +++++++++++++++++++
 crates/core/src/table/listing.rs                      |  2 +-
 crates/core/tests/data/timeline/commits_stub/test.crc |  0
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs
index ce7695b..deb5963 100644
--- a/crates/core/src/storage/mod.rs
+++ b/crates/core/src/storage/mod.rs
@@ -250,6 +250,11 @@ impl Storage {
             let name = location
                 .filename()
                 .ok_or_else(|| InvalidPath(format!("Failed to get file name 
from {location:?}")))?;
+
+            if name.ends_with(".crc") {
+                continue;
+            }
+
             file_metadata.push(FileMetadata::new(name.to_string(), 
obj_meta.size));
         }
         Ok(file_metadata)
@@ -402,6 +407,20 @@ mod tests {
         assert_eq!(file_info_3, vec![FileMetadata::new("c.parquet", 0)],);
     }
 
+    #[tokio::test]
+    async fn storage_list_files_excludes_crc_files() {
+        let base_url = Url::from_directory_path(
+            
canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
+        )
+        .unwrap();
+        let storage = Storage::new_with_base_url(base_url).unwrap();
+
+        let files = storage.list_files(None).await.unwrap();
+
+        assert!(!files.iter().any(|f| f.name.ends_with(".crc")));
+        assert_eq!(files, vec![FileMetadata::new("a.parquet", 0)]);
+    }
+
     #[tokio::test]
     async fn use_storage_to_get_leaf_dirs() {
         let base_url = Url::from_directory_path(
diff --git a/crates/core/src/table/listing.rs b/crates/core/src/table/listing.rs
index dad1c58..0344e10 100644
--- a/crates/core/src/table/listing.rs
+++ b/crates/core/src/table/listing.rs
@@ -57,7 +57,7 @@ impl FileLister {
     }
 
     fn should_exclude_for_listing(file_name: &str) -> bool {
-        file_name.starts_with(PARTITION_METAFIELD_PREFIX) || 
file_name.ends_with(".crc")
+        file_name.starts_with(PARTITION_METAFIELD_PREFIX)
     }
 
     async fn list_file_groups_for_partition(&self, partition_path: &str) -> 
Result<Vec<FileGroup>> {
diff --git a/crates/core/tests/data/timeline/commits_stub/test.crc b/crates/core/tests/data/timeline/commits_stub/test.crc
new file mode 100644
index 0000000..e69de29

Reply via email to