This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new fd5e67df9f Automatically cleanup empty dirs in LocalFileSystem (#5978)
fd5e67df9f is described below
commit fd5e67df9fde55feafeeec891d93c1d62f3b75fb
Author: Faiaz Sanaulla <[email protected]>
AuthorDate: Sat Jul 6 14:06:20 2024 +0200
Automatically cleanup empty dirs in LocalFileSystem (#5978)
* automatically cleanup empty dirs
* automatic cleanup toggle
* configurable cleanup
* test for automatic dir deletion
* clippy
* more comments
---
object_store/src/local.rs | 75 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 69 insertions(+), 6 deletions(-)
diff --git a/object_store/src/local.rs b/object_store/src/local.rs
index d3bfab8ede..4847389299 100644
--- a/object_store/src/local.rs
+++ b/object_store/src/local.rs
@@ -240,6 +240,8 @@ impl From<Error> for super::Error {
#[derive(Debug)]
pub struct LocalFileSystem {
config: Arc<Config>,
+ // if you want to delete empty directories when deleting files
+ automatic_cleanup: bool,
}
#[derive(Debug)]
@@ -266,6 +268,7 @@ impl LocalFileSystem {
config: Arc::new(Config {
root: Url::parse("file:///").unwrap(),
}),
+ automatic_cleanup: false,
}
}
@@ -282,6 +285,7 @@ impl LocalFileSystem {
config: Arc::new(Config {
root: absolute_path_to_url(path)?,
}),
+ automatic_cleanup: false,
})
}
@@ -295,6 +299,12 @@ impl LocalFileSystem {
);
self.config.prefix_to_filesystem(location)
}
+
+ /// Enable automatic cleanup of empty directories when deleting files
+ pub fn with_automatic_cleanup(mut self, automatic_cleanup: bool) -> Self {
+ self.automatic_cleanup = automatic_cleanup;
+ self
+ }
}
impl Config {
@@ -465,13 +475,36 @@ impl ObjectStore for LocalFileSystem {
}
async fn delete(&self, location: &Path) -> Result<()> {
+ let config = Arc::clone(&self.config);
let path = self.path_to_filesystem(location)?;
- maybe_spawn_blocking(move || match std::fs::remove_file(&path) {
- Ok(_) => Ok(()),
- Err(e) => Err(match e.kind() {
- ErrorKind::NotFound => Error::NotFound { path, source: e }.into(),
- _ => Error::UnableToDeleteFile { path, source: e }.into(),
- }),
+ let automactic_cleanup = self.automatic_cleanup;
+ maybe_spawn_blocking(move || {
+ if let Err(e) = std::fs::remove_file(&path) {
+ Err(match e.kind() {
+ ErrorKind::NotFound => Error::NotFound { path, source: e }.into(),
+ _ => Error::UnableToDeleteFile { path, source: e }.into(),
+ })
+ } else if automactic_cleanup {
+ let root = &config.root;
+ let root = root
+ .to_file_path()
+ .map_err(|_| Error::InvalidUrl { url: root.clone() })?;
+
+ // here we will try to traverse up and delete an empty dir if possible until we reach the root or get an error
+ let mut parent = path.parent();
+
+ while let Some(loc) = parent {
+ if loc != root && std::fs::remove_dir(loc).is_ok() {
+ parent = loc.parent();
+ } else {
+ break;
+ }
+ }
+
+ Ok(())
+ } else {
+ Ok(())
+ }
})
.await
}
@@ -1010,6 +1043,8 @@ fn convert_walkdir_result(
#[cfg(test)]
mod tests {
+ use std::fs;
+
use futures::TryStreamExt;
use tempfile::{NamedTempFile, TempDir};
@@ -1445,6 +1480,34 @@ mod tests {
list.sort_unstable();
assert_eq!(list, vec![c, a]);
}
+
+ #[tokio::test]
+ async fn delete_dirs_automatically() {
+ let root = TempDir::new().unwrap();
+ let integration = LocalFileSystem::new_with_prefix(root.path())
+ .unwrap()
+ .with_automatic_cleanup(true);
+ let location = Path::from("nested/file/test_file");
+ let data = Bytes::from("arbitrary data");
+
+ integration
+ .put(&location, data.clone().into())
+ .await
+ .unwrap();
+
+ let read_data = integration
+ .get(&location)
+ .await
+ .unwrap()
+ .bytes()
+ .await
+ .unwrap();
+
+ assert_eq!(&*read_data, data);
+ assert!(fs::read_dir(root.path()).unwrap().count() > 0);
+ integration.delete(&location).await.unwrap();
+ assert!(fs::read_dir(root.path()).unwrap().count() == 0);
+ }
}
#[cfg(not(target_arch = "wasm32"))]