realno commented on a change in pull request #1783:
URL: https://github.com/apache/arrow-datafusion/pull/1783#discussion_r802319179
##########
File path: ballista/rust/executor/src/main.rs
##########
@@ -148,3 +167,108 @@ async fn main() -> Result<()> {
Ok(())
}
+
+/// Makes one cleanup pass over `work_dir`: every direct sub-directory for which `check_modified_time_in_dirs` returns `Ok(true)` for `seconds` is removed recursively.
+/// Only directories directly under `work_dir` are considered; plain files at the top level of `work_dir` are never deleted by this function.
+async fn clean_shuffle_data_loop(work_dir: &str, seconds: i64) -> Result<()> {
+ let mut dir = fs::read_dir(work_dir).await?;
+ let mut to_deleted = Vec::new();
+ let mut need_delete_dir;
+ while let Some(child) = dir.next_entry().await.unwrap() { // NOTE(review): `unwrap` panics if reading the next entry fails, although this fn returns `Result`
+ if let Ok(metadata) = child.metadata().await {
+ if metadata.is_dir() {
+ let dir = fs::read_dir(child.path()).await?; // `?` returns early from the whole pass if this sub-directory cannot be opened
+ match check_modified_time_in_dirs(vec![dir], seconds).await {
+ Ok(x) => match x {
+ true => {
+ need_delete_dir = child.path().into_os_string();
+ to_deleted.push(need_delete_dir) // only recorded here; the actual removal happens after the scan finishes
+ }
+ false => {}
+ },
+ Err(e) => { // a failed check is logged and this sub-directory is skipped
+ error!("Fail in clean_shuffle_data_loop {:?}", e)
+ }
+ }
+ }
+ } else { // metadata could not be read: log and move on to the next entry
+ error!("can not get meta from file{:?}", child)
+ }
+ }
+ info!( // logged unconditionally, i.e. also when `to_deleted` is empty
+ "Executor work_dir {:?} not modified in {:?} seconds will be deleted ",
+ &to_deleted, seconds
+ );
+ for del in to_deleted { // NOTE(review): nothing is re-checked here, so files written after the check above are removed as well
+ fs::remove_dir_all(del).await?; // `?` stops at the first failed removal; later entries are not removed in this pass
+ }
+ Ok(())
+}
+
+/// Determines whether all files in a directory are older than the cutoff, given in seconds.
+async fn check_modified_time_in_dirs(
+ mut vec: Vec<ReadDir>,
+ seconds: i64,
+) -> Result<bool> {
+ let cutoff = Utc::now() - Duration::seconds(seconds);
+
+ while !vec.is_empty() {
+ let mut dir = vec.pop().unwrap();
+ while let Some(child) = dir.next_entry().await? {
+ let meta = child.metadata().await?;
+ match meta.is_dir() {
+ true => {
+ let dir = fs::read_dir(child.path()).await?;
+ vec.push(dir);
Review comment:
If I understand correctly: once all files within a directory have passed the
TTL, the check returns to the caller, which pushes the directory onto
`to_deleted`; after all folders have been scanned, the caller loops over
`to_deleted` and removes everything. Between the modified-time check and the
actual deletion, I think it is possible for new files to be written to the
folders in `to_deleted`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]