This is an automated email from the ASF dual-hosted git repository.

zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 200757a1c Add error handling for timeaware finder to handle scenarios 
where fil… (#3537)
200757a1c is described below

commit 200757a1cb0ceed064bf4a7ddf5d22640e8b14a2
Author: William Lo <[email protected]>
AuthorDate: Thu Aug 18 15:20:03 2022 -0700

    Add error handling for timeaware finder to handle scenarios where fil… 
(#3537)
    
    * Add error handling for the time-aware finder to handle scenarios where files 
do not exist or folders do not match the date format
    
    * Check path exists before attempting ls
---
 .../data/management/copy/RecursiveCopyableDataset.java       |  2 +-
 .../management/copy/TimeAwareRecursiveCopyableDataset.java   | 12 ++++++++++--
 .../copy/TimeAwareRecursiveCopyableDatasetTest.java          |  5 +++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
index 2c15b9e7d..cffdb6e02 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
@@ -209,7 +209,7 @@ public class RecursiveCopyableDataset implements 
CopyableDataset, FileSystemData
       return FileListUtils
           .listFilesToCopyAtPath(fs, path, fileFilter, 
applyFilterToDirectories, includeEmptyDirectories);
     } catch (IOException e) {
-      log.info(String.format("Could not find any files on target path due to 
%s. Returning an empty list of files.", e.getClass().getCanonicalName()));
+      log.warn(String.format("Could not find any files on fs %s path %s due to 
the following exception. Returning an empty list of files.", fs.getUri(), 
path), e);
       return Lists.newArrayList();
     }
   }
diff --git 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
index 47cbe0ec6..5e6291d23 100644
--- 
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
+++ 
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
@@ -137,8 +137,16 @@ public class TimeAwareRecursiveCopyableDataset extends 
RecursiveCopyableDataset
   private List<FileStatus> recursivelyGetFilesAtDatePath(FileSystem fs, Path 
path, String traversedDatePath, PathFilter fileFilter,
       int level,  LocalDateTime startDate, LocalDateTime endDate, 
DateTimeFormatter formatter) throws IOException {
     List<FileStatus> fileStatuses = Lists.newArrayList();
-    Iterator<FileStatus> folderIterator = 
Arrays.asList(fs.listStatus(path)).iterator();
-
+    if (!fs.exists(path)) {
+      return fileStatuses;
+    }
+    Iterator<FileStatus> folderIterator;
+    try {
+      folderIterator = Arrays.asList(fs.listStatus(path)).iterator();
+    } catch (IOException e) {
+      log.warn(String.format("Error while listing paths at %s due to ", path), 
e);
+      return fileStatuses;
+    }
     // Check if at the lowest level/granularity of the date folder
     if (this.datePattern.split(FileSystems.getDefault().getSeparator()).length 
== level) {
       // Truncate the start date to the most granular unit of time in the 
datepattern
diff --git 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
index dc439aaa3..4f44a9909 100644
--- 
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
+++ 
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.gobblin.data.management.copy;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
@@ -173,6 +174,10 @@ public class TimeAwareRecursiveCopyableDatasetTest {
         candidateFiles.add(filePath.toString());
       }
     }
+    // Edge case: test that files that do not match the date format but are within the 
folders searched by the time-aware finder are ignored
+    File f = new File(baseDir2.toString() + "/metadata.test");
+
+    f.createNewFile();
 
     properties = new Properties();
     
properties.setProperty(TimeAwareRecursiveCopyableDataset.LOOKBACK_TIME_KEY, 
NUM_LOOKBACK_DAYS_STR);

Reply via email to