This is an automated email from the ASF dual-hosted git repository.
zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 200757a1c Add error handling for timeaware finder to handle scenarios
where fil… (#3537)
200757a1c is described below
commit 200757a1cb0ceed064bf4a7ddf5d22640e8b14a2
Author: William Lo <[email protected]>
AuthorDate: Thu Aug 18 15:20:03 2022 -0700
Add error handling for timeaware finder to handle scenarios where fil…
(#3537)
* Add error handling for timeaware finder to handle scenarios where files
do not exist or folders do not match the date format
* Check path exists before attempting ls
---
.../data/management/copy/RecursiveCopyableDataset.java | 2 +-
.../management/copy/TimeAwareRecursiveCopyableDataset.java | 12 ++++++++++--
.../copy/TimeAwareRecursiveCopyableDatasetTest.java | 5 +++++
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
index 2c15b9e7d..cffdb6e02 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/RecursiveCopyableDataset.java
@@ -209,7 +209,7 @@ public class RecursiveCopyableDataset implements
CopyableDataset, FileSystemData
return FileListUtils
.listFilesToCopyAtPath(fs, path, fileFilter,
applyFilterToDirectories, includeEmptyDirectories);
} catch (IOException e) {
- log.info(String.format("Could not find any files on target path due to
%s. Returning an empty list of files.", e.getClass().getCanonicalName()));
+ log.warn(String.format("Could not find any files on fs %s path %s due to
the following exception. Returning an empty list of files.", fs.getUri(),
path), e);
return Lists.newArrayList();
}
}
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
index 47cbe0ec6..5e6291d23 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
@@ -137,8 +137,16 @@ public class TimeAwareRecursiveCopyableDataset extends
RecursiveCopyableDataset
private List<FileStatus> recursivelyGetFilesAtDatePath(FileSystem fs, Path
path, String traversedDatePath, PathFilter fileFilter,
int level, LocalDateTime startDate, LocalDateTime endDate,
DateTimeFormatter formatter) throws IOException {
List<FileStatus> fileStatuses = Lists.newArrayList();
- Iterator<FileStatus> folderIterator =
Arrays.asList(fs.listStatus(path)).iterator();
-
+ if (!fs.exists(path)) {
+ return fileStatuses;
+ }
+ Iterator<FileStatus> folderIterator;
+ try {
+ folderIterator = Arrays.asList(fs.listStatus(path)).iterator();
+ } catch (IOException e) {
+ log.warn(String.format("Error while listing paths at %s due to ", path),
e);
+ return fileStatuses;
+ }
// Check if at the lowest level/granularity of the date folder
if (this.datePattern.split(FileSystems.getDefault().getSeparator()).length
== level) {
// Truncate the start date to the most granular unit of time in the
datepattern
diff --git
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
index dc439aaa3..4f44a9909 100644
---
a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
+++
b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
@@ -17,6 +17,7 @@
package org.apache.gobblin.data.management.copy;
+import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
@@ -173,6 +174,10 @@ public class TimeAwareRecursiveCopyableDatasetTest {
candidateFiles.add(filePath.toString());
}
}
+ // Edge case: test that files that do not match the date format but are within the
folders searched by the timeaware finder are ignored
+ File f = new File(baseDir2.toString() + "/metadata.test");
+
+ f.createNewFile();
properties = new Properties();
properties.setProperty(TimeAwareRecursiveCopyableDataset.LOOKBACK_TIME_KEY,
NUM_LOOKBACK_DAYS_STR);