This is an automated email from the ASF dual-hosted git repository.
zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 9ce59628f [GOBBLIN-1677] Fix timezone property to read from key correctly (#3535)
9ce59628f is described below
commit 9ce59628fa9940a6d5d38c4b471d77f1f151dff2
Author: William Lo <[email protected]>
AuthorDate: Mon Aug 8 16:29:14 2022 -0700
[GOBBLIN-1677] Fix timezone property to read from key correctly (#3535)
* Fix timezone property to read from key correctly
* Code cleanup and additional test around invalid timezones
---
.../copy/TimeAwareRecursiveCopyableDataset.java | 9 ++--
.../TimeAwareRecursiveCopyableDatasetTest.java | 50 ++++++++++++++++++++++
2 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
index e0cdd9899..47cbe0ec6 100644
--- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
+++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDataset.java
@@ -52,6 +52,7 @@ public class TimeAwareRecursiveCopyableDataset extends RecursiveCopyableDataset
private final String lookbackTime;
private final String datePattern;
private final Period lookbackPeriod;
+ private final DateTimeZone dateTimeZone;
private final LocalDateTime currentTime;
public TimeAwareRecursiveCopyableDataset(FileSystem fs, Path rootPath, Properties properties, Path glob) {
@@ -66,11 +67,9 @@ public class TimeAwareRecursiveCopyableDataset extends RecursiveCopyableDataset
.toFormatter();
this.lookbackPeriod = periodFormatter.parsePeriod(lookbackTime);
this.datePattern = properties.getProperty(DATE_PATTERN_KEY);
-
- this.currentTime = properties.containsKey(DATE_PATTERN_TIMEZONE_KEY) ? LocalDateTime.now(
- DateTimeZone.forID(DATE_PATTERN_TIMEZONE_KEY))
- : LocalDateTime.now(DateTimeZone.forID(DEFAULT_DATE_PATTERN_TIMEZONE));
-
+ this.dateTimeZone = DateTimeZone.forID(properties
+ .getProperty(DATE_PATTERN_TIMEZONE_KEY, DEFAULT_DATE_PATTERN_TIMEZONE));
+ this.currentTime = LocalDateTime.now(this.dateTimeZone);
this.validateLookbackWithDatePatternFormat(this.datePattern, this.lookbackTime);
}
diff --git a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
index 521caaafe..dc439aaa3 100644
--- a/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
+++ b/gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/TimeAwareRecursiveCopyableDatasetTest.java
@@ -260,6 +260,44 @@ public class TimeAwareRecursiveCopyableDatasetTest {
}
}
+ @Test
+ public void testTimezoneProperty() throws IOException {
+ // Test in UTC instead of default time
+ String datePattern = "yyyy/MM/dd/HH";
+ DateTimeFormatter formatter = DateTimeFormat.forPattern(datePattern);
+ // Ensure that the files are created in UTC time
+ LocalDateTime endDate = LocalDateTime.now(DateTimeZone.forID("UTC"));
+
+ Set<String> candidateFiles = new HashSet<>();
+ for (int i = 0; i < MAX_NUM_HOURLY_DIRS; i++) {
+ String startDate = endDate.minusHours(i).toString(formatter);
+ Path subDirPath = new Path(baseDir1, new Path(startDate));
+ fs.mkdirs(subDirPath);
+ Path filePath = new Path(subDirPath, i + ".avro");
+ fs.create(filePath);
+ if (i < (NUM_LOOKBACK_HOURS + 1)) {
+ candidateFiles.add(filePath.toString());
+ }
+ }
+
+ //Lookback time = "4h"
+ Properties properties = new Properties();
+ properties.setProperty(TimeAwareRecursiveCopyableDataset.LOOKBACK_TIME_KEY, NUM_LOOKBACK_HOURS_STR);
+ properties.setProperty(TimeAwareRecursiveCopyableDataset.DATE_PATTERN_KEY, "yyyy/MM/dd/HH");
+ properties.setProperty(TimeAwareRecursiveCopyableDataset.DATE_PATTERN_TIMEZONE_KEY, "UTC");
+
+ PathFilter pathFilter = new HiddenFilter();
+ TimeAwareRecursiveCopyableDataset dataset = new TimeAwareRecursiveCopyableDataset(fs, baseDir1, properties,
+ new Path("/tmp/src/*/hourly"));
+ List<FileStatus> fileStatusList = dataset.getFilesAtPath(fs, baseDir1, pathFilter);
+
+ Assert.assertEquals(fileStatusList.size(), NUM_LOOKBACK_HOURS + 1);
+
+ for (FileStatus fileStatus: fileStatusList) {
+ Assert.assertTrue(candidateFiles.contains(PathUtils.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toString()));
+ }
+ }
+
@Test (expectedExceptions = IllegalArgumentException.class)
public void testInstantiationError() {
//Daily directories, but look back time has days and hours. We should expect an assertion error.
@@ -280,6 +318,18 @@ public class TimeAwareRecursiveCopyableDatasetTest {
}
+ @Test (expectedExceptions = IllegalArgumentException.class)
+ public void testIllegalTimezoneProperty() throws IOException {
+ //Lookback time = "4h"
+ Properties properties = new Properties();
+ properties.setProperty(TimeAwareRecursiveCopyableDataset.LOOKBACK_TIME_KEY, NUM_LOOKBACK_HOURS_STR);
+ properties.setProperty(TimeAwareRecursiveCopyableDataset.DATE_PATTERN_KEY, "yyyy/MM/dd/HH");
+ properties.setProperty(TimeAwareRecursiveCopyableDataset.DATE_PATTERN_TIMEZONE_KEY, "InvalidTimeZone");
+
+ TimeAwareRecursiveCopyableDataset dataset = new TimeAwareRecursiveCopyableDataset(fs, baseDir3, properties,
+ new Path("/tmp/src/ds2/daily"));
+ }
+
@AfterClass
public void clean() throws IOException {
//Delete tmp directories