This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 86be8554820 [HUDI-5336] Fixing log file pattern match to ignore extraneous files (#7612)
86be8554820 is described below
commit 86be85548202f4726ed1ab702c3c439c3fbea4d2
Author: Sivabalan Narayanan <[email protected]>
AuthorDate: Thu Jan 19 17:41:54 2023 -0800
[HUDI-5336] Fixing log file pattern match to ignore extraneous files (#7612)
Co-authored-by: Y Ethan Guo <[email protected]>
---
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java | 2 +-
.../java/org/apache/hudi/common/functional/TestHoodieLogFormat.java | 2 +-
.../hudi/common/functional/TestHoodieLogFormatAppendFailure.java | 4 ++--
.../apache/hudi/common/table/view/TestHoodieTableFileSystemView.java | 4 +++-
4 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index 53e7fcc2721..bb9a8d7159d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -80,7 +80,7 @@ public class FSUtils {
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1_1-0-1
// Archive log files are of this pattern - .commits_.archive.1_1-0-1
public static final Pattern LOG_FILE_PATTERN =
- Pattern.compile("\\.(.+)_(.*)\\.(.+)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
+ Pattern.compile("^\\.(.+)_(.*)\\.(log|archive)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
private static final long MIN_CLEAN_TO_KEEP = 10;
private static final long MIN_ROLLBACK_TO_KEEP = 10;
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
index 66b1c25cef7..4c12c5e8aa5 100755
---
a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
+++
b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java
@@ -345,7 +345,7 @@ public class TestHoodieLogFormat extends
HoodieCommonTestHarness {
for (int i = 0; i < 2; i++) {
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
-
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive").overBaseCommit("")
+
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits").overBaseCommit("")
.withFs(localFs).build();
writer.appendBlock(dataBlock);
writer.close();
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
index d28dfe5e01e..309a3b04858 100644
---
a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
+++
b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java
@@ -103,7 +103,7 @@ public class TestHoodieLogFormatAppendFailure {
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header,
HoodieRecord.RECORD_KEY_METADATA_FIELD);
Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
-
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
+
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits")
.overBaseCommit("").withFs(fs).build();
writer.appendBlock(dataBlock);
@@ -134,7 +134,7 @@ public class TestHoodieLogFormatAppendFailure {
// Opening a new Writer right now will throw IOException. The code should handle this, rollover the logfile and
// return a new writer with a bumped up logVersion
writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
-
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
+
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits")
.overBaseCommit("").withFs(fs).build();
header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
diff --git
a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
index 9e14611f80f..877b8fea3bf 100644
---
a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
+++
b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java
@@ -306,12 +306,14 @@ public class TestHoodieTableFileSystemView extends
HoodieCommonTestHarness {
String fileName2 =
FSUtils.makeLogFileName(fileId, HoodieLogFile.DELTA_EXTENSION,
instantTime1, 1, TEST_WRITE_TOKEN);
// create a dummy log file mimicking cloud stores marker files
- String fileName3 = "_DUMMY_" + fileName1.substring(1, fileName1.length());
+ String fileName3 = "_GCS_SYNCABLE_TEMPFILE_" + fileName1;
+ String fileName4 = "_DUMMY_" + fileName1.substring(1, fileName1.length());
// this file should not be deduced as a log file.
Paths.get(basePath, partitionPath, fileName1).toFile().createNewFile();
Paths.get(basePath, partitionPath, fileName2).toFile().createNewFile();
Paths.get(basePath, partitionPath, fileName3).toFile().createNewFile();
+ Paths.get(basePath, partitionPath, fileName4).toFile().createNewFile();
HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline();
HoodieInstant instant1 = new HoodieInstant(true,
HoodieTimeline.COMMIT_ACTION, instantTime1);