This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 655afdb49f [HUDI-5082] Improve the cdc log file name format (#7042)
655afdb49f is described below
commit 655afdb49f2e9f0af8a947f341a9ba8e6071671c
Author: Yann Byron <[email protected]>
AuthorDate: Tue Nov 8 16:28:55 2022 +0800
[HUDI-5082] Improve the cdc log file name format (#7042)
---
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java | 2 +-
.../main/java/org/apache/hudi/common/table/cdc/HoodieCDCUtils.java | 2 +-
.../scala/org/apache/hudi/functional/cdc/HoodieCDCTestBase.scala | 5 +++--
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
index e73fbee3df..21b21256fa 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
@@ -80,7 +80,7 @@ public class FSUtils {
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1_1-0-1
// Archive log files are of this pattern - .commits_.archive.1_1-0-1
private static final Pattern LOG_FILE_PATTERN =
- Pattern.compile("\\.(.+)_(.*)\\.(.+)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(-cdc)?)?");
+ Pattern.compile("\\.(.+)_(.*)\\.(.+)\\.(\\d+)(_((\\d+)-(\\d+)-(\\d+))(.cdc)?)?");
private static final String LOG_FILE_PREFIX = ".";
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
private static final long MIN_CLEAN_TO_KEEP = 10;
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCUtils.java
index 042e95cfd6..6ca5869fdf 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/cdc/HoodieCDCUtils.java
@@ -34,7 +34,7 @@ import java.util.List;
*/
public class HoodieCDCUtils {
- public static final String CDC_LOGFILE_SUFFIX = "-cdc";
+ public static final String CDC_LOGFILE_SUFFIX = ".cdc";
/* the `op` column represents how a record is changed. */
public static final String CDC_OPERATION_TYPE = "op";
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/cdc/HoodieCDCTestBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/cdc/HoodieCDCTestBase.scala
index af0935b423..08d0e5b337 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/cdc/HoodieCDCTestBase.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/cdc/HoodieCDCTestBase.scala
@@ -21,7 +21,7 @@ import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieKey, HoodieLogFile, HoodieRecord}
-import org.apache.hudi.common.table.cdc.HoodieCDCOperation
+import org.apache.hudi.common.table.cdc.{HoodieCDCOperation, HoodieCDCUtils}
import org.apache.hudi.common.table.HoodieTableConfig
import org.apache.hudi.common.table.log.HoodieLogFormat
import org.apache.hudi.common.table.log.block.HoodieDataBlock
@@ -101,7 +101,8 @@ abstract class HoodieCDCTestBase extends HoodieClientTestBase {
val hoodieWriteStats = commitMetadata.getWriteStats.asScala
hoodieWriteStats.exists { hoodieWriteStat =>
val cdcPaths = hoodieWriteStat.getCdcStats
- cdcPaths != null && cdcPaths.nonEmpty
+ cdcPaths != null && cdcPaths.nonEmpty &&
+ cdcPaths.keySet().asScala.forall(_.endsWith(HoodieCDCUtils.CDC_LOGFILE_SUFFIX))
}
}