This is an automated email from the ASF dual-hosted git repository.
jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git
The following commit(s) were added to refs/heads/master by this push:
new c12c240e6 [AMORO-3228] Remove the format suffix in judgment condition
(#3338)
c12c240e6 is described below
commit c12c240e6e5b508962d45fd36675853c4df5062c
Author: Qishang Zhong <[email protected]>
AuthorDate: Fri Nov 22 17:25:37 2024 +0800
[AMORO-3228] Remove the format suffix in judgment condition (#3338)
* [AMORO-3228] Fix the problem of file name with suffix in judgment
condition
* Extract method
* Fix comment
---
.../io/writer/IcebergFanoutPosDeleteWriter.java | 6 +----
.../java/org/apache/amoro/utils/TableFileUtil.java | 26 +++++++++++++++++++---
.../amoro/io/TestIcebergFanoutPosDeleteWriter.java | 4 ++++
3 files changed, 28 insertions(+), 8 deletions(-)
diff --git
a/amoro-format-iceberg/src/main/java/org/apache/amoro/io/writer/IcebergFanoutPosDeleteWriter.java
b/amoro-format-iceberg/src/main/java/org/apache/amoro/io/writer/IcebergFanoutPosDeleteWriter.java
index ea3b7ef89..b6f33ac43 100644
---
a/amoro-format-iceberg/src/main/java/org/apache/amoro/io/writer/IcebergFanoutPosDeleteWriter.java
+++
b/amoro-format-iceberg/src/main/java/org/apache/amoro/io/writer/IcebergFanoutPosDeleteWriter.java
@@ -149,11 +149,7 @@ public class IcebergFanoutPosDeleteWriter<T>
return;
}
posDeletes.sort(Comparator.comparingLong(PosRow::pos));
- String fileName =
TableFileUtil.getFileName(filePath.get().toString());
- FileFormat fileFormat = FileFormat.fromFileName(fileName);
- if (fileFormat != null) {
- fileName = fileName.substring(0, fileName.length() -
fileFormat.name().length() - 1);
- }
+ String fileName =
TableFileUtil.getFileNameWithoutExt(filePath.get().toString());
String fileDir = TableFileUtil.getFileDir(filePath.get().toString());
String deleteFilePath =
format.addExtension(
diff --git
a/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/TableFileUtil.java
b/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/TableFileUtil.java
index e140c79eb..ea972067b 100644
---
a/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/TableFileUtil.java
+++
b/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/TableFileUtil.java
@@ -20,6 +20,7 @@ package org.apache.amoro.utils;
import org.apache.amoro.io.AuthenticatedFileIO;
import org.apache.hadoop.fs.Path;
+import org.apache.iceberg.FileFormat;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
@@ -36,16 +37,33 @@ public class TableFileUtil {
private static final String POS_DELETE_FILE_IDENTIFIER = "delete";
/**
- * Parse file name form file path
+ * Parse file name from file path.
*
* @param filePath file path
- * @return file name parsed from file path
+ * @return file name parsed from file path, e.g. data-1.parquet.
*/
public static String getFileName(String filePath) {
int lastSlash = filePath.lastIndexOf('/');
return filePath.substring(lastSlash + 1);
}
+ /**
+ * Parse file name without ext from file path.
+ *
+ * @param filePath file path
+ * @return file name without ext parsed from file path, e.g. data-1.
+ */
+ public static String getFileNameWithoutExt(String filePath) {
+ String fileName = getFileName(filePath);
+
+ FileFormat fileFormat = FileFormat.fromFileName(fileName);
+ if (fileFormat != null) {
+ return fileName.substring(0, fileName.length() -
fileFormat.name().length() - 1);
+ }
+
+ return fileName;
+ }
+
/**
* Parse file directory path from file path
*
@@ -200,6 +218,8 @@ public class TableFileUtil {
public static boolean isOptimizingPosDeleteFile(String dataFilePath, String
posDeleteFilePath) {
return getFileName(posDeleteFilePath)
- .startsWith(String.format("%s-%s", getFileName(dataFilePath),
POS_DELETE_FILE_IDENTIFIER));
+ .startsWith(
+ String.format(
+ "%s-%s", getFileNameWithoutExt(dataFilePath),
POS_DELETE_FILE_IDENTIFIER));
}
}
diff --git
a/amoro-format-iceberg/src/test/java/org/apache/amoro/io/TestIcebergFanoutPosDeleteWriter.java
b/amoro-format-iceberg/src/test/java/org/apache/amoro/io/TestIcebergFanoutPosDeleteWriter.java
index ff4185ab0..b8667ce22 100644
---
a/amoro-format-iceberg/src/test/java/org/apache/amoro/io/TestIcebergFanoutPosDeleteWriter.java
+++
b/amoro-format-iceberg/src/test/java/org/apache/amoro/io/TestIcebergFanoutPosDeleteWriter.java
@@ -127,6 +127,8 @@ public class TestIcebergFanoutPosDeleteWriter extends
TableTestBase {
dataDir,
fileFormat.addExtension("data-1-delete-suffix")))
.toString());
Assert.assertNotNull(deleteFile1);
+ Assert.assertTrue(
+ TableFileUtil.isOptimizingPosDeleteFile(dataFile1Path,
deleteFile1.path().toString()));
Assert.assertEquals(3, deleteFile1.recordCount());
// Check whether the path-pos pairs are sorted as expected.
Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
@@ -147,6 +149,8 @@ public class TestIcebergFanoutPosDeleteWriter extends
TableTestBase {
dataDir,
fileFormat.addExtension("data-2-delete-suffix")))
.toString());
Assert.assertNotNull(deleteFile2);
+ Assert.assertTrue(
+ TableFileUtil.isOptimizingPosDeleteFile(dataFile2Path,
deleteFile2.path().toString()));
Assert.assertEquals(
new Path(
TableFileUtil.getNewFilePath(