karuppayya commented on code in PR #4652:
URL: https://github.com/apache/iceberg/pull/4652#discussion_r927885166
##########
spark/v3.2/spark/src/main/java/org/apache/iceberg/spark/actions/DeleteOrphanFilesSparkAction.java:
##########
@@ -361,4 +471,53 @@ static PathFilter forSpecs(Map<Integer, PartitionSpec> specs) {
return partitionNames.isEmpty() ? HiddenPathFilter.get() : new PartitionAwareHiddenPathFilter(partitionNames);
}
}
+
+ public static class FileMetadata implements Serializable {
+ private String scheme;
+ private String authority;
+ private String path;
+ private String filePath;
+
+  public FileMetadata(String scheme, String authority, String path, String filePathAsString) {
+ this.scheme = scheme;
+ this.authority = authority;
+ this.path = path;
+ this.filePath = filePathAsString;
+ }
+
+ public FileMetadata() {
+ }
+
+ public void setScheme(String scheme) {
+ this.scheme = scheme;
+ }
+
+ public void setAuthority(String authority) {
+ this.authority = authority;
+ }
+
+ public void setPath(String path) {
+ this.path = path;
+ }
+
+ public void setFilePath(String filePath) {
+ this.filePath = filePath;
+ }
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ public String getAuthority() {
+ return authority;
+ }
+
+ public String getPath() {
+ return path;
+ }
+
+ public String getFilePath() {
Review Comment:
done
##########
spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRemoveOrphanFilesAction.java:
##########
@@ -955,4 +956,114 @@ protected long waitUntilAfter(long timestampMillis) {
}
return current;
}
+
+ @Test
+ public void testPathsWithExtraSlashes() {
+ List<String> validFiles = Lists.newArrayList("file:///dir1/dir2/file1");
+    List<String> actualFiles = Lists.newArrayList("file:///dir1/////dir2///file1");
+ executeTest(validFiles, actualFiles, Lists.newArrayList());
+ }
+
+ @Test
+ public void testPathsWithValidFileHavingNoAuthority() {
+ List<String> validFiles = Lists.newArrayList("hdfs:///dir1/dir2/file1");
+    List<String> actualFiles = Lists.newArrayList("hdfs://servicename/dir1/dir2/file1");
+ executeTest(validFiles, actualFiles, Lists.newArrayList());
+ }
+
+ @Test
+ public void testPathsWithActualFileHavingNoAuthority() {
+    List<String> validFiles = Lists.newArrayList("hdfs://servicename/dir1/dir2/file1");
+ List<String> actualFiles = Lists.newArrayList("hdfs:///dir1/dir2/file1");
+ executeTest(validFiles, actualFiles, Lists.newArrayList());
+ }
+
+ @Test
+ public void testPathsWithEqualSchemes() {
+    List<String> validFiles = Lists.newArrayList("s3n://bucket1/dir1/dir2/file1");
+    List<String> actualFiles = Lists.newArrayList("s3a://bucket1/dir1/dir2/file1");
+ AssertHelpers.assertThrows("Test remove orphan files with equal schemes",
+ ValidationException.class,
+ "Conflicting authorities/schemes: [(s3n, s3a)]",
+ () -> executeTest(validFiles,
+ actualFiles,
+ Lists.newArrayList(),
+ ImmutableMap.of(),
+ ImmutableMap.of(),
+ DeleteOrphanFiles.PrefixMismatchMode.ERROR));
+
+ Map<String, String> equalSchemes = Maps.newHashMap();
+ equalSchemes.put("s3n", "s3");
+ equalSchemes.put("s3a", "s3");
+ executeTest(validFiles,
+ actualFiles,
+ Lists.newArrayList(),
+ equalSchemes,
+ ImmutableMap.of(),
+ DeleteOrphanFiles.PrefixMismatchMode.ERROR);
+ }
+
+ @Test
+ public void testPathsWithEqualAuthorities() {
+    List<String> validFiles = Lists.newArrayList("hdfs://servicename1/dir1/dir2/file1");
+    List<String> actualFiles = Lists.newArrayList("hdfs://servicename2/dir1/dir2/file1");
+    AssertHelpers.assertThrows("Test remove orphan files with equal authorities",
+ ValidationException.class,
+ "Conflicting authorities/schemes: [(servicename1, servicename2)]",
+ () -> executeTest(validFiles,
+ actualFiles,
+ Lists.newArrayList(),
+ ImmutableMap.of(),
+ ImmutableMap.of(),
+ DeleteOrphanFiles.PrefixMismatchMode.ERROR));
+
+ Map<String, String> equalAuthorities = Maps.newHashMap();
+ equalAuthorities.put("servicename1", "servicename");
+ equalAuthorities.put("servicename2", "servicename");
+ executeTest(validFiles,
+ actualFiles,
+ Lists.newArrayList(),
+ ImmutableMap.of(),
+ equalAuthorities,
+ DeleteOrphanFiles.PrefixMismatchMode.ERROR);
+ }
+
+ @Test
+ public void testRemoveOrphanFileActionWithDeleteMode() {
+    List<String> validFiles = Lists.newArrayList("hdfs://servicename1/dir1/dir2/file1");
+    List<String> actualFiles = Lists.newArrayList("hdfs://servicename2/dir1/dir2/file1");
+
+ executeTest(validFiles,
+ actualFiles,
+ Lists.newArrayList("hdfs://servicename2/dir1/dir2/file1"),
+ ImmutableMap.of(),
+ ImmutableMap.of(),
+ DeleteOrphanFiles.PrefixMismatchMode.DELETE);
+ }
+
+ private void executeTest(List<String> validFiles,
+ List<String> actualFiles,
+ List<String> expectedOrphanFiles) {
+    executeTest(validFiles, actualFiles, expectedOrphanFiles, ImmutableMap.of(), ImmutableMap.of(),
+ DeleteOrphanFiles.PrefixMismatchMode.IGNORE);
+ }
+
+ private void executeTest(List<String> validFiles,
+ List<String> actualFiles,
+ List<String> expectedOrphanFiles,
+ Map<String, String> equalSchemes,
+ Map<String, String> equalAuthorities,
+ DeleteOrphanFiles.PrefixMismatchMode mode) {
+ Dataset<Row> validFilesDF = spark.createDataset(validFiles,
Encoders.STRING()).toDF();
+ Dataset<Row> actualFilesDF = spark.createDataset(actualFiles,
Encoders.STRING()).toDF();
+
+    List<String> orphanFiles = BaseDeleteOrphanFilesSparkAction.findOrphanFiles(
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]