rdblue commented on code in PR #4503:
URL: https://github.com/apache/iceberg/pull/4503#discussion_r857177394
##########
spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/actions/TestRemoveOrphanFilesAction.java:
##########
@@ -703,4 +705,120 @@ public void testGarbageCollectionDisabled() {
ValidationException.class, "Cannot delete orphan files: GC is
disabled",
() -> SparkActions.get().deleteOrphanFiles(table).execute());
}
+
+ @Test
+ public void testCompareToFileList() throws IOException, InterruptedException
{
+ Table table =
+ TABLES.create(SCHEMA, PartitionSpec.unpartitioned(),
Maps.newHashMap(), tableLocation);
+
+ List<ThreeColumnRecord> records =
+ Lists.newArrayList(new ThreeColumnRecord(1, "AAAAAAAAAA", "AAAA"));
+
+ Dataset<Row> df = spark.createDataFrame(records,
ThreeColumnRecord.class).coalesce(1);
+
+ df.select("c1", "c2",
"c3").write().format("iceberg").mode("append").save(tableLocation);
+
+ df.select("c1", "c2",
"c3").write().format("iceberg").mode("append").save(tableLocation);
+
+ Path dataPath = new Path(tableLocation + "/data");
+ FileSystem fs =
dataPath.getFileSystem(spark.sessionState().newHadoopConf());
+ List<FilePathLastModifiedRecord> validFiles =
+ Arrays.stream(fs.listStatus(dataPath, HiddenPathFilter.get()))
+ .filter(FileStatus::isFile)
+ .map(
+ file ->
+ new FilePathLastModifiedRecord(
+ file.getPath().toString(), new
Timestamp(file.getModificationTime())))
+ .collect(Collectors.toList());
+
+ Assert.assertEquals("Should be 2 valid files", 2, validFiles.size());
+
+ df.write().mode("append").parquet(tableLocation + "/data");
+
+ List<FilePathLastModifiedRecord> allFiles =
+ Arrays.stream(fs.listStatus(dataPath, HiddenPathFilter.get()))
+ .filter(FileStatus::isFile)
+ .map(
+ file ->
+ new FilePathLastModifiedRecord(
+ file.getPath().toString(), new
Timestamp(file.getModificationTime())))
+ .collect(Collectors.toList());
+
+ Assert.assertEquals("Should be 3 files", 3, allFiles.size());
+
+ List<FilePathLastModifiedRecord> invalidFiles =
Lists.newArrayList(allFiles);
+ invalidFiles.removeAll(validFiles);
+ List<String> invalidFilePaths =
+ invalidFiles.stream()
+ .map(FilePathLastModifiedRecord::getFilePath)
+ .collect(Collectors.toList());
+ Assert.assertEquals("Should be 1 invalid file", 1, invalidFiles.size());
+
+ // sleep for 1 second to ensure files will be old enough
+ Thread.sleep(1000);
Review Comment:
This should use `waitUntilAfter` rather than sleeping with `Thread.sleep(1000)`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]