RussellSpitzer commented on code in PR #4325:
URL: https://github.com/apache/iceberg/pull/4325#discussion_r867347482
##########
spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestAddFilesProcedure.java:
##########
@@ -759,6 +759,51 @@ public void
testPartitionedImportFromEmptyPartitionDoesNotThrow() {
sql("SELECT * FROM %s ORDER BY id", tableName));
}
+ @Test
+ public void testSkipOnError() throws IOException {
+ createUnpartitionedFileTable("parquet");
+
+ List<Object[]> source = sql("SELECT * FROM %s ORDER BY id",
sourceTableName);
+ Assert.assertEquals(String.format("Rows in source table did not
match\nExpected :%s rows \nFound :%s",
+ 8, source.size()), 8, source.size());
+
+ String createIceberg =
+ "CREATE TABLE %s (id Integer, name String, dept String, subdept
String) USING iceberg";
+
+ sql(createIceberg, tableName);
+
+ File[] expectedFiles = fileTableDir.listFiles((dir, name) ->
!name.endsWith("crc") && !name.contains("_SUCCESS"));
+
+ Assert.assertEquals("Expected number of source files", 2,
expectedFiles.length);
+
+ // Corrupt the second file
+ Assume.assumeTrue("Delete source file!", expectedFiles[1].delete());
+ Assume.assumeTrue("Create a empty source file!",
expectedFiles[1].createNewFile());
+
+ AssertHelpers.assertThrows(
+ "Expected an exception",
+ RuntimeException.class,
+ "not a Parquet file (length is too low: 0)",
+ () -> scalarSql("CALL %s.system.add_files(" +
+ "table => '%s', " +
+ "source_table => '%s'," +
+ "skip_on_error => false)",
+ catalogName, tableName, sourceTableName));
+
+ Object result = scalarSql("CALL %s.system.add_files(" +
+ "table => '%s'," +
+ "source_table => '%s'," +
+ "skip_on_error => true)",
+ catalogName, tableName, sourceTableName);
+
+ Assert.assertEquals(1L, result);
+
+ List<Object[]> actual = sql("SELECT * FROM %s ORDER BY id", tableName);
+
+ Assert.assertEquals(String.format("Rows in table did not match\nExpected
:%s rows \nFound :%s",
Review Comment:
Rather than this, you can just check against `unpartitionedDF`, which
has all of the records that should be in the table.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]