szehon-ho commented on code in PR #5063:
URL: https://github.com/apache/iceberg/pull/5063#discussion_r903149334
##########
spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMetadataTables.java:
##########
@@ -319,6 +321,58 @@ public void testAllFilesPartitioned() throws Exception {
TestHelpers.assertEqualsSafe(filesTableSchema.asStruct(), expectedFiles,
actualFiles);
}
+ @Test
+ public void testMetadataLogMetatable() throws Exception {
+ // Create table and insert data
+ sql("CREATE TABLE %s (id bigint, data string) " +
+ "USING iceberg " +
+ "PARTITIONED BY (data) " +
+ "TBLPROPERTIES" +
+ "('format-version'='2', 'write.delete.mode'='merge-on-read')",
tableName);
+
+ List<SimpleRecord> recordsA = Lists.newArrayList(
+ new SimpleRecord(1, "a"),
+ new SimpleRecord(2, "a")
+ );
+ spark.createDataset(recordsA, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ List<SimpleRecord> recordsB = Lists.newArrayList(
+ new SimpleRecord(1, "b"),
+ new SimpleRecord(2, "b")
+ );
+ spark.createDataset(recordsB, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ Table table = Spark3Util.loadIcebergTable(spark, tableName);
+ Long currentSnapshotId = table.currentSnapshot().snapshotId();
+
+ // Check metadataLog table
+ List<Object[]> metadataLogs = sql("SELECT * FROM %s.metadata_log",
tableName);
+ Assert.assertEquals("metadataLog table should return 3 rows", 3,
metadataLogs.size());
+
+ // test filtering
+ List<Object[]> metadataLogWithFilters =
+ sql("SELECT * FROM %s.metadata_log WHERE latest_snapshot_id = %s",
tableName, currentSnapshotId);
+ Assert.assertEquals("metadataLog table should return 1 row", 1,
metadataLogWithFilters.size());
+ Assert.assertEquals("timestampMillis should match currentSnapshot",
+ table.currentSnapshot().timestampMillis() * 1000,
metadataLogWithFilters.get(0)[0]);
+
+ if (((HasTableOperations) table).operations() instanceof
HiveTableOperations) {
+ Assert.assertEquals("file should match current metadata location",
Review Comment:
Will this test ever not be HiveTableOperations? Usually tests use Assume
for these kinds of things, but maybe this check is not even necessary here if we
control the environment.
##########
spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMetadataTables.java:
##########
@@ -319,6 +321,58 @@ public void testAllFilesPartitioned() throws Exception {
TestHelpers.assertEqualsSafe(filesTableSchema.asStruct(), expectedFiles,
actualFiles);
}
+ @Test
+ public void testMetadataLogMetatable() throws Exception {
+ // Create table and insert data
+ sql("CREATE TABLE %s (id bigint, data string) " +
+ "USING iceberg " +
+ "PARTITIONED BY (data) " +
+ "TBLPROPERTIES" +
+ "('format-version'='2', 'write.delete.mode'='merge-on-read')",
tableName);
+
+ List<SimpleRecord> recordsA = Lists.newArrayList(
+ new SimpleRecord(1, "a"),
+ new SimpleRecord(2, "a")
+ );
+ spark.createDataset(recordsA, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
+ .writeTo(tableName)
+ .append();
+
+ List<SimpleRecord> recordsB = Lists.newArrayList(
+ new SimpleRecord(1, "b"),
+ new SimpleRecord(2, "b")
+ );
+ spark.createDataset(recordsB, Encoders.bean(SimpleRecord.class))
+ .coalesce(1)
Review Comment:
Probably no need for coalesce (even if it becomes multiple files, it's still
one snapshot + metadata.json)
##########
spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestMetadataTables.java:
##########
@@ -319,6 +321,58 @@ public void testAllFilesPartitioned() throws Exception {
TestHelpers.assertEqualsSafe(filesTableSchema.asStruct(), expectedFiles,
actualFiles);
}
+ @Test
+ public void testMetadataLogMetatable() throws Exception {
+ // Create table and insert data
+ sql("CREATE TABLE %s (id bigint, data string) " +
+ "USING iceberg " +
+ "PARTITIONED BY (data) " +
+ "TBLPROPERTIES" +
+ "('format-version'='2', 'write.delete.mode'='merge-on-read')",
tableName);
Review Comment:
Probably no need for format-version=2
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]