flyrain commented on code in PR #4456:
URL: https://github.com/apache/iceberg/pull/4456#discussion_r845569362
##########
hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java:
##########
@@ -468,4 +478,83 @@ public void testUUIDinTableProperties() throws Exception {
catalog.dropTable(tableIdentifier);
}
}
+
+ @Test
+ public void testSnapshotStatsTableProperties() throws Exception {
+ Schema schema = new Schema(
+ required(1, "id", Types.IntegerType.get(), "unique ID"),
+ required(2, "data", Types.StringType.get())
+ );
+ TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl");
+ String location = temp.newFolder("tbl").toString();
+
+ try {
+ catalog.buildTable(tableIdentifier, schema)
+ .withLocation(location)
+ .create();
+
+ String tableName = tableIdentifier.name();
+ org.apache.hadoop.hive.metastore.api.Table hmsTable =
+ metastoreClient.getTable(tableIdentifier.namespace().level(0),
tableName);
+
+ // check whether parameters are in expected state
+ Map<String, String> parameters = hmsTable.getParameters();
+ Assert.assertEquals("0", parameters.get(TableProperties.SNAPSHOT_COUNT));
+
Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY));
+ Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_ID));
+
Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP));
+
+ // create a snapshot
+ Table icebergTable = catalog.loadTable(tableIdentifier);
+ String fileName = UUID.randomUUID().toString();
+ DataFile file = DataFiles.builder(icebergTable.spec())
+ .withPath(FileFormat.PARQUET.addExtension(fileName))
+ .withRecordCount(2)
+ .withFileSizeInBytes(0)
+ .build();
+ icebergTable.newFastAppend().appendFile(file).commit();
+
+ // check whether parameters are in expected state
+ hmsTable =
metastoreClient.getTable(tableIdentifier.namespace().level(0), tableName);
+ parameters = hmsTable.getParameters();
+ Assert.assertEquals("1", parameters.get(TableProperties.SNAPSHOT_COUNT));
+ String summary =
JsonUtil.mapper().writeValueAsString(icebergTable.currentSnapshot().summary());
+ Assert.assertEquals(summary,
parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY));
+ long snapshotId = icebergTable.currentSnapshot().snapshotId();
+ Assert.assertEquals(String.valueOf(snapshotId),
parameters.get(TableProperties.CURRENT_SNAPSHOT_ID));
+
Assert.assertEquals(String.valueOf(icebergTable.currentSnapshot().timestampMillis()),
+ parameters.get(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP));
+
+ } finally {
+ catalog.dropTable(tableIdentifier);
+ }
+ }
+
+ @Test
+ public void testSetSnapshotSummary() throws Exception {
+ Configuration conf = new Configuration();
+ conf.set("iceberg.hive.table.parameter.size.max", "4000");
+ HiveTableOperations spyOps = spy(new HiveTableOperations(conf, null, null,
catalog.name(), DB_NAME, "tbl"));
+ Snapshot snapshot = mock(Snapshot.class);
+ Map<String, String> summary = Maps.newHashMap();
+ when(snapshot.summary()).thenReturn(summary);
+
+ // create a snapshot summary whose json string size is less than the limit
+ for (int i = 0; i < 100; i++) {
+ summary.put(String.valueOf(i), "value");
+ }
+ Assert.assertTrue(JsonUtil.mapper().writeValueAsString(summary).length() <
4000);
+ Map<String, String> parameter = Maps.newHashMap();
+ spyOps.setSnapshotSummary(parameter, snapshot);
+ Assert.assertEquals("The snapshot summary must be in parameters", 1,
parameter.size());
+
+ // create a snapshot summary whose json string size exceeds the limit
+ for (int i = 0; i < 1000; i++) {
+ summary.put(String.valueOf(i), "value");
+ }
+ long summarySize = JsonUtil.mapper().writeValueAsString(summary).length();
+ Assert.assertTrue(summarySize > 4000 && summarySize < 32672);
Review Comment:
The main purpose is to test whether the summary is saved in HMS parameters
when its size exceeds the limit. In addition, I want to verify that the limit
has changed from 32672 to 4000. That's why I check both bounds.
```
Assert.assertTrue(summarySize > 4000 && summarySize < 32672);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]