This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new fc01fbc129a HIVE-27903: Iceberg: Implement Expire Snapshot with
default table properties. (#4906). (Ayush Saxena, reviewed by zhangbutao)
fc01fbc129a is described below
commit fc01fbc129a13a3892f25f3d6d63a47c6558378c
Author: Ayush Saxena <[email protected]>
AuthorDate: Thu Nov 30 15:44:54 2023 +0530
HIVE-27903: Iceberg: Implement Expire Snapshot with default table
properties. (#4906). (Ayush Saxena, reviewed by zhangbutao)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 12 ++++++++-
.../mr/hive/TestHiveIcebergExpireSnapshots.java | 29 ++++++++++++++++++++++
.../hadoop/hive/ql/parse/AlterClauseParser.g | 4 +--
.../table/execute/AlterTableExecuteAnalyzer.java | 5 +++-
4 files changed, 46 insertions(+), 4 deletions(-)
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index c729fcef430..9cfb210eec3 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -894,7 +894,9 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
LOG.info("Executing expire snapshots on iceberg table {} with {}
threads", icebergTable.name(), numThreads);
deleteExecutorService = getDeleteExecutorService(icebergTable.name(),
numThreads);
}
- if (expireSnapshotsSpec.isExpireByTimestampRange()) {
+ if (expireSnapshotsSpec == null) {
+ expireSnapshotWithDefaultParams(icebergTable, deleteExecutorService);
+ } else if (expireSnapshotsSpec.isExpireByTimestampRange()) {
expireSnapshotByTimestampRange(icebergTable,
expireSnapshotsSpec.getFromTimestampMillis(),
expireSnapshotsSpec.getTimestampMillis(), deleteExecutorService);
} else if (expireSnapshotsSpec.isExpireByIds()) {
@@ -911,6 +913,14 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
}
}
+ private void expireSnapshotWithDefaultParams(Table icebergTable,
ExecutorService deleteExecutorService) {
+ ExpireSnapshots expireSnapshots = icebergTable.expireSnapshots();
+ if (deleteExecutorService != null) {
+ expireSnapshots.executeDeleteWith(deleteExecutorService);
+ }
+ expireSnapshots.commit();
+ }
+
private void expireSnapshotRetainLast(Table icebergTable, int numRetainLast,
ExecutorService deleteExecutorService) {
ExpireSnapshots expireSnapshots = icebergTable.expireSnapshots();
expireSnapshots.retainLast(numRetainLast);
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
index 4a3b951bde4..9f036a5615a 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
@@ -36,6 +36,7 @@ import org.junit.Assume;
import org.junit.Test;
import static org.apache.iceberg.TableProperties.MAX_SNAPSHOT_AGE_MS;
+import static org.apache.iceberg.TableProperties.MIN_SNAPSHOTS_TO_KEEP;
/**
* Tests covering the rollback feature
@@ -117,6 +118,34 @@ public class TestHiveIcebergExpireSnapshots extends
HiveIcebergStorageHandlerWit
Assert.assertEquals(5, IterableUtils.size(table.snapshots()));
}
+ @Test
+ public void testExpireSnapshotsWithDefaultParams() throws IOException,
InterruptedException {
+ TableIdentifier identifier = TableIdentifier.of("default", "source");
+ Table table = testTables.createTableWithVersions(shell, identifier.name(),
+ HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
+ HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 10);
+ // No snapshot should expire, since the max snapshot age to expire is by
default 5 days
+ shell.executeStatement("ALTER TABLE " + identifier.name() + " EXECUTE
EXPIRE_SNAPSHOTS");
+ table.refresh();
+ Assert.assertEquals(10, IterableUtils.size(table.snapshots()));
+
+ // Change max snapshot age to expire to 1 ms & min snapshots to keep as 3
& re-execute
+ shell.executeStatement(
+ "ALTER TABLE " + identifier.name() + " SET TBLPROPERTIES('" +
MAX_SNAPSHOT_AGE_MS + "'='1'" + ",'" +
+ MIN_SNAPSHOTS_TO_KEEP + "'='3')");
+ shell.executeStatement("ALTER TABLE " + identifier.name() + " EXECUTE
EXPIRE_SNAPSHOTS");
+ table.refresh();
+ Assert.assertEquals(3, IterableUtils.size(table.snapshots()));
+
+ // Change the min snapshot to keep as 2
+ shell.executeStatement(
+ "ALTER TABLE " + identifier.name() + " SET TBLPROPERTIES('" +
MIN_SNAPSHOTS_TO_KEEP + "'='2')");
+ shell.executeStatement("ALTER TABLE " + identifier.name() + " EXECUTE
EXPIRE_SNAPSHOTS");
+ table.refresh();
+ Assert.assertEquals(2, IterableUtils.size(table.snapshots()));
+
+ }
+
@Test
public void testDeleteOrphanFiles() throws IOException, InterruptedException
{
TableIdentifier identifier = TableIdentifier.of("default", "source");
diff --git
a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
index 3e6105957c0..8e8ec4e33f9 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
@@ -475,8 +475,8 @@ alterStatementSuffixExecute
@after { gParent.popMsg(state); }
: KW_EXECUTE KW_ROLLBACK LPAREN (rollbackParam=(StringLiteral | Number))
RPAREN
-> ^(TOK_ALTERTABLE_EXECUTE KW_ROLLBACK $rollbackParam)
- | KW_EXECUTE KW_EXPIRE_SNAPSHOTS LPAREN (expireParam=StringLiteral) RPAREN
- -> ^(TOK_ALTERTABLE_EXECUTE KW_EXPIRE_SNAPSHOTS $expireParam)
+ | KW_EXECUTE KW_EXPIRE_SNAPSHOTS (LPAREN (expireParam=StringLiteral)
RPAREN)?
+ -> ^(TOK_ALTERTABLE_EXECUTE KW_EXPIRE_SNAPSHOTS $expireParam?)
| KW_EXECUTE KW_SET_CURRENT_SNAPSHOT LPAREN (snapshotParam=expression)
RPAREN
-> ^(TOK_ALTERTABLE_EXECUTE KW_SET_CURRENT_SNAPSHOT $snapshotParam)
| KW_EXECUTE KW_FAST_FORWARD sourceBranch=StringLiteral
(targetBranch=StringLiteral)?
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
index 79448df3b2a..275a0e1a4c5 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
@@ -141,7 +141,10 @@ public class AlterTableExecuteAnalyzer extends
AbstractAlterTableAnalyzer {
private static AlterTableExecuteDesc getExpireSnapshotDesc(TableName
tableName, Map<String, String> partitionSpec,
List<Node> children) throws SemanticException {
AlterTableExecuteSpec<ExpireSnapshotsSpec> spec;
-
+ if (children.size() == 1) {
+ spec = new AlterTableExecuteSpec(EXPIRE_SNAPSHOT, null);
+ return new AlterTableExecuteDesc(tableName, partitionSpec, spec);
+ }
ZoneId timeZone = SessionState.get() == null ?
new HiveConf().getLocalTimeZone() :
SessionState.get().getConf().getLocalTimeZone();