This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3b2e8a9f4a1 HIVE-27789: Iceberg: Add a way to expire snapshots with
retain last. (#4835). (Ayush Saxena, reviewed by Denys Kuzmenko, zhangbutao)
3b2e8a9f4a1 is described below
commit 3b2e8a9f4a120e269abccdb93a649cf3132ae5de
Author: Ayush Saxena <[email protected]>
AuthorDate: Tue Nov 7 07:34:06 2023 +0530
HIVE-27789: Iceberg: Add a way to expire snapshots with retain last.
(#4835). (Ayush Saxena, reviewed by Denys Kuzmenko, zhangbutao)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 11 +++++++++++
.../mr/hive/TestHiveIcebergExpireSnapshots.java | 21 +++++++++++++++++++++
.../apache/hadoop/hive/ql/parse/AlterClauseParser.g | 2 ++
.../table/execute/AlterTableExecuteAnalyzer.java | 8 +++++++-
.../hadoop/hive/ql/parse/AlterTableExecuteSpec.java | 16 ++++++++++++++++
5 files changed, 57 insertions(+), 1 deletion(-)
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 0afdd79ba55..538c7316779 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -867,6 +867,8 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
expireSnapshotsSpec.getTimestampMillis(), deleteExecutorService);
} else if (expireSnapshotsSpec.isExpireByIds()) {
expireSnapshotByIds(icebergTable,
expireSnapshotsSpec.getIdsToExpire(), deleteExecutorService);
+ } else if (expireSnapshotsSpec.isExpireByRetainLast()) {
+ expireSnapshotRetainLast(icebergTable,
expireSnapshotsSpec.getNumRetainLast(), deleteExecutorService);
} else {
expireSnapshotOlderThanTimestamp(icebergTable,
expireSnapshotsSpec.getTimestampMillis(), deleteExecutorService);
}
@@ -877,6 +879,15 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
}
}
+ private void expireSnapshotRetainLast(Table icebergTable, int numRetainLast,
ExecutorService deleteExecutorService) {
+ ExpireSnapshots expireSnapshots = icebergTable.expireSnapshots();
+ expireSnapshots.retainLast(numRetainLast);
+ if (deleteExecutorService != null) {
+ expireSnapshots.executeDeleteWith(deleteExecutorService);
+ }
+ expireSnapshots.commit();
+ }
+
private void expireSnapshotByTimestampRange(Table icebergTable, Long
fromTimestamp, Long toTimestamp,
ExecutorService deleteExecutorService) {
ExpireSnapshots expireSnapshots = icebergTable.expireSnapshots();
diff --git
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
index a851578ee6c..8fd808f4fed 100644
---
a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
+++
b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergExpireSnapshots.java
@@ -28,6 +28,8 @@ import org.apache.iceberg.catalog.TableIdentifier;
import org.junit.Assert;
import org.junit.Test;
+import static org.apache.iceberg.TableProperties.MAX_SNAPSHOT_AGE_MS;
+
/**
* Tests covering the rollback feature
*/
@@ -82,4 +84,23 @@ public class TestHiveIcebergExpireSnapshots extends
HiveIcebergStorageHandlerWit
table.refresh();
Assert.assertEquals(6, IterableUtils.size(table.snapshots()));
}
+
+ @Test
+ public void testExpireSnapshotsWithRetainLast() throws IOException,
InterruptedException {
+ TableIdentifier identifier = TableIdentifier.of("default", "source");
+ Table table = testTables.createTableWithVersions(shell, identifier.name(),
+ HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
+ HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 10);
+ // No snapshot should expire, since the max snapshot age to expire is by
default 5 days
+ shell.executeStatement("ALTER TABLE " + identifier.name() + " EXECUTE
EXPIRE_SNAPSHOTS RETAIN LAST 5");
+ table.refresh();
+ Assert.assertEquals(10, IterableUtils.size(table.snapshots()));
+
+ // Change max snapshot age to expire to 1 ms & re-execute, this time it
should retain only 5
+ shell.executeStatement(
+ "ALTER TABLE " + identifier.name() + " SET TBLPROPERTIES('" +
MAX_SNAPSHOT_AGE_MS + "'='1')");
+ shell.executeStatement("ALTER TABLE " + identifier.name() + " EXECUTE
EXPIRE_SNAPSHOTS RETAIN LAST 5");
+ table.refresh();
+ Assert.assertEquals(5, IterableUtils.size(table.snapshots()));
+ }
}
diff --git
a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
index 28d60d6262f..a184b41e0f4 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
@@ -485,6 +485,8 @@ alterStatementSuffixExecute
-> ^(TOK_ALTERTABLE_EXECUTE KW_CHERRY_PICK $snapshotId)
| KW_EXECUTE KW_EXPIRE_SNAPSHOTS KW_BETWEEN (fromTimestamp=StringLiteral)
KW_AND (toTimestamp=StringLiteral)
-> ^(TOK_ALTERTABLE_EXECUTE KW_EXPIRE_SNAPSHOTS $fromTimestamp
$toTimestamp)
+ | KW_EXECUTE KW_EXPIRE_SNAPSHOTS KW_RETAIN KW_LAST numToRetain=Number
+ -> ^(TOK_ALTERTABLE_EXECUTE KW_EXPIRE_SNAPSHOTS KW_RETAIN $numToRetain)
;
alterStatementSuffixDropBranch
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
index 8d4a902b56b..cdd6f035d4d 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/execute/AlterTableExecuteAnalyzer.java
@@ -54,6 +54,7 @@ import static
org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec.ExecuteOpera
import static
org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec.ExecuteOperationType.SET_CURRENT_SNAPSHOT;
import static
org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec.RollbackSpec.RollbackType.TIME;
import static
org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec.RollbackSpec.RollbackType.VERSION;
+import static org.apache.hadoop.hive.ql.parse.HiveLexer.KW_RETAIN;
/**
* Analyzer for ALTER TABLE ... EXECUTE commands.
@@ -141,7 +142,12 @@ public class AlterTableExecuteAnalyzer extends
AbstractAlterTableAnalyzer {
SessionState.get().getConf().getLocalTimeZone();
ASTNode firstNode = (ASTNode) children.get(1);
String firstNodeText = PlanUtils.stripQuotes(firstNode.getText().trim());
- if (children.size() == 3) {
+ if (firstNode.getType() == KW_RETAIN) {
+ ASTNode numRetainLastNode = (ASTNode) children.get(2);
+ String numToRetainText =
PlanUtils.stripQuotes(numRetainLastNode.getText());
+ int numToRetain = Integer.parseInt(numToRetainText);
+ spec = new AlterTableExecuteSpec(EXPIRE_SNAPSHOT, new
ExpireSnapshotsSpec(numToRetain));
+ } else if (children.size() == 3) {
ASTNode secondNode = (ASTNode) children.get(2);
String secondNodeText =
PlanUtils.stripQuotes(secondNode.getText().trim());
TimestampTZ fromTime = TimestampTZUtil.parse(firstNodeText, timeZone);
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTableExecuteSpec.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTableExecuteSpec.java
index b3c8edd9d87..2b7ca285e1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTableExecuteSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/AlterTableExecuteSpec.java
@@ -113,6 +113,8 @@ public class AlterTableExecuteSpec<T> {
private long fromTimestampMillis = -1L;
+ private int numRetainLast = -1;
+
public ExpireSnapshotsSpec(long timestampMillis) {
this.timestampMillis = timestampMillis;
}
@@ -126,6 +128,10 @@ public class AlterTableExecuteSpec<T> {
this.timestampMillis = toTimestampMillis;
}
+ public ExpireSnapshotsSpec(int numRetainLast) {
+ this.numRetainLast = numRetainLast;
+ }
+
public Long getTimestampMillis() {
return timestampMillis;
}
@@ -138,6 +144,10 @@ public class AlterTableExecuteSpec<T> {
return idsToExpire;
}
+ public int getNumRetainLast() {
+ return numRetainLast;
+ }
+
public boolean isExpireByIds() {
return idsToExpire != null;
}
@@ -146,6 +156,10 @@ public class AlterTableExecuteSpec<T> {
return timestampMillis != -1 && fromTimestampMillis != -1;
}
+ public boolean isExpireByRetainLast() {
+ return numRetainLast != -1;
+ }
+
@Override
public String toString() {
MoreObjects.ToStringHelper stringHelper =
MoreObjects.toStringHelper(this);
@@ -153,6 +167,8 @@ public class AlterTableExecuteSpec<T> {
stringHelper.add("fromTimestampMillis",
fromTimestampMillis).add("toTimestampMillis", timestampMillis);
} else if (isExpireByIds()) {
stringHelper.add("idsToExpire", Arrays.toString(idsToExpire));
+ } else if (isExpireByRetainLast()) {
+ stringHelper.add("numRetainLast", numRetainLast);
} else {
stringHelper.add("timestampMillis", timestampMillis);
}