kuczoram commented on a change in pull request #2471:
URL: https://github.com/apache/hive/pull/2471#discussion_r672339586
##########
File path:
iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -1313,6 +1313,186 @@ public void testScanTableCaseInsensitive() throws
IOException {
Assert.assertArrayEquals(new Object[] {1L, "Bob", "Green"}, rows.get(1));
}
+ @Test
+ public void testTruncateTable() throws IOException, TException,
InterruptedException {
+ // Create an Iceberg table with some records in it then execute a truncate
table command.
+ // Then check if the data is deleted and the table statistics are reset to
0.
+ String databaseName = "default";
+ String tableName = "customers";
+ Table icebergTable = testTables.createTable(shell, tableName,
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
+ fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
+ testTruncateTable(databaseName, tableName, icebergTable,
HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
+ HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
+ }
+
+ @Test
+ public void testTruncateEmptyTable() throws IOException, TException,
InterruptedException {
+ // Create an empty Iceberg table and execute a truncate table command on
it.
+ String databaseName = "default";
+ String tableName = "customers";
+ String fullTableName = databaseName + "." + tableName;
Review comment:
Thanks, I changed that.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
##########
@@ -3360,42 +3360,50 @@ public CmRecycleResponse cm_recycle(final
CmRecycleRequest request) throws MetaE
public void truncate_table(final String dbName, final String tableName,
List<String> partNames)
throws NoSuchObjectException, MetaException {
// Deprecated path, won't work for txn tables.
- truncateTableInternal(dbName, tableName, partNames, null, -1);
+ truncateTableInternal(dbName, tableName, partNames, null, -1, null);
}
@Override
public TruncateTableResponse truncate_table_req(TruncateTableRequest req)
throws MetaException, TException {
truncateTableInternal(req.getDbName(), req.getTableName(),
req.getPartNames(),
- req.getValidWriteIdList(), req.getWriteId());
+ req.getValidWriteIdList(), req.getWriteId(),
req.getEnvironmentContext());
return new TruncateTableResponse();
}
private void truncateTableInternal(String dbName, String tableName,
List<String> partNames,
- String validWriteIds, long writeId)
throws MetaException, NoSuchObjectException {
+ String validWriteIds, long writeId,
EnvironmentContext context) throws MetaException, NoSuchObjectException {
boolean isSkipTrash = false, needCmRecycle = false;
try {
String[] parsedDbName = parseDbName(dbName, conf);
Table tbl = get_table_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME], tableName);
- boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl) ||
- !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
-
- if (truncateFiles) {
- isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
- Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
- needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ boolean skipDataDeletion = false;
+ if (context != null && context.getProperties() != null
+ && context.getProperties().get("truncateSkipDataDeletion") != null) {
Review comment:
We can do that. Fixed it.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
##########
@@ -3360,42 +3360,50 @@ public CmRecycleResponse cm_recycle(final
CmRecycleRequest request) throws MetaE
public void truncate_table(final String dbName, final String tableName,
List<String> partNames)
throws NoSuchObjectException, MetaException {
// Deprecated path, won't work for txn tables.
- truncateTableInternal(dbName, tableName, partNames, null, -1);
+ truncateTableInternal(dbName, tableName, partNames, null, -1, null);
}
@Override
public TruncateTableResponse truncate_table_req(TruncateTableRequest req)
throws MetaException, TException {
truncateTableInternal(req.getDbName(), req.getTableName(),
req.getPartNames(),
- req.getValidWriteIdList(), req.getWriteId());
+ req.getValidWriteIdList(), req.getWriteId(),
req.getEnvironmentContext());
return new TruncateTableResponse();
}
private void truncateTableInternal(String dbName, String tableName,
List<String> partNames,
- String validWriteIds, long writeId)
throws MetaException, NoSuchObjectException {
+ String validWriteIds, long writeId,
EnvironmentContext context) throws MetaException, NoSuchObjectException {
boolean isSkipTrash = false, needCmRecycle = false;
try {
String[] parsedDbName = parseDbName(dbName, conf);
Table tbl = get_table_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME], tableName);
- boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl) ||
- !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
-
- if (truncateFiles) {
- isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
- Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
- needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ boolean skipDataDeletion = false;
+ if (context != null && context.getProperties() != null
Review comment:
Ok, I am not sure that it is more readable for me this way, but I can
change it. What happens if the context or the context.properties is null? I am
just asking because I want to be sure that no NPE would occur.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
##########
@@ -3360,42 +3360,50 @@ public CmRecycleResponse cm_recycle(final
CmRecycleRequest request) throws MetaE
public void truncate_table(final String dbName, final String tableName,
List<String> partNames)
throws NoSuchObjectException, MetaException {
// Deprecated path, won't work for txn tables.
- truncateTableInternal(dbName, tableName, partNames, null, -1);
+ truncateTableInternal(dbName, tableName, partNames, null, -1, null);
}
@Override
public TruncateTableResponse truncate_table_req(TruncateTableRequest req)
throws MetaException, TException {
truncateTableInternal(req.getDbName(), req.getTableName(),
req.getPartNames(),
- req.getValidWriteIdList(), req.getWriteId());
+ req.getValidWriteIdList(), req.getWriteId(),
req.getEnvironmentContext());
return new TruncateTableResponse();
}
private void truncateTableInternal(String dbName, String tableName,
List<String> partNames,
- String validWriteIds, long writeId)
throws MetaException, NoSuchObjectException {
+ String validWriteIds, long writeId,
EnvironmentContext context) throws MetaException, NoSuchObjectException {
boolean isSkipTrash = false, needCmRecycle = false;
try {
String[] parsedDbName = parseDbName(dbName, conf);
Table tbl = get_table_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME], tableName);
- boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl) ||
- !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
-
- if (truncateFiles) {
- isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
- Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
- needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ boolean skipDataDeletion = false;
+ if (context != null && context.getProperties() != null
+ && context.getProperties().get("truncateSkipDataDeletion") != null) {
+ skipDataDeletion =
Boolean.parseBoolean(context.getProperties().get("truncateSkipDataDeletion"));
}
- // This is not transactional
- for (Path location : getLocationsForTruncate(getMS(),
parsedDbName[CAT_NAME],
- parsedDbName[DB_NAME], tableName, tbl, partNames)) {
- FileSystem fs = location.getFileSystem(getConf());
+
+ if (!skipDataDeletion) {
+ boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl)
+ || !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
+
if (truncateFiles) {
- truncateDataFiles(location, fs, isSkipTrash, needCmRecycle);
- } else {
- // For Acid tables we don't need to delete the old files, only write
an empty baseDir.
- // Compaction and cleaner will take care of the rest
- addTruncateBaseFile(location, writeId, fs);
+ isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
+ Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
+ needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ }
+ // This is not transactional
+ for (Path location : getLocationsForTruncate(getMS(),
parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
+ tbl, partNames)) {
+ FileSystem fs = location.getFileSystem(getConf());
+ if (truncateFiles) {
Review comment:
I am not sure what you mean by same code as before. The truncateFiles is
used for different purpose. It is used to treat the acid tables differently if
the proper flags are set: instead of deleting the files, create a new base. But
we don't want any of these logic for Iceberg tables, so that's why it is put
inside the if (!skipDataDeletion).
We can put the steps from the block 3391 as they should be executed only if
truncateFiles is true. I just didn't touch this original implementation.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
##########
@@ -3360,42 +3360,50 @@ public CmRecycleResponse cm_recycle(final
CmRecycleRequest request) throws MetaE
public void truncate_table(final String dbName, final String tableName,
List<String> partNames)
throws NoSuchObjectException, MetaException {
// Deprecated path, won't work for txn tables.
- truncateTableInternal(dbName, tableName, partNames, null, -1);
+ truncateTableInternal(dbName, tableName, partNames, null, -1, null);
}
@Override
public TruncateTableResponse truncate_table_req(TruncateTableRequest req)
throws MetaException, TException {
truncateTableInternal(req.getDbName(), req.getTableName(),
req.getPartNames(),
- req.getValidWriteIdList(), req.getWriteId());
+ req.getValidWriteIdList(), req.getWriteId(),
req.getEnvironmentContext());
return new TruncateTableResponse();
}
private void truncateTableInternal(String dbName, String tableName,
List<String> partNames,
- String validWriteIds, long writeId)
throws MetaException, NoSuchObjectException {
+ String validWriteIds, long writeId,
EnvironmentContext context) throws MetaException, NoSuchObjectException {
boolean isSkipTrash = false, needCmRecycle = false;
try {
String[] parsedDbName = parseDbName(dbName, conf);
Table tbl = get_table_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME], tableName);
- boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl) ||
- !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
-
- if (truncateFiles) {
- isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
- Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
- needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ boolean skipDataDeletion = false;
+ if (context != null && context.getProperties() != null
+ && context.getProperties().get("truncateSkipDataDeletion") != null) {
+ skipDataDeletion =
Boolean.parseBoolean(context.getProperties().get("truncateSkipDataDeletion"));
}
- // This is not transactional
- for (Path location : getLocationsForTruncate(getMS(),
parsedDbName[CAT_NAME],
- parsedDbName[DB_NAME], tableName, tbl, partNames)) {
- FileSystem fs = location.getFileSystem(getConf());
+
+ if (!skipDataDeletion) {
+ boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl)
+ || !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
+
if (truncateFiles) {
- truncateDataFiles(location, fs, isSkipTrash, needCmRecycle);
- } else {
- // For Acid tables we don't need to delete the old files, only write
an empty baseDir.
- // Compaction and cleaner will take care of the rest
- addTruncateBaseFile(location, writeId, fs);
+ isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
+ Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
+ needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ }
+ // This is not transactional
+ for (Path location : getLocationsForTruncate(getMS(),
parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
+ tbl, partNames)) {
+ FileSystem fs = location.getFileSystem(getConf());
+ if (truncateFiles) {
Review comment:
I am not sure what you mean by same code as before. The truncateFiles is
used for different purpose. It is used to treat the acid tables differently if
the proper flags are set: instead of deleting the files, create a new base. But
we don't want any of these logic for Iceberg tables, so that's why it is put
inside the if (!skipDataDeletion).
I guess the check in line 3391 is because those steps should be executed
once and not for all path. But it is part of the original implementation which
I only but inside an if and didn't change otherwise, so I am just guessing
about the original intentions.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
##########
@@ -3360,42 +3360,50 @@ public CmRecycleResponse cm_recycle(final
CmRecycleRequest request) throws MetaE
public void truncate_table(final String dbName, final String tableName,
List<String> partNames)
throws NoSuchObjectException, MetaException {
// Deprecated path, won't work for txn tables.
- truncateTableInternal(dbName, tableName, partNames, null, -1);
+ truncateTableInternal(dbName, tableName, partNames, null, -1, null);
}
@Override
public TruncateTableResponse truncate_table_req(TruncateTableRequest req)
throws MetaException, TException {
truncateTableInternal(req.getDbName(), req.getTableName(),
req.getPartNames(),
- req.getValidWriteIdList(), req.getWriteId());
+ req.getValidWriteIdList(), req.getWriteId(),
req.getEnvironmentContext());
return new TruncateTableResponse();
}
private void truncateTableInternal(String dbName, String tableName,
List<String> partNames,
- String validWriteIds, long writeId)
throws MetaException, NoSuchObjectException {
+ String validWriteIds, long writeId,
EnvironmentContext context) throws MetaException, NoSuchObjectException {
boolean isSkipTrash = false, needCmRecycle = false;
try {
String[] parsedDbName = parseDbName(dbName, conf);
Table tbl = get_table_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME], tableName);
- boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl) ||
- !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
-
- if (truncateFiles) {
- isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
- Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
- needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ boolean skipDataDeletion = false;
+ if (context != null && context.getProperties() != null
+ && context.getProperties().get("truncateSkipDataDeletion") != null) {
+ skipDataDeletion =
Boolean.parseBoolean(context.getProperties().get("truncateSkipDataDeletion"));
}
- // This is not transactional
- for (Path location : getLocationsForTruncate(getMS(),
parsedDbName[CAT_NAME],
- parsedDbName[DB_NAME], tableName, tbl, partNames)) {
- FileSystem fs = location.getFileSystem(getConf());
+
+ if (!skipDataDeletion) {
+ boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl)
+ || !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
+
if (truncateFiles) {
- truncateDataFiles(location, fs, isSkipTrash, needCmRecycle);
- } else {
- // For Acid tables we don't need to delete the old files, only write
an empty baseDir.
- // Compaction and cleaner will take care of the rest
- addTruncateBaseFile(location, writeId, fs);
+ isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
+ Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
+ needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ }
+ // This is not transactional
+ for (Path location : getLocationsForTruncate(getMS(),
parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName,
+ tbl, partNames)) {
+ FileSystem fs = location.getFileSystem(getConf());
+ if (truncateFiles) {
Review comment:
I am not sure what you mean by same code as before. The truncateFiles is
used for different purpose. It is used to treat the acid tables differently if
the proper flags are set: instead of deleting the files, create a new base. But
we don't want any of these logic for Iceberg tables, so that's why it is put
inside the if (!skipDataDeletion).
I guess the check in line 3391 is because those steps should be executed
once and not for all path. But it is part of the original implementation which
I only put inside an if and didn't change otherwise, so I am just guessing
about the original intentions.
##########
File path:
iceberg/iceberg-handler/src/test/queries/positive/truncate_force_iceberg_table.q
##########
@@ -0,0 +1,20 @@
+set hive.vectorized.execution.enabled=false;
+
+drop table if exists test_truncate;
+create external table test_truncate (id int, value string) stored by iceberg
stored as parquet;
+alter table test_truncate set tblproperties('external.table.purge'='false');
+insert into test_truncate values (1,
'one'),(2,'two'),(3,'three'),(4,'four'),(5,'five');
+insert into test_truncate values (6, 'six'), (7, 'seven');
+insert into test_truncate values (8, 'eight'), (9, 'nine'), (10, 'ten');
+analyze table test_truncate compute statistics;
+
+select * from test_truncate order by id;
Review comment:
\oh yeah, you're right. I always forget it. I changed the tests.
##########
File path:
iceberg/iceberg-handler/src/test/queries/positive/truncate_force_iceberg_table.q
##########
@@ -0,0 +1,20 @@
+set hive.vectorized.execution.enabled=false;
+
+drop table if exists test_truncate;
+create external table test_truncate (id int, value string) stored by iceberg
stored as parquet;
+alter table test_truncate set tblproperties('external.table.purge'='false');
+insert into test_truncate values (1,
'one'),(2,'two'),(3,'three'),(4,'four'),(5,'five');
+insert into test_truncate values (6, 'six'), (7, 'seven');
+insert into test_truncate values (8, 'eight'), (9, 'nine'), (10, 'ten');
+analyze table test_truncate compute statistics;
+
+select * from test_truncate order by id;
Review comment:
Oh yeah, you're right. I always forget it. I changed the tests.
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
##########
@@ -3360,42 +3360,50 @@ public CmRecycleResponse cm_recycle(final
CmRecycleRequest request) throws MetaE
public void truncate_table(final String dbName, final String tableName,
List<String> partNames)
throws NoSuchObjectException, MetaException {
// Deprecated path, won't work for txn tables.
- truncateTableInternal(dbName, tableName, partNames, null, -1);
+ truncateTableInternal(dbName, tableName, partNames, null, -1, null);
}
@Override
public TruncateTableResponse truncate_table_req(TruncateTableRequest req)
throws MetaException, TException {
truncateTableInternal(req.getDbName(), req.getTableName(),
req.getPartNames(),
- req.getValidWriteIdList(), req.getWriteId());
+ req.getValidWriteIdList(), req.getWriteId(),
req.getEnvironmentContext());
return new TruncateTableResponse();
}
private void truncateTableInternal(String dbName, String tableName,
List<String> partNames,
- String validWriteIds, long writeId)
throws MetaException, NoSuchObjectException {
+ String validWriteIds, long writeId,
EnvironmentContext context) throws MetaException, NoSuchObjectException {
boolean isSkipTrash = false, needCmRecycle = false;
try {
String[] parsedDbName = parseDbName(dbName, conf);
Table tbl = get_table_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME], tableName);
- boolean truncateFiles = !TxnUtils.isTransactionalTable(tbl) ||
- !MetastoreConf.getBoolVar(getConf(),
MetastoreConf.ConfVars.TRUNCATE_ACID_USE_BASE);
-
- if (truncateFiles) {
- isSkipTrash = MetaStoreUtils.isSkipTrash(tbl.getParameters());
- Database db = get_database_core(parsedDbName[CAT_NAME],
parsedDbName[DB_NAME]);
- needCmRecycle = ReplChangeManager.shouldEnableCm(db, tbl);
+ boolean skipDataDeletion = false;
+ if (context != null && context.getProperties() != null
Review comment:
Thanks a lot for the hint. I changed it in the code.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]