lcspinter commented on a change in pull request #2219:
URL: https://github.com/apache/hive/pull/2219#discussion_r620416625
##########
File path: iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -955,4 +1046,44 @@ private void validateBasicStats(Table icebergTable, String dbName, String tableN
       Assert.assertEquals(summary.get(entry.getValue()), hmsParams.get(entry.getKey()));
     }
   }
+
+  private void validateMigration(String tableName, int recordCount) {
+    List<Object[]> originalResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(recordCount, originalResult.size());
+    List<Object[]> originalDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+    validateDescribeOutput(originalDescribe, fileFormat.name());
Review comment:
You are absolutely right.
##########
File path: iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -955,4 +1046,44 @@ private void validateBasicStats(Table icebergTable, String dbName, String tableN
       Assert.assertEquals(summary.get(entry.getValue()), hmsParams.get(entry.getKey()));
     }
   }
+
+  private void validateMigration(String tableName, int recordCount) {
+    List<Object[]> originalResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(recordCount, originalResult.size());
+    List<Object[]> originalDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+    validateDescribeOutput(originalDescribe, fileFormat.name());
+    shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES " +
+        "('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')");
+    List<Object[]> alterResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(originalResult.size(), alterResult.size());
+    List<Object[]> alterDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+    validateDescribeOutput(alterDescribe, "iceberg");
+  }
+
+  private void validateMigrationRollback(String tableName, int recordCount) {
+    List<Object[]> originalResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(recordCount, originalResult.size());
+    List<Object[]> originalDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+    validateDescribeOutput(originalDescribe, fileFormat.name());
+    try (MockedStatic<HiveTableUtil> mockedTableUtil = Mockito.mockStatic(HiveTableUtil.class)) {
+      mockedTableUtil.when(() -> HiveTableUtil.importFiles(ArgumentMatchers.anyString(), ArgumentMatchers.anyString(),
+          ArgumentMatchers.any(PartitionSpecProxy.class), ArgumentMatchers.anyList(),
+          ArgumentMatchers.any(Properties.class), ArgumentMatchers.any(Configuration.class)))
+          .thenThrow(new MetaException());
+      shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES " +
+          "('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')");
+      List<Object[]> alterResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+      Assert.assertEquals(originalResult.size(), alterResult.size());
+      List<Object[]> alterDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+      validateDescribeOutput(alterDescribe, fileFormat.name());
+    }
+  }
+
+  private void validateDescribeOutput(List<Object[]> describe, String format) {
Review comment:
It validates whether the contents of the SD (serde, input/output format) are changed/retained (in the case of a rollback). I've changed this method based on @marton-bod's suggestions.
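
For readers following along, here is a minimal sketch of what such an SD check could look like. The row layout of the DESCRIBE FORMATTED output and the matched labels are assumptions for illustration, not the PR's actual implementation:

    // Hypothetical sketch: assumes each DESCRIBE FORMATTED row arrives from
    // the test shell as Object[] {label, value, comment}.
    private void validateDescribeOutput(List<Object[]> describe, String format) {
      for (Object[] row : describe) {
        String label = String.valueOf(row[0]).trim();
        if (label.startsWith("SerDe Library:") || label.startsWith("InputFormat:") ||
            label.startsWith("OutputFormat:")) {
          // After migration the SD should reference Iceberg classes; after a
          // rollback it should still reference the original file format.
          Assert.assertTrue(String.valueOf(row[1]).toLowerCase().contains(format.toLowerCase()));
        }
      }
    }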
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AbstractAlterTableOperation.java
##########
@@ -138,8 +141,32 @@ private void finalizeAlterTableWithWriteIdOp(Table table, Table oldTable, List<P
     environmentContext.putToProperties(HiveMetaHook.ALTER_TABLE_OPERATION_TYPE, desc.getType().name());
     if (partitions == null) {
       long writeId = desc.getWriteId() != null ? desc.getWriteId() : 0;
-      context.getDb().alterTable(desc.getDbTableName(), table, desc.isCascade(), environmentContext, true,
-          writeId);
+      try {
+        context.getDb().alterTable(desc.getDbTableName(), table, desc.isCascade(), environmentContext, true, writeId);
+      } catch (HiveException ex) {
+        if (environmentContext.getProperties().containsKey(HiveMetaHook.INITIALIZE_ROLLBACK_ALTER) && Boolean
+            .valueOf(environmentContext.getProperties().get(HiveMetaHook.INITIALIZE_ROLLBACK_ALTER))) {
+          // in case of rollback of alter table do the following:
+          // 1. drop the already altered table but keep the data files
+          // 2. recreate the original table
+          // 3. run msck repair to sync partitions on filesystem with metastore
Review comment:
I did some manual testing, adding and removing directories under the table, and it seems `Msck#repair` uses the file system as the single source of truth. If partitions are added or removed on the file system, it updates the metastore accordingly.
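
To illustrate the behaviour described above, a sketch in the style of the handler tests (the table and partition names are made up; `shell` is the test shell from TestHiveIcebergStorageHandlerWithEngine):

    // Illustrative only: partitions present on the file system win.
    shell.executeStatement("CREATE TABLE sync_test (a INT) PARTITIONED BY (p STRING)");
    shell.executeStatement("ALTER TABLE sync_test ADD PARTITION (p='one')");
    // ... delete the p=one directory directly on the file system ...
    // MSCK REPAIR with SYNC PARTITIONS both adds partitions found on disk and
    // drops partitions whose directories are gone, bringing the metastore in
    // line with the file system.
    shell.executeStatement("MSCK REPAIR TABLE sync_test SYNC PARTITIONS");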
##########
File path: iceberg/pom.xml
##########
@@ -31,7 +31,7 @@
<path.to.iceberg.root>.</path.to.iceberg.root>
<iceberg-api.version>0.11.0</iceberg-api.version>
<kryo-shaded.version>4.0.2</kryo-shaded.version>
- <iceberg.mockito-core.version>1.10.19</iceberg.mockito-core.version>
+ <iceberg.mockito-core.version>3.4.4</iceberg.mockito-core.version>
Review comment:
To bump the version in the other places as well, I would need to touch 7-8 modules; it's not a trivial change.
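
For context, the APIs the new rollback test leans on (`Mockito.mockStatic` and `MockedStatic`) only exist since mockito-core 3.4.0, hence the bump. A self-contained toy example of the scoped static mocking (assuming the inline mock maker is enabled, e.g. via the mockito-inline artifact; the Util class is made up for the example):

    import org.mockito.MockedStatic;
    import org.mockito.Mockito;

    class Util {
      static String greet() { return "hello"; }
    }

    public class MockStaticDemo {
      public static void main(String[] args) {
        try (MockedStatic<Util> mocked = Mockito.mockStatic(Util.class)) {
          mocked.when(Util::greet).thenReturn("mocked");
          System.out.println(Util.greet()); // "mocked" while the scope is open
        }
        System.out.println(Util.greet()); // back to "hello" once closed
      }
    }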
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AbstractAlterTableOperation.java
##########
@@ -138,8 +141,32 @@ private void finalizeAlterTableWithWriteIdOp(Table table, Table oldTable, List<P
     environmentContext.putToProperties(HiveMetaHook.ALTER_TABLE_OPERATION_TYPE, desc.getType().name());
     if (partitions == null) {
       long writeId = desc.getWriteId() != null ? desc.getWriteId() : 0;
-      context.getDb().alterTable(desc.getDbTableName(), table, desc.isCascade(), environmentContext, true,
-          writeId);
+      try {
+        context.getDb().alterTable(desc.getDbTableName(), table, desc.isCascade(), environmentContext, true, writeId);
+      } catch (HiveException ex) {
+        if (environmentContext.getProperties().containsKey(HiveMetaHook.INITIALIZE_ROLLBACK_ALTER) && Boolean
+            .valueOf(environmentContext.getProperties().get(HiveMetaHook.INITIALIZE_ROLLBACK_ALTER))) {
+          // in case of rollback of alter table do the following:
+          // 1. drop the already altered table but keep the data files
+          // 2. recreate the original table
+          // 3. run msck repair to sync partitions on filesystem with metastore
+          context.getDb().dropTable(table.getDbName(), table.getTableName(), false, true, false);
+          context.getDb().createTable(oldTable);
+          Msck msck = new Msck(false, false);
+          try {
+            msck.init(Msck.getMsckConf(context.getDb().getConf()));
+            msck.updateExpressionProxy(Msck.getProxyClass(context.getDb().getConf()));
+            MsckInfo msckInfo =
+                new MsckInfo(table.getCatalogName(), table.getDbName(), table.getTableName(), null, null, true, true,
Review comment:
Well, yes :). I was thinking about this, but I would have to change the code in a dozen more places, which doesn't fit into this PR.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AbstractAlterTableOperation.java
##########
@@ -41,6 +39,11 @@
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.thrift.TException;
+
+import java.util.ArrayList;
Review comment:
It does not :). Anyway, I reordered them.
##########
File path: iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -955,4 +1046,44 @@ private void validateBasicStats(Table icebergTable, String dbName, String tableN
       Assert.assertEquals(summary.get(entry.getValue()), hmsParams.get(entry.getKey()));
     }
   }
+
+  private void validateMigration(String tableName, int recordCount) {
+    List<Object[]> originalResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(recordCount, originalResult.size());
+    List<Object[]> originalDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+    validateDescribeOutput(originalDescribe, fileFormat.name());
+    shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES " +
+        "('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')");
+    List<Object[]> alterResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(originalResult.size(), alterResult.size());
+    List<Object[]> alterDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
Review comment:
Sure. Thanks for pointing this out.
##########
File path: iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
##########
@@ -955,4 +1046,44 @@ private void validateBasicStats(Table icebergTable, String dbName, String tableN
       Assert.assertEquals(summary.get(entry.getValue()), hmsParams.get(entry.getKey()));
     }
   }
+
+  private void validateMigration(String tableName, int recordCount) {
+    List<Object[]> originalResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(recordCount, originalResult.size());
+    List<Object[]> originalDescribe = shell.executeStatement("DESCRIBE FORMATTED " + tableName);
+    validateDescribeOutput(originalDescribe, fileFormat.name());
+    shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES " +
+        "('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')");
+    List<Object[]> alterResult = shell.executeStatement("SELECT * FROM " + tableName + " ORDER BY a");
+    Assert.assertEquals(originalResult.size(), alterResult.size());
Review comment:
Added a content check.
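
A sketch of what the added check might look like (hypothetical; both result lists come from the same ORDER BY a query, so a row-by-row comparison is well defined):

    // Compare contents, not just sizes; rows are Object[] in query order.
    for (int i = 0; i < originalResult.size(); i++) {
      Assert.assertArrayEquals(originalResult.get(i), alterResult.get(i));
    }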
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/ddl/table/AbstractAlterTableOperation.java
##########
@@ -138,8 +141,32 @@ private void finalizeAlterTableWithWriteIdOp(Table table, Table oldTable, List<P
     environmentContext.putToProperties(HiveMetaHook.ALTER_TABLE_OPERATION_TYPE, desc.getType().name());
     if (partitions == null) {
       long writeId = desc.getWriteId() != null ? desc.getWriteId() : 0;
-      context.getDb().alterTable(desc.getDbTableName(), table, desc.isCascade(), environmentContext, true,
-          writeId);
+      try {
+        context.getDb().alterTable(desc.getDbTableName(), table, desc.isCascade(), environmentContext, true, writeId);
+      } catch (HiveException ex) {
+        if (environmentContext.getProperties().containsKey(HiveMetaHook.INITIALIZE_ROLLBACK_ALTER) && Boolean
Review comment:
Fixed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]