This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 8ec6b68 HIVE-21706: REPL Dump with concurrent drop of external table fails with InvalidTableException (Sankar Hariappan, reviewed by Mahesh Kumar Behera) 8ec6b68 is described below commit 8ec6b68a4a3883f157e49c2cc7205fd866af9db1 Author: Sankar Hariappan <sank...@apache.org> AuthorDate: Mon May 13 11:19:09 2019 +0530 HIVE-21706: REPL Dump with concurrent drop of external table fails with InvalidTableException (Sankar Hariappan, reviewed by Mahesh Kumar Behera) Signed-off-by: Sankar Hariappan <sank...@apache.org> --- .../TestReplicationScenariosExternalTables.java | 64 +++++++++++++++++++++- .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 28 ++++++---- .../hive/ql/exec/repl/ReplExternalTables.java | 14 ++++- 3 files changed, 91 insertions(+), 15 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index 74854db..c6f8e40 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore; import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection; import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -32,7 +33,6 @@ import org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables; import 
org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.repl.PathBuilder; import org.apache.hadoop.security.UserGroupInformation; import org.junit.BeforeClass; @@ -198,10 +198,10 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros private void assertTablePartitionLocation(String sourceTableName, String replicaTableName) throws HiveException { Hive hiveForPrimary = Hive.get(primary.hiveConf); - Table sourceTable = hiveForPrimary.getTable(sourceTableName); + org.apache.hadoop.hive.ql.metadata.Table sourceTable = hiveForPrimary.getTable(sourceTableName); Path sourceLocation = sourceTable.getDataLocation(); Hive hiveForReplica = Hive.get(replica.hiveConf); - Table replicaTable = hiveForReplica.getTable(replicaTableName); + org.apache.hadoop.hive.ql.metadata.Table replicaTable = hiveForReplica.getTable(replicaTableName); Path dataLocation = replicaTable.getDataLocation(); assertEquals(REPLICA_EXTERNAL_BASE + sourceLocation.toUri().getPath(), dataLocation.toUri().getPath()); @@ -712,6 +712,64 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros assertTrue(dataPath.toUri().getPath().equalsIgnoreCase("/tmp/tmp1/abc/xyz")); } + @Test + public void testExternalTablesIncReplicationWithConcurrentDropTable() throws Throwable { + List<String> dumpWithClause = Collections.singletonList( + "'" + HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname + "'='true'" + ); + List<String> loadWithClause = externalTableBasePathWithClause(); + WarehouseInstance.Tuple tupleBootstrap = primary.run("use " + primaryDbName) + .run("create external table t1 (id int)") + .run("insert into table t1 values (1)") + .dump(primaryDbName, null, dumpWithClause); + + replica.load(replicatedDbName, tupleBootstrap.dumpLocation, loadWithClause); + + // Insert a row 
into "t1" and create another external table using data from "t1". + primary.run("use " + primaryDbName) + .run("insert into table t1 values (2)") + .run("create external table t2 as select * from t1"); + + // Inject a behavior so that getTable returns null for table "t1". This ensures the table is + // skipped for data files listing. + BehaviourInjection<Table, Table> tableNuller = new BehaviourInjection<Table, Table>() { + @Nullable + @Override + public Table apply(@Nullable Table table) { + LOG.info("Performing injection on table " + table.getTableName()); + if (table.getTableName().equalsIgnoreCase("t1")){ + injectionPathCalled = true; + return null; + } else { + nonInjectedPathCalled = true; + return table; + } + } + }; + InjectableBehaviourObjectStore.setGetTableBehaviour(tableNuller); + WarehouseInstance.Tuple tupleInc; + try { + // The t1 table will be skipped from data location listing. + tupleInc = primary.dump(primaryDbName, tupleBootstrap.lastReplicationId, dumpWithClause); + tableNuller.assertInjectionsPerformed(true, true); + } finally { + InjectableBehaviourObjectStore.resetGetTableBehaviour(); // reset the behaviour + } + + // Only table t2 should exist in the data location list file. + assertExternalFileInfo(Collections.singletonList("t2"), + new Path(tupleInc.dumpLocation, FILE_NAME)); + + // The newly inserted data "2" should be missing in table "t1". But, table t2 should exist and have + // inserted data. 
+ replica.load(replicatedDbName, tupleInc.dumpLocation, loadWithClause) + .run("use " + replicatedDbName) + .run("select id from t1 order by id") + .verifyResult("1") + .run("select id from t2 order by id") + .verifyResults(Arrays.asList("1", "2")); + } + private List<String> externalTableBasePathWithClause() throws IOException, SemanticException { Path externalTableLocation = new Path(REPLICA_EXTERNAL_BASE); DistributedFileSystem fileSystem = replica.miniDFSCluster.getFileSystem(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java index 11f8bc3..2f72e23 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java @@ -256,19 +256,25 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { try (Writer writer = new Writer(dumpRoot, conf)) { for (String tableName : Utils.matchesTbl(hiveDb, dbName, work.tableNameOrPattern)) { - Table table = hiveDb.getTable(dbName, tableName); + try { + Table table = hiveDb.getTable(dbName, tableName); - // Dump external table locations if required. - if (shouldDumpExternalTableLocation() && - TableType.EXTERNAL_TABLE.equals(table.getTableType())) { - writer.dataLocationDump(table); - } + // Dump external table locations if required. + if (shouldDumpExternalTableLocation() && + TableType.EXTERNAL_TABLE.equals(table.getTableType())) { + writer.dataLocationDump(table); + } - // Dump the table to be bootstrapped if required. - if (shouldBootstrapDumpTable(table)) { - HiveWrapper.Tuple<Table> tableTuple = new HiveWrapper(hiveDb, dbName).table(table); - dumpTable(dbName, tableName, validTxnList, dbRoot, bootDumpBeginReplId, hiveDb, - tableTuple); + // Dump the table to be bootstrapped if required. 
+ if (shouldBootstrapDumpTable(table)) { + HiveWrapper.Tuple<Table> tableTuple = new HiveWrapper(hiveDb, dbName).table(table); + dumpTable(dbName, tableName, validTxnList, dbRoot, bootDumpBeginReplId, hiveDb, + tableTuple); + } + } catch (InvalidTableException te) { + // Repl dump shouldn't fail if the table is dropped/renamed while dumping it. + // Just log a debug message and skip it. + LOG.debug(te.getMessage()); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java index 7e33f11..4c504be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -133,7 +134,18 @@ public final class ReplExternalTables { PathBuilder.fullyQualifiedHDFSUri(table.getDataLocation(), FileSystem.get(hiveConf)); write(lineFor(table.getTableName(), fullyQualifiedDataLocation, hiveConf)); if (table.isPartitioned()) { - List<Partition> partitions = Hive.get(hiveConf).getPartitions(table); + List<Partition> partitions; + try { + partitions = Hive.get(hiveConf).getPartitions(table); + } catch (HiveException e) { + if (e.getCause() instanceof NoSuchObjectException) { + // If table is dropped when dump in progress, just skip partitions data location dump + LOG.debug(e.getMessage()); + return; + } + throw e; + } + for (Partition partition : partitions) { boolean partitionLocOutsideTableLoc = !FileUtils.isPathWithinSubtree( partition.getDataLocation(), table.getDataLocation()