This is an automated email from the ASF dual-hosted git repository. adar pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit c10bae9ed9c063cffab3e45204e9f9e3fbdb95e8 Author: Andrew Wong <[email protected]> AuthorDate: Mon May 20 16:10:38 2019 -0700 hms: fix flakiness of master-stress-test from managed tables The switch to managed tables opened the door for the race described in HIVE-21759, described with more Kudu-specific details below: 1. Master A* sends a create table request for T. 2. The HMS creates a directory for that table and an entry in the HMS catalog. 3. Before A* can write this to the Kudu catalog, master B* becomes leader. A's write fails, and A sends a drop request to the HMS to roll back the created metadata for T. 4. The HMS receives A's drop request, and the HMS entry is dropped. Note: In the process of dropping a table from the HMS, the last thing the operation will do is drop any underlying data in the HMS (the directory, in this case). 5. The create request for T is retried by B*. Because the HMS entry for T is already dropped in the HMS, the request will be processed. 6. The HMS finally drops the underlying directory for T created by A. 7. Along the path of creating the table, the request from B* hits a "Not found" exception, expecting for the directory for T to exist. 8. The create table request is failed, since the HMS transaction to create the table is failed. The end result is that the table doesn't exist in Kudu or in the HMS following this sequence of events. This patch addresses this by catching such errors with the HMS enabled and checking that an underlying Kudu table and HMS entry is not created if so. Without this patch, master-stress-test failed 24/300 times with 7 stress threads. With it, it failed 9/300 times (mostly due to KUDU-2779). Change-Id: I1233e5b133035d36f98ce08265c0273570caa454 Reviewed-on: http://gerrit.cloudera.org:8080/13384 Reviewed-by: Hao Hao <[email protected]> Tested-by: Andrew Wong <[email protected]> --- src/kudu/integration-tests/master-stress-test.cc | 38 ++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/kudu/integration-tests/master-stress-test.cc b/src/kudu/integration-tests/master-stress-test.cc index f5471d6..815ab98 100644 --- a/src/kudu/integration-tests/master-stress-test.cc +++ b/src/kudu/integration-tests/master-stress-test.cc @@ -34,10 +34,14 @@ #include "kudu/client/schema.h" #include "kudu/client/shared_ptr.h" #include "kudu/common/common.pb.h" +#include "kudu/common/table_util.h" #include "kudu/common/wire_protocol.h" #include "kudu/gutil/stl_util.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/gutil/strings/util.h" +#include "kudu/hms/hive_metastore_types.h" +#include "kudu/hms/hms_client.h" +#include "kudu/hms/mini_hms.h" #include "kudu/integration-tests/cluster_itest_util.h" #include "kudu/integration-tests/external_mini_cluster-itest-base.h" #include "kudu/master/master.pb.h" @@ -47,6 +51,7 @@ #include "kudu/rpc/rpc_controller.h" #include "kudu/sentry/mini_sentry.h" #include "kudu/tablet/tablet.pb.h" +#include "kudu/thrift/client.h" #include "kudu/tools/tool_action_common.h" #include "kudu/tserver/tserver.pb.h" #include "kudu/util/atomic.h" @@ -57,6 +62,7 @@ #include "kudu/util/net/sockaddr.h" #include "kudu/util/oid_generator.h" #include "kudu/util/random.h" +#include "kudu/util/slice.h" #include "kudu/util/status.h" #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" @@ -78,11 +84,13 @@ using kudu::client::KuduClientBuilder; using kudu::client::KuduColumnSchema; using kudu::client::KuduSchema; using kudu::client::KuduSchemaBuilder; +using kudu::client::KuduTable; using kudu::client::KuduTableAlterer; using kudu::client::KuduTableCreator; using kudu::cluster::ExternalMaster; using kudu::cluster::ExternalMiniCluster; using kudu::cluster::ExternalMiniClusterOptions; +using kudu::hms::HmsClient; using kudu::itest::ListTablets; using kudu::itest::SentryMode; using kudu::master::ListTablesRequestPB; @@ -205,6 +213,13 @@ class MasterStressTest : public ExternalMiniClusterITestBase, itest::SetupAdministratorPrivileges(cluster_->kdc(), cluster_->sentry()->address()); } + if (std::get<0>(GetParam()) == HmsMode::ENABLE_METASTORE_INTEGRATION) { + thrift::ClientOptions hms_opts; + hms_opts.enable_kerberos = enable_sentry; + hms_opts.service_principal = "hive"; + hms_client_.reset(new HmsClient(cluster_->hms()->address(), hms_opts)); + ASSERT_OK(hms_client_->Start()); + } } void TearDown() override { @@ -259,6 +274,26 @@ class MasterStressTest : public ExternalMiniClusterITestBase, // following leader restart is robust. continue; } + // Because of HIVE-21759, it's possible that during a master leadership + // change, the rollback from an old leader will race with the create + // table from the new leader. In such cases, the create may fail, and the + // underlying Kudu table should not be created. + if (hms_client_ && s.IsRemoteError() && + MatchPattern(s.ToString(), "*FileNotFoundException*")) { + // Check that the table was not created in Kudu. + client::sp::shared_ptr<KuduTable> table; + s = client_->OpenTable(to_create, &table); + CHECK(s.IsNotFound()) << s.ToString(); + + // Check that the table was not created in the HMS. + Slice db; + Slice table_name; + CHECK_OK(ParseHiveTableIdentifier(to_create, &db, &table_name)); + hive::Table hms_table; + Status s = hms_client_->GetTable(db.ToString(), table_name.ToString(), &hms_table); + CHECK(s.IsNotFound()) << s.ToString(); + continue; + } CHECK_OK(s); num_tables_created_.Increment(); PutTableName(to_create); @@ -490,6 +525,9 @@ class MasterStressTest : public ExternalMiniClusterITestBase, unique_ptr<ExternalMiniCluster> cluster_; client::sp::shared_ptr<KuduClient> client_; + // HMS client to used to check on metadata if the HMS integration is enabled. + unique_ptr<HmsClient> hms_client_; + // Used to ListTablets in the ReplaceTablet thread. // This member is not protected by a lock but it is // only read by the ReplaceTablet threads.
