KUDU-2126: Add conditional check to prevent unnecessary fsyncs This patch includes a unit test to detect fsyncs that happened when a DeleteTablet RPC tries to tombstone an already tombstoned tablet. In order to fix the issue, this patch adds additional checking before executing another DeleteTabletData that leads to additional fsyncs.
Change-Id: Idde8b413374f43ca2ef09339f0f412208f03685e Reviewed-on: http://gerrit.cloudera.org:8080/9069 Reviewed-by: Mike Percy <[email protected]> Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7465c91b Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7465c91b Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7465c91b Branch: refs/heads/master Commit: 7465c91b84656281535fa825a9befb6bee172a9d Parents: 80e58a7 Author: Jeffrey F. Lukman <[email protected]> Authored: Thu Jan 18 15:41:39 2018 -0800 Committer: Todd Lipcon <[email protected]> Committed: Mon Jan 22 23:24:46 2018 +0000 ---------------------------------------------------------------------- .../integration-tests/delete_tablet-itest.cc | 53 ++++++++++++++++++++ src/kudu/tablet/tablet_metadata.cc | 3 ++ src/kudu/tablet/tablet_metadata.h | 7 +++ src/kudu/tserver/ts_tablet_manager.cc | 6 +++ 4 files changed, 69 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/7465c91b/src/kudu/integration-tests/delete_tablet-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/delete_tablet-itest.cc b/src/kudu/integration-tests/delete_tablet-itest.cc index a0e349a..513d5aa 100644 --- a/src/kudu/integration-tests/delete_tablet-itest.cc +++ b/src/kudu/integration-tests/delete_tablet-itest.cc @@ -35,6 +35,7 @@ #include "kudu/mini-cluster/internal_mini_cluster.h" #include "kudu/tablet/metadata.pb.h" #include "kudu/tablet/tablet.h" +#include "kudu/tablet/tablet_metadata.h" #include "kudu/tablet/tablet_replica.h" #include "kudu/tserver/mini_tablet_server.h" #include "kudu/tserver/tablet_server.h" @@ -47,6 +48,7 @@ DECLARE_int64(fs_wal_dir_reserved_bytes); using kudu::tablet::TABLET_DATA_DELETED; +using kudu::tablet::TABLET_DATA_TOMBSTONED; using kudu::tablet::TabletReplica; using kudu::itest::DeleteTablet; using std::atomic; @@ -164,4 +166,55 @@ TEST_F(DeleteTabletITest, TestLeaderElectionDuringDeleteTablet) { } } +// Regression test for KUDU-2126 : Ensure that a DeleteTablet() RPC +// on a tombstoned tablet does not cause any fsyncs. +TEST_F(DeleteTabletITest, TestNoOpDeleteTabletRPC) { + // Setup the Mini Cluster + NO_FATALS(StartCluster(/*num_tablet_servers=*/ 1)); + + // Determine the Mini Cluster Cluster workload + TestWorkload workload(cluster_.get()); + workload.set_num_replicas(1); + workload.Setup(); + workload.Start(); + workload.StopAndJoin(); + + auto* mts = cluster_->mini_tablet_server(0); + auto* ts = ts_map_[mts->uuid()]; + + // Verify number of tablets in the cluster + vector<string> tablet_ids = mts->ListTablets(); + ASSERT_EQ(1, tablet_ids.size()); + const string& tablet_id = tablet_ids[0]; + + // Get the Tablet Replica + scoped_refptr<TabletReplica> tablet_replica; + vector<scoped_refptr<TabletReplica>> tablet_replicas; + mts->server()->tablet_manager()->GetTabletReplicas(&tablet_replicas); + ASSERT_EQ(1, tablet_replicas.size()); + tablet_replica = tablet_replicas[0]; + auto flush_count = [&]() { + return tablet_replica->tablet_metadata()->flush_count_for_tests(); + }; + + // Kill the master, so we can send the DeleteTablet RPC + // without any interference + cluster_->mini_master()->Shutdown(); + + // Send the first DeleteTablet RPC + int flush_count_before = flush_count(); + MonoDelta timeout = MonoDelta::FromSeconds(30); + ASSERT_OK(DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout)); + int flush_count_after = flush_count(); + + ASSERT_EQ(2, flush_count_after - flush_count_before); + + // Send the second DeleteTablet RPC + flush_count_before = flush_count(); + ASSERT_OK(DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout)); + flush_count_after = flush_count(); + + ASSERT_EQ(0, flush_count_after - flush_count_before); +} + } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/7465c91b/src/kudu/tablet/tablet_metadata.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc index 641c925..004a5ed 100644 --- a/src/kudu/tablet/tablet_metadata.cc +++ b/src/kudu/tablet/tablet_metadata.cc @@ -290,6 +290,7 @@ TabletMetadata::TabletMetadata(FsManager* fs_manager, string tablet_id, tombstone_last_logged_opid_(std::move(tombstone_last_logged_opid)), num_flush_pins_(0), needs_flush_(false), + flush_count_for_tests_(0), pre_flush_callback_(Bind(DoNothingStatusClosure)) { CHECK(schema_->has_column_ids()); CHECK_GT(schema_->num_key_columns(), 0); @@ -308,6 +309,7 @@ TabletMetadata::TabletMetadata(FsManager* fs_manager, string tablet_id) schema_(nullptr), num_flush_pins_(0), needs_flush_(false), + flush_count_for_tests_(0), pre_flush_callback_(Bind(DoNothingStatusClosure)) {} Status TabletMetadata::LoadFromDisk() { @@ -615,6 +617,7 @@ Status TabletMetadata::ReplaceSuperBlockUnlocked(const TabletSuperBlockPB &pb) { fs_manager_->env(), path, pb, pb_util::OVERWRITE, pb_util::SYNC), Substitute("Failed to write tablet metadata $0", tablet_id_)); + flush_count_for_tests_++; RETURN_NOT_OK(UpdateOnDiskSize()); return Status::OK(); http://git-wip-us.apache.org/repos/asf/kudu/blob/7465c91b/src/kudu/tablet/tablet_metadata.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h index fd88ee8..fb63f9b 100644 --- a/src/kudu/tablet/tablet_metadata.h +++ b/src/kudu/tablet/tablet_metadata.h @@ -253,6 +253,10 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> { // Return standard "T xxx P yyy" log prefix. std::string LogPrefix() const; + int flush_count_for_tests() const { + return flush_count_for_tests_; + } + private: friend class RefCountedThreadSafe<TabletMetadata>; friend class MetadataTest; @@ -371,6 +375,9 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> { // metadata is persisted. bool needs_flush_; + // The number of times metadata has been flushed to disk + int flush_count_for_tests_; + // A callback that, if set, is called before this metadata is flushed // to disk. Protected by the 'flush_lock_'. StatusClosure pre_flush_callback_; http://git-wip-us.apache.org/repos/asf/kudu/blob/7465c91b/src/kudu/tserver/ts_tablet_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tserver/ts_tablet_manager.cc b/src/kudu/tserver/ts_tablet_manager.cc index 0c8d8c4..1d3abfc 100644 --- a/src/kudu/tserver/ts_tablet_manager.cc +++ b/src/kudu/tserver/ts_tablet_manager.cc @@ -794,6 +794,12 @@ Status TSTabletManager::DeleteTablet( bool tablet_already_deleted = (data_state == TABLET_DATA_DELETED || data_state == TABLET_DATA_TOMBSTONED); + // If a tablet is already tombstoned, then a request to tombstone + // the same tablet should become a no-op. + if (delete_type == TABLET_DATA_TOMBSTONED && data_state == TABLET_DATA_TOMBSTONED) { + return Status::OK(); + } + // They specified an "atomic" delete. Check the committed config's opid_index. // TODO(mpercy): There's actually a race here between the check and shutdown, // but it's tricky to fix. We could try checking again after the shutdown and
