Repository: kudu
Updated Branches:
  refs/heads/master 28c4b8c28 -> 4b045c1d3


consensus: Don't replay config changes

We have invariants in place that make it unnecessary to "replay" a
config change operation, so we will now simply check those invariants
during tablet bootstrap.

This patch removes plumbing of ConsensusMetadataManager into
TabletBootstrap, since it is not needed there, and instead simply passes
in the previously-committed raft config from the consensus metadata in
order to check for correctness.

Change-Id: Id62bf96b21560b2ef5838415e52aeb3720875e62
Reviewed-on: http://gerrit.cloudera.org:8080/7614
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4b045c1d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4b045c1d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4b045c1d

Branch: refs/heads/master
Commit: 4b045c1d301fd9e805ac333feac5315962385307
Parents: 28c4b8c
Author: Mike Percy <mpe...@apache.org>
Authored: Fri Jul 28 13:58:32 2017 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Wed Aug 9 02:47:58 2017 +0000

----------------------------------------------------------------------
 src/kudu/consensus/consensus_meta_manager.cc | 16 ++++--
 src/kudu/master/sys_catalog.cc               |  5 +-
 src/kudu/tablet/tablet_bootstrap-test.cc     |  8 ++-
 src/kudu/tablet/tablet_bootstrap.cc          | 68 ++++++++---------------
 src/kudu/tablet/tablet_bootstrap.h           |  7 +--
 src/kudu/tserver/ts_tablet_manager.cc        | 11 +++-
 6 files changed, 55 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/4b045c1d/src/kudu/consensus/consensus_meta_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus_meta_manager.cc 
b/src/kudu/consensus/consensus_meta_manager.cc
index a43f5ce..f9dcb4e 100644
--- a/src/kudu/consensus/consensus_meta_manager.cc
+++ b/src/kudu/consensus/consensus_meta_manager.cc
@@ -21,12 +21,14 @@
 #include "kudu/consensus/consensus_meta.h"
 #include "kudu/fs/fs_manager.h"
 #include "kudu/gutil/map-util.h"
+#include "kudu/gutil/strings/substitute.h"
 
 namespace kudu {
 namespace consensus {
 
 using std::lock_guard;
 using std::string;
+using strings::Substitute;
 
 ConsensusMetadataManager::ConsensusMetadataManager(FsManager* fs_manager)
     : fs_manager_(DCHECK_NOTNULL(fs_manager)) {
@@ -37,8 +39,10 @@ Status ConsensusMetadataManager::Create(const string& 
tablet_id,
                                         int64_t current_term,
                                         scoped_refptr<ConsensusMetadata>* 
cmeta_out) {
   scoped_refptr<ConsensusMetadata> cmeta;
-  RETURN_NOT_OK(ConsensusMetadata::Create(fs_manager_, tablet_id, 
fs_manager_->uuid(),
-                                          config, current_term, &cmeta));
+  RETURN_NOT_OK_PREPEND(ConsensusMetadata::Create(fs_manager_, tablet_id, 
fs_manager_->uuid(),
+                                                  config, current_term, 
&cmeta),
+                        Substitute("Unable to create consensus metadata for 
tablet $0", tablet_id));
+
   lock_guard<Mutex> l(lock_);
   InsertOrDie(&cmeta_cache_, tablet_id, cmeta);
   if (cmeta_out) *cmeta_out = std::move(cmeta);
@@ -62,7 +66,9 @@ Status ConsensusMetadataManager::Load(const string& tablet_id,
 
   // If it's not yet cached, drop the lock before we load it.
   scoped_refptr<ConsensusMetadata> cmeta;
-  RETURN_NOT_OK(ConsensusMetadata::Load(fs_manager_, tablet_id, 
fs_manager_->uuid(), &cmeta));
+  RETURN_NOT_OK_PREPEND(ConsensusMetadata::Load(fs_manager_, tablet_id, 
fs_manager_->uuid(),
+                                                &cmeta),
+                        Substitute("Unable to load consensus metadata for 
tablet $0", tablet_id));
 
   // Cache and return the loaded ConsensusMetadata.
   {
@@ -81,7 +87,9 @@ Status ConsensusMetadataManager::Delete(const string& 
tablet_id) {
     lock_guard<Mutex> l(lock_);
     cmeta_cache_.erase(tablet_id); // OK to delete an uncached cmeta; ignore 
the return value.
   }
-  return ConsensusMetadata::DeleteOnDiskData(fs_manager_, tablet_id);
+  RETURN_NOT_OK_PREPEND(ConsensusMetadata::DeleteOnDiskData(fs_manager_, 
tablet_id),
+                        Substitute("Unable to delete consensus metadata for 
tablet $0", tablet_id));
+  return Status::OK();
 }
 
 } // namespace consensus

http://git-wip-us.apache.org/repos/asf/kudu/blob/4b045c1d/src/kudu/master/sys_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/sys_catalog.cc b/src/kudu/master/sys_catalog.cc
index 4b5e902..ad7e785 100644
--- a/src/kudu/master/sys_catalog.cc
+++ b/src/kudu/master/sys_catalog.cc
@@ -312,10 +312,13 @@ Status SysCatalogTable::SetupTablet(const 
scoped_refptr<tablet::TabletMetadata>&
       master_->tablet_apply_pool(),
       Bind(&SysCatalogTable::SysCatalogStateChanged, Unretained(this), 
metadata->tablet_id())));
 
+  scoped_refptr<ConsensusMetadata> cmeta;
+  RETURN_NOT_OK(cmeta_manager_->Load(metadata->tablet_id(), &cmeta));
+
   consensus::ConsensusBootstrapInfo consensus_info;
   tablet_replica_->SetBootstrapping();
   RETURN_NOT_OK(BootstrapTablet(metadata,
-                                cmeta_manager_,
+                                cmeta->CommittedConfig(),
                                 scoped_refptr<clock::Clock>(master_->clock()),
                                 master_->mem_tracker(),
                                 scoped_refptr<rpc::ResultTracker>(),

http://git-wip-us.apache.org/repos/asf/kudu/blob/4b045c1d/src/kudu/tablet/tablet_bootstrap-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc 
b/src/kudu/tablet/tablet_bootstrap-test.cc
index 433ae2b..68d6388 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -112,11 +112,15 @@ class BootstrapTest : public LogTestBase {
   Status RunBootstrapOnTestTablet(const scoped_refptr<TabletMetadata>& meta,
                                   shared_ptr<Tablet>* tablet,
                                   ConsensusBootstrapInfo* boot_info) {
+
+    scoped_refptr<ConsensusMetadata> cmeta;
+    RETURN_NOT_OK(cmeta_manager_->Load(meta->tablet_id(), &cmeta));
+
     scoped_refptr<LogAnchorRegistry> log_anchor_registry(new 
LogAnchorRegistry());
     // Now attempt to recover the log
     RETURN_NOT_OK(BootstrapTablet(
         meta,
-        cmeta_manager_,
+        cmeta->CommittedConfig(),
         
scoped_refptr<Clock>(LogicalClock::CreateStartingAt(Timestamp::kInitialTimestamp)),
         shared_ptr<MemTracker>(),
         scoped_refptr<rpc::ResultTracker>(),
@@ -411,7 +415,7 @@ TEST_F(BootstrapTest, TestMissingConsensusMetadata) {
   Status s = RunBootstrapOnTestTablet(meta, &tablet, &boot_info);
 
   ASSERT_TRUE(s.IsNotFound());
-  ASSERT_STR_CONTAINS(s.ToString(), "Unable to load Consensus metadata");
+  ASSERT_STR_CONTAINS(s.ToString(), "Unable to load consensus metadata");
 }
 
 TEST_F(BootstrapTest, TestOperationOverwriting) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/4b045c1d/src/kudu/tablet/tablet_bootstrap.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap.cc 
b/src/kudu/tablet/tablet_bootstrap.cc
index 21e93fb..1aa78cf 100644
--- a/src/kudu/tablet/tablet_bootstrap.cc
+++ b/src/kudu/tablet/tablet_bootstrap.cc
@@ -29,8 +29,6 @@
 #include "kudu/common/partial_row.h"
 #include "kudu/common/row_operations.h"
 #include "kudu/common/wire_protocol.h"
-#include "kudu/consensus/consensus_meta.h"
-#include "kudu/consensus/consensus_meta_manager.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_anchor_registry.h"
 #include "kudu/consensus/log_reader.h"
@@ -80,11 +78,8 @@ namespace tablet {
 using clock::Clock;
 using consensus::ALTER_SCHEMA_OP;
 using consensus::CHANGE_CONFIG_OP;
-using consensus::ChangeConfigRecordPB;
 using consensus::CommitMsg;
 using consensus::ConsensusBootstrapInfo;
-using consensus::ConsensusMetadata;
-using consensus::ConsensusMetadataManager;
 using consensus::MinimumOpId;
 using consensus::NO_OP;
 using consensus::OpId;
@@ -166,7 +161,7 @@ class FlushedStoresSnapshot {
 class TabletBootstrap {
  public:
   TabletBootstrap(const scoped_refptr<TabletMetadata>& tablet_meta,
-                  const scoped_refptr<ConsensusMetadataManager>& cmeta_manager,
+                  RaftConfigPB committed_raft_config,
                   const scoped_refptr<Clock>& clock,
                   shared_ptr<MemTracker> mem_tracker,
                   const scoped_refptr<ResultTracker>& result_tracker,
@@ -337,9 +332,9 @@ class TabletBootstrap {
   // Log a status message and set the TabletReplica's status as well.
   void SetStatusMessage(const string& status);
 
-  scoped_refptr<TabletMetadata> tablet_meta_;
-  const scoped_refptr<ConsensusMetadataManager> cmeta_manager_;
-  scoped_refptr<Clock> clock_;
+  const scoped_refptr<TabletMetadata> tablet_meta_;
+  const RaftConfigPB committed_raft_config_;
+  const scoped_refptr<Clock> clock_;
   shared_ptr<MemTracker> mem_tracker_;
   scoped_refptr<rpc::ResultTracker> result_tracker_;
   MetricRegistry* metric_registry_;
@@ -349,8 +344,6 @@ class TabletBootstrap {
   scoped_refptr<log::Log> log_;
   std::shared_ptr<log::LogReader> log_reader_;
 
-  scoped_refptr<ConsensusMetadata> cmeta_;
-
   // Statistics on the replay of entries in the log.
   struct Stats {
     Stats()
@@ -410,7 +403,7 @@ void TabletBootstrap::SetStatusMessage(const string& 
status) {
 }
 
 Status BootstrapTablet(const scoped_refptr<TabletMetadata>& tablet_meta,
-                       const scoped_refptr<ConsensusMetadataManager>& 
cmeta_manager,
+                       RaftConfigPB committed_raft_config,
                        const scoped_refptr<Clock>& clock,
                        const shared_ptr<MemTracker>& mem_tracker,
                        const scoped_refptr<ResultTracker>& result_tracker,
@@ -422,8 +415,8 @@ Status BootstrapTablet(const scoped_refptr<TabletMetadata>& 
tablet_meta,
                        ConsensusBootstrapInfo* consensus_info) {
   TRACE_EVENT1("tablet", "BootstrapTablet",
                "tablet_id", tablet_meta->tablet_id());
-  TabletBootstrap bootstrap(tablet_meta, cmeta_manager, clock, mem_tracker, 
result_tracker,
-                            metric_registry, tablet_replica, 
log_anchor_registry);
+  TabletBootstrap bootstrap(tablet_meta, std::move(committed_raft_config), 
clock, mem_tracker,
+                            result_tracker, metric_registry, tablet_replica, 
log_anchor_registry);
   RETURN_NOT_OK(bootstrap.Bootstrap(rebuilt_tablet, rebuilt_log, 
consensus_info));
   // This is necessary since OpenNewLog() initially disables sync.
   RETURN_NOT_OK((*rebuilt_log)->ReEnableSyncIfRequired());
@@ -450,14 +443,14 @@ static string DebugInfo(const string& tablet_id,
 
 TabletBootstrap::TabletBootstrap(
     const scoped_refptr<TabletMetadata>& tablet_meta,
-    const scoped_refptr<ConsensusMetadataManager>& cmeta_manager,
+    RaftConfigPB committed_raft_config,
     const scoped_refptr<Clock>& clock, shared_ptr<MemTracker> mem_tracker,
     const scoped_refptr<ResultTracker>& result_tracker,
     MetricRegistry* metric_registry,
     const scoped_refptr<TabletReplica>& tablet_replica,
     const scoped_refptr<LogAnchorRegistry>& log_anchor_registry)
     : tablet_meta_(tablet_meta),
-      cmeta_manager_(cmeta_manager),
+      committed_raft_config_(std::move(committed_raft_config)),
       clock_(clock),
       mem_tracker_(std::move(mem_tracker)),
       result_tracker_(result_tracker),
@@ -507,12 +500,6 @@ Status TabletBootstrap::RunBootstrap(shared_ptr<Tablet>* 
rebuilt_tablet,
                                      ConsensusBootstrapInfo* consensus_info) {
   string tablet_id = tablet_meta_->tablet_id();
 
-  // Replay requires a valid Consensus metadata file to exist in order to
-  // compare the committed consensus configuration seqno with the log entries 
and also to persist
-  // committed but unpersisted changes.
-  RETURN_NOT_OK_PREPEND(cmeta_manager_->Load(tablet_id, &cmeta_),
-                        "Unable to load Consensus metadata");
-
   // Make sure we don't try to locally bootstrap a tablet that was in the 
middle
   // of a tablet copy. It's likely that not all files were copied over
   // successfully.
@@ -563,9 +550,6 @@ Status TabletBootstrap::RunBootstrap(shared_ptr<Tablet>* 
rebuilt_tablet,
 
   RETURN_NOT_OK_PREPEND(PlaySegments(consensus_info), "Failed log replay. 
Reason");
 
-  // Flush the consensus metadata once at the end to persist our changes, if 
any.
-  cmeta_->Flush();
-
   RETURN_NOT_OK(RemoveRecoveryDir());
   FinishBootstrap("Bootstrap complete.", rebuilt_log, rebuilt_tablet);
 
@@ -1418,28 +1402,20 @@ Status 
TabletBootstrap::PlayAlterSchemaRequest(ReplicateMsg* replicate_msg,
 
 Status TabletBootstrap::PlayChangeConfigRequest(ReplicateMsg* replicate_msg,
                                                 const CommitMsg& commit_msg) {
-  ChangeConfigRecordPB* change_config = 
replicate_msg->mutable_change_config_record();
-  RaftConfigPB config = change_config->new_config();
-
-  int64_t cmeta_opid_index =  
cmeta_->GetConfigOpIdIndex(consensus::COMMITTED_CONFIG);
-  if (replicate_msg->id().index() > cmeta_opid_index) {
-    DCHECK(!config.has_opid_index());
-    config.set_opid_index(replicate_msg->id().index());
-    VLOG_WITH_PREFIX(1) << "WAL replay found Raft configuration with log index 
"
-                        << config.opid_index()
-                        << " that is greater than the committed config's index 
"
-                        << cmeta_opid_index
-                        << ". Applying this configuration change.";
-    cmeta_->set_committed_config(config);
-    // We flush once at the end of bootstrap.
-  } else {
-    VLOG_WITH_PREFIX(1) << "WAL replay found Raft configuration with log index 
"
-                        << replicate_msg->id().index()
-                        << ", which is less than or equal to the committed "
-                        << "config's index " << cmeta_opid_index << ". "
-                        << "Skipping application of this config change.";
+  // Invariant: The committed config change request is always locally persisted
+  // in the consensus metadata before the commit message is written to the WAL.
+  if (PREDICT_FALSE(replicate_msg->id().index() > 
committed_raft_config_.opid_index())) {
+    string msg = Substitute("Committed config change op in WAL has opid index 
($0) greater than "
+                            "config persisted in the consensus metadata ($1). "
+                            "Replicate message: {$2}. "
+                            "Committed raft config in consensus metadata: 
{$3}",
+                            replicate_msg->id().index(),
+                            committed_raft_config_.opid_index(),
+                            SecureShortDebugString(*replicate_msg),
+                            SecureShortDebugString(committed_raft_config_));
+    LOG_WITH_PREFIX(DFATAL) << msg;
+    return Status::Corruption(msg);
   }
-
   return AppendCommitMsg(commit_msg);
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/4b045c1d/src/kudu/tablet/tablet_bootstrap.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap.h 
b/src/kudu/tablet/tablet_bootstrap.h
index 65cd1df..83df3ff 100644
--- a/src/kudu/tablet/tablet_bootstrap.h
+++ b/src/kudu/tablet/tablet_bootstrap.h
@@ -14,8 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-#ifndef KUDU_TABLET_TABLET_BOOTSTRAP_H_
-#define KUDU_TABLET_TABLET_BOOTSTRAP_H_
+#pragma once
 
 #include <memory>
 #include <string>
@@ -66,7 +65,7 @@ extern const char* kLogRecoveryDir;
 // This is a synchronous method, but is typically called within a thread pool 
by
 // TSTabletManager.
 Status BootstrapTablet(const scoped_refptr<TabletMetadata>& tablet_meta,
-                       const 
scoped_refptr<consensus::ConsensusMetadataManager>& cmeta_manager,
+                       consensus::RaftConfigPB committed_raft_config,
                        const scoped_refptr<clock::Clock>& clock,
                        const std::shared_ptr<MemTracker>& mem_tracker,
                        const scoped_refptr<rpc::ResultTracker>& result_tracker,
@@ -79,5 +78,3 @@ Status BootstrapTablet(const scoped_refptr<TabletMetadata>& 
tablet_meta,
 
 }  // namespace tablet
 }  // namespace kudu
-
-#endif /* KUDU_TABLET_TABLET_BOOTSTRAP_H_ */

http://git-wip-us.apache.org/repos/asf/kudu/blob/4b045c1d/src/kudu/tserver/ts_tablet_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/ts_tablet_manager.cc 
b/src/kudu/tserver/ts_tablet_manager.cc
index ded1441..0d5e8dc 100644
--- a/src/kudu/tserver/ts_tablet_manager.cc
+++ b/src/kudu/tserver/ts_tablet_manager.cc
@@ -740,8 +740,15 @@ void TSTabletManager::OpenTablet(const 
scoped_refptr<TabletReplica>& replica,
   LOG(INFO) << LogPrefix(tablet_id) << "Bootstrapping tablet";
   TRACE("Bootstrapping tablet");
 
+  scoped_refptr<ConsensusMetadata> cmeta;
+  Status s = cmeta_manager_->Load(replica->tablet_id(), &cmeta);
+  if (PREDICT_FALSE(!s.ok())) {
+    LOG(ERROR) << LogPrefix(tablet_id) << "Failed to load consensus metadata: 
" << s.ToString();
+    replica->SetFailed(s);
+    return;
+  }
+
   consensus::ConsensusBootstrapInfo bootstrap_info;
-  Status s;
   LOG_TIMING_PREFIX(INFO, LogPrefix(tablet_id), "bootstrapping tablet") {
     // Disable tracing for the bootstrap, since this would result in
     // potentially millions of transaction traces being attached to the
@@ -752,7 +759,7 @@ void TSTabletManager::OpenTablet(const 
scoped_refptr<TabletReplica>& replica,
     // with a partially created tablet here?
     replica->SetBootstrapping();
     s = BootstrapTablet(replica->tablet_metadata(),
-                        cmeta_manager_,
+                        cmeta->CommittedConfig(),
                         scoped_refptr<clock::Clock>(server_->clock()),
                         server_->mem_tracker(),
                         server_->result_tracker(),

Reply via email to