Repository: kudu Updated Branches: refs/heads/master ec1e47f41 -> 5b09a693d
KUDU-2509 fix use-after-free in case of WAL replay error Fixed use-after-free mistake in case of a failure to apply a pending commit message from the WAL while bootstrapping a tablet. Also, a repro scenario to expose the use-after-free condition is added. Prior to the fix, the repro scenario would crash with SIGSEGV on Linux or with SIGBUS on OS X (at least for DEBUG builds). Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed Reviewed-on: http://gerrit.cloudera.org:8080/10997 Tested-by: Alexey Serbin <[email protected]> Reviewed-by: Adar Dembo <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6b429e8a Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6b429e8a Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6b429e8a Branch: refs/heads/master Commit: 6b429e8a42ad9fb12a97cc26e33ca19ac2626533 Parents: ec1e47f Author: Alexey Serbin <[email protected]> Authored: Thu Jul 19 21:05:54 2018 -0700 Committer: Alexey Serbin <[email protected]> Committed: Tue Jul 24 04:40:20 2018 +0000 ---------------------------------------------------------------------- src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++++++++++++++++++++++++++- src/kudu/tablet/tablet_bootstrap.cc | 2 +- 2 files changed, 74 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/6b429e8a/src/kudu/tablet/tablet_bootstrap-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc index 0b80c20..f5b2668 100644 --- a/src/kudu/tablet/tablet_bootstrap-test.cc +++ b/src/kudu/tablet/tablet_bootstrap-test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "kudu/consensus/log-test-base.h" +#include "kudu/tablet/tablet_bootstrap.h" #include <cstddef> #include <cstdint> @@ -35,7 +35,9 @@ #include "kudu/clock/logical_clock.h" #include "kudu/common/common.pb.h" #include "kudu/common/iterator.h" +#include "kudu/common/partial_row.h" #include "kudu/common/partition.h" +#include "kudu/common/row_operations.h" #include "kudu/common/schema.h" #include "kudu/common/timestamp.h" #include "kudu/common/wire_protocol-test-util.h" @@ -45,6 +47,7 @@ #include "kudu/consensus/consensus.pb.h" #include "kudu/consensus/consensus_meta.h" #include "kudu/consensus/consensus_meta_manager.h" +#include "kudu/consensus/log-test-base.h" #include "kudu/consensus/log.h" #include "kudu/consensus/log_anchor_registry.h" #include "kudu/consensus/log_reader.h" @@ -67,7 +70,6 @@ #include "kudu/tablet/tablet-test-util.h" #include "kudu/tablet/tablet.h" #include "kudu/tablet/tablet.pb.h" -#include "kudu/tablet/tablet_bootstrap.h" #include "kudu/tablet/tablet_metadata.h" #include "kudu/tablet/tablet_replica.h" #include "kudu/tserver/tserver.pb.h" @@ -675,5 +677,74 @@ TEST_F(BootstrapTest, TestConsensusOnlyOperationOutOfOrderTimestamp) { ASSERT_EQ(1, results.size()); } +// Regression test for KUDU-2509. There was a use-after-free bug that sometimes +// lead to SIGSEGV while replaying the WAL. This scenario would crash or +// at least UB sanitizer would report a warning if such condition exists. +TEST_F(BootstrapTest, TestKudu2509) { + ASSERT_OK(BuildLog()); + + consensus::ReplicateRefPtr replicate = consensus::make_scoped_refptr_replicate( + new consensus::ReplicateMsg()); + replicate->get()->set_op_type(consensus::WRITE_OP); + tserver::WriteRequestPB* batch_request = replicate->get()->mutable_write_request(); + ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema())); + batch_request->set_tablet_id(log::kTestTablet); + + // This appends Insert(1) with op 10.10 + const OpId insert_opid = MakeOpId(10, 10); + replicate->get()->mutable_id()->CopyFrom(insert_opid); + replicate->get()->set_timestamp(clock_->Now().ToUint64()); + AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1, + "this is a test insert", batch_request->mutable_row_operations()); + NO_FATALS(AppendReplicateBatch(replicate)); + + // This appends Mutate(1) with op 10.11. The operation would try to update + // a row having an extra column. This should fail since there hasn't been + // corresponding DDL operation committed yet. + const OpId mutate_opid = MakeOpId(10, 11); + batch_request->mutable_row_operations()->Clear(); + replicate->get()->mutable_id()->CopyFrom(mutate_opid); + replicate->get()->set_timestamp(clock_->Now().ToUint64()); + { + // Modify the existing schema to add an extra row. + SchemaBuilder builder(schema_); + ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING)); + const auto schema = builder.BuildWithoutIds(); + ASSERT_OK(SchemaToPB(schema, batch_request->mutable_schema())); + + KuduPartialRow row(&schema); + ASSERT_OK(row.SetInt32("key", 100)); + ASSERT_OK(row.SetInt32("int_val", 200)); + ASSERT_OK(row.SetStringCopy("string_val", "300")); + ASSERT_OK(row.SetStringCopy("string_val_extra", "100500")); + RowOperationsPBEncoder enc(batch_request->mutable_row_operations()); + enc.Add(RowOperationsPB::UPDATE, row); + } + NO_FATALS(AppendReplicateBatch(replicate)); + + // Now commit the mutate before the insert (in the log). + gscoped_ptr<consensus::CommitMsg> mutate_commit(new consensus::CommitMsg); + mutate_commit->set_op_type(consensus::WRITE_OP); + mutate_commit->mutable_commited_op_id()->CopyFrom(mutate_opid); + mutate_commit->mutable_result()->add_ops()->add_mutated_stores()->set_mrs_id(1); + NO_FATALS(AppendCommit(std::move(mutate_commit))); + + gscoped_ptr<consensus::CommitMsg> insert_commit(new consensus::CommitMsg); + insert_commit->set_op_type(consensus::WRITE_OP); + insert_commit->mutable_commited_op_id()->CopyFrom(insert_opid); + insert_commit->mutable_result()->add_ops()->add_mutated_stores()->set_mrs_id(1); + NO_FATALS(AppendCommit(std::move(insert_commit))); + + ConsensusBootstrapInfo boot_info; + shared_ptr<Tablet> tablet; + const auto s = BootstrapTestTablet(-1, -1, &tablet, &boot_info); + const auto& status_msg = s.ToString(); + ASSERT_TRUE(s.IsInvalidArgument()) << status_msg; + ASSERT_STR_CONTAINS(status_msg, + "Unable to bootstrap test tablet: Failed log replay."); + ASSERT_STR_CONTAINS(status_msg, + "column string_val_extra[string NULLABLE] not present in tablet"); +} + } // namespace tablet } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/6b429e8a/src/kudu/tablet/tablet_bootstrap.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_bootstrap.cc b/src/kudu/tablet/tablet_bootstrap.cc index 58f9f54..fcc472a 100644 --- a/src/kudu/tablet/tablet_bootstrap.cc +++ b/src/kudu/tablet/tablet_bootstrap.cc @@ -946,7 +946,6 @@ Status TabletBootstrap::HandleCommitMessage(ReplayState* state, LogEntryPB* comm // ... if it does, we apply it and all the commits that immediately follow in the sequence. OpId last_applied = commit_entry->commit().commited_op_id(); RETURN_NOT_OK(ApplyCommitMessage(state, commit_entry)); - delete commit_entry; auto iter = state->pending_commits.begin(); while (iter != state->pending_commits.end()) { @@ -960,6 +959,7 @@ Status TabletBootstrap::HandleCommitMessage(ReplayState* state, LogEntryPB* comm break; } + delete commit_entry; return Status::OK(); }
