kudu git commit: KUDU-2509 fix use-after-free in case of WAL replay error
Repository: kudu Updated Branches: refs/heads/branch-1.7.x cf21f3673 -> 7fba099d5 KUDU-2509 fix use-after-free in case of WAL replay error Fixed use-after-free mistake in case of a failure to apply a pending commit message from the WAL while bootstrapping a tablet. Also, a repro scenario to expose the use-after-free condition is added. Prior to the fix, the repro scenario would crash with SIGSEGV on Linux or with SIGBUS on OS X (at least for DEBUG builds). Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed Reviewed-on: http://gerrit.cloudera.org:8080/10997 Tested-by: Alexey Serbin Reviewed-by: Adar Dembo (cherry picked from commit 6b429e8a42ad9fb12a97cc26e33ca19ac2626533) Reviewed-on: http://gerrit.cloudera.org:8080/11063 Tested-by: Kudu Jenkins Reviewed-by: Mike Percy Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7fba099d Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7fba099d Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7fba099d Branch: refs/heads/branch-1.7.x Commit: 7fba099d553ed7b3e56c1d8e89dcc0bae4397616 Parents: cf21f36 Author: Alexey Serbin Authored: Thu Jul 19 21:05:54 2018 -0700 Committer: Alexey Serbin Committed: Tue Jul 31 23:44:10 2018 + -- src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++- src/kudu/tablet/tablet_bootstrap.cc | 2 +- 2 files changed, 74 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kudu/blob/7fba099d/src/kudu/tablet/tablet_bootstrap-test.cc -- diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc index e8131e0..ed32517 100644 --- a/src/kudu/tablet/tablet_bootstrap-test.cc +++ b/src/kudu/tablet/tablet_bootstrap-test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "kudu/consensus/log-test-base.h" +#include "kudu/tablet/tablet_bootstrap.h" #include #include @@ -35,7 +35,9 @@ #include "kudu/clock/logical_clock.h" #include "kudu/common/common.pb.h" #include "kudu/common/iterator.h" +#include "kudu/common/partial_row.h" #include "kudu/common/partition.h" +#include "kudu/common/row_operations.h" #include "kudu/common/schema.h" #include "kudu/common/timestamp.h" #include "kudu/common/wire_protocol-test-util.h" @@ -45,6 +47,7 @@ #include "kudu/consensus/consensus.pb.h" #include "kudu/consensus/consensus_meta.h" #include "kudu/consensus/consensus_meta_manager.h" +#include "kudu/consensus/log-test-base.h" #include "kudu/consensus/log.h" #include "kudu/consensus/log_anchor_registry.h" #include "kudu/consensus/log_reader.h" @@ -68,7 +71,6 @@ #include "kudu/tablet/tablet-test-util.h" #include "kudu/tablet/tablet.h" #include "kudu/tablet/tablet.pb.h" -#include "kudu/tablet/tablet_bootstrap.h" #include "kudu/tablet/tablet_metadata.h" #include "kudu/tablet/tablet_replica.h" #include "kudu/tserver/tserver.pb.h" @@ -676,5 +678,74 @@ TEST_F(BootstrapTest, TestConsensusOnlyOperationOutOfOrderTimestamp) { ASSERT_EQ(1, results.size()); } +// Regression test for KUDU-2509. There was a use-after-free bug that sometimes +// lead to SIGSEGV while replaying the WAL. This scenario would crash or +// at least UB sanitizer would report a warning if such condition exists. +TEST_F(BootstrapTest, TestKudu2509) { + ASSERT_OK(BuildLog()); + + consensus::ReplicateRefPtr replicate = consensus::make_scoped_refptr_replicate( + new consensus::ReplicateMsg()); + replicate->get()->set_op_type(consensus::WRITE_OP); + tserver::WriteRequestPB* batch_request = replicate->get()->mutable_write_request(); + ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema())); + batch_request->set_tablet_id(log::kTestTablet); + + // This appends Insert(1) with op 10.10 + const OpId insert_opid = MakeOpId(10, 10); + replicate->get()->mutable_id()->CopyFrom(insert_opid); + replicate->get()->set_timestamp(clock_->Now().ToUint64()); + AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1, + "this is a test insert", batch_request->mutable_row_operations()); + NO_FATALS(AppendReplicateBatch(replicate)); + + // This appends Mutate(1) with op 10.11. The operation would try to update + // a row having an extra column. This should fail since there hasn't been + // corresponding DDL operation committed yet. + const OpId mutate_opid = MakeOpId(10, 11); + batch_request->mutable_row_operations()->Clear(); + replicate->get()->mutable_id()->CopyFrom(mutate_opid); + replicate->get()->set_timestamp(clock_->Now().ToUint64()); + { +// Modify the existing schema to add an extra row. +SchemaBuilder builder(schema_); +ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING)); +
[1/2] kudu git commit: KUDU-2509 fix use-after-free in case of WAL replay error
Repository: kudu Updated Branches: refs/heads/master ec1e47f41 -> 5b09a693d KUDU-2509 fix use-after-free in case of WAL replay error Fixed use-after-free mistake in case of a failure to apply a pending commit message from the WAL while bootstrapping a tablet. Also, a repro scenario to expose the use-after-free condition is added. Prior to the fix, the repro scenario would crash with SIGSEGV on Linux or with SIGBUS on OS X (at least for DEBUG builds). Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed Reviewed-on: http://gerrit.cloudera.org:8080/10997 Tested-by: Alexey Serbin Reviewed-by: Adar Dembo Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6b429e8a Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6b429e8a Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6b429e8a Branch: refs/heads/master Commit: 6b429e8a42ad9fb12a97cc26e33ca19ac2626533 Parents: ec1e47f Author: Alexey Serbin Authored: Thu Jul 19 21:05:54 2018 -0700 Committer: Alexey Serbin Committed: Tue Jul 24 04:40:20 2018 + -- src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++- src/kudu/tablet/tablet_bootstrap.cc | 2 +- 2 files changed, 74 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/kudu/blob/6b429e8a/src/kudu/tablet/tablet_bootstrap-test.cc -- diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc index 0b80c20..f5b2668 100644 --- a/src/kudu/tablet/tablet_bootstrap-test.cc +++ b/src/kudu/tablet/tablet_bootstrap-test.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "kudu/consensus/log-test-base.h" +#include "kudu/tablet/tablet_bootstrap.h" #include #include @@ -35,7 +35,9 @@ #include "kudu/clock/logical_clock.h" #include "kudu/common/common.pb.h" #include "kudu/common/iterator.h" +#include "kudu/common/partial_row.h" #include "kudu/common/partition.h" +#include "kudu/common/row_operations.h" #include "kudu/common/schema.h" #include "kudu/common/timestamp.h" #include "kudu/common/wire_protocol-test-util.h" @@ -45,6 +47,7 @@ #include "kudu/consensus/consensus.pb.h" #include "kudu/consensus/consensus_meta.h" #include "kudu/consensus/consensus_meta_manager.h" +#include "kudu/consensus/log-test-base.h" #include "kudu/consensus/log.h" #include "kudu/consensus/log_anchor_registry.h" #include "kudu/consensus/log_reader.h" @@ -67,7 +70,6 @@ #include "kudu/tablet/tablet-test-util.h" #include "kudu/tablet/tablet.h" #include "kudu/tablet/tablet.pb.h" -#include "kudu/tablet/tablet_bootstrap.h" #include "kudu/tablet/tablet_metadata.h" #include "kudu/tablet/tablet_replica.h" #include "kudu/tserver/tserver.pb.h" @@ -675,5 +677,74 @@ TEST_F(BootstrapTest, TestConsensusOnlyOperationOutOfOrderTimestamp) { ASSERT_EQ(1, results.size()); } +// Regression test for KUDU-2509. There was a use-after-free bug that sometimes +// lead to SIGSEGV while replaying the WAL. This scenario would crash or +// at least UB sanitizer would report a warning if such condition exists. +TEST_F(BootstrapTest, TestKudu2509) { + ASSERT_OK(BuildLog()); + + consensus::ReplicateRefPtr replicate = consensus::make_scoped_refptr_replicate( + new consensus::ReplicateMsg()); + replicate->get()->set_op_type(consensus::WRITE_OP); + tserver::WriteRequestPB* batch_request = replicate->get()->mutable_write_request(); + ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema())); + batch_request->set_tablet_id(log::kTestTablet); + + // This appends Insert(1) with op 10.10 + const OpId insert_opid = MakeOpId(10, 10); + replicate->get()->mutable_id()->CopyFrom(insert_opid); + replicate->get()->set_timestamp(clock_->Now().ToUint64()); + AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1, + "this is a test insert", batch_request->mutable_row_operations()); + NO_FATALS(AppendReplicateBatch(replicate)); + + // This appends Mutate(1) with op 10.11. The operation would try to update + // a row having an extra column. This should fail since there hasn't been + // corresponding DDL operation committed yet. + const OpId mutate_opid = MakeOpId(10, 11); + batch_request->mutable_row_operations()->Clear(); + replicate->get()->mutable_id()->CopyFrom(mutate_opid); + replicate->get()->set_timestamp(clock_->Now().ToUint64()); + { +// Modify the existing schema to add an extra row. +SchemaBuilder builder(schema_); +ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING)); +const auto schema = builder.BuildWithoutIds(); +ASSERT_OK(SchemaToPB(schema, batch_request->mutable_schema())); + +KuduPartialRow row(); +ASSERT_OK(row.SetInt32("key",