kudu git commit: KUDU-2509 fix use-after-free in case of WAL replay error

2018-08-01 Thread danburkert
Repository: kudu
Updated Branches:
  refs/heads/branch-1.7.x cf21f3673 -> 7fba099d5


KUDU-2509 fix use-after-free in case of WAL replay error

Fixed use-after-free mistake in case of a failure to apply a pending
commit message from the WAL while bootstrapping a tablet.

Also, a repro scenario to expose the use-after-free condition is added.
Prior to the fix, the repro scenario would crash with SIGSEGV on Linux
or with SIGBUS on OS X (at least for DEBUG builds).

Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed
Reviewed-on: http://gerrit.cloudera.org:8080/10997
Tested-by: Alexey Serbin 
Reviewed-by: Adar Dembo 
(cherry picked from commit 6b429e8a42ad9fb12a97cc26e33ca19ac2626533)
Reviewed-on: http://gerrit.cloudera.org:8080/11063
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy 


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/7fba099d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/7fba099d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/7fba099d

Branch: refs/heads/branch-1.7.x
Commit: 7fba099d553ed7b3e56c1d8e89dcc0bae4397616
Parents: cf21f36
Author: Alexey Serbin 
Authored: Thu Jul 19 21:05:54 2018 -0700
Committer: Alexey Serbin 
Committed: Tue Jul 31 23:44:10 2018 +

--
 src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++-
 src/kudu/tablet/tablet_bootstrap.cc  |  2 +-
 2 files changed, 74 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kudu/blob/7fba099d/src/kudu/tablet/tablet_bootstrap-test.cc
--
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc 
b/src/kudu/tablet/tablet_bootstrap-test.cc
index e8131e0..ed32517 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "kudu/consensus/log-test-base.h"
+#include "kudu/tablet/tablet_bootstrap.h"
 
 #include 
 #include 
@@ -35,7 +35,9 @@
 #include "kudu/clock/logical_clock.h"
 #include "kudu/common/common.pb.h"
 #include "kudu/common/iterator.h"
+#include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
+#include "kudu/common/row_operations.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/timestamp.h"
 #include "kudu/common/wire_protocol-test-util.h"
@@ -45,6 +47,7 @@
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/consensus_meta.h"
 #include "kudu/consensus/consensus_meta_manager.h"
+#include "kudu/consensus/log-test-base.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_anchor_registry.h"
 #include "kudu/consensus/log_reader.h"
@@ -68,7 +71,6 @@
 #include "kudu/tablet/tablet-test-util.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/tablet/tablet.pb.h"
-#include "kudu/tablet/tablet_bootstrap.h"
 #include "kudu/tablet/tablet_metadata.h"
 #include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/tserver.pb.h"
@@ -676,5 +678,74 @@ TEST_F(BootstrapTest, 
TestConsensusOnlyOperationOutOfOrderTimestamp) {
   ASSERT_EQ(1, results.size());
 }
 
+// Regression test for KUDU-2509. There was a use-after-free bug that sometimes
+// lead to SIGSEGV while replaying the WAL. This scenario would crash or
+// at least UB sanitizer would report a warning if such condition exists.
+TEST_F(BootstrapTest, TestKudu2509) {
+  ASSERT_OK(BuildLog());
+
+  consensus::ReplicateRefPtr replicate = 
consensus::make_scoped_refptr_replicate(
+  new consensus::ReplicateMsg());
+  replicate->get()->set_op_type(consensus::WRITE_OP);
+  tserver::WriteRequestPB* batch_request = 
replicate->get()->mutable_write_request();
+  ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema()));
+  batch_request->set_tablet_id(log::kTestTablet);
+
+  // This appends Insert(1) with op 10.10
+  const OpId insert_opid = MakeOpId(10, 10);
+  replicate->get()->mutable_id()->CopyFrom(insert_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1,
+ "this is a test insert", 
batch_request->mutable_row_operations());
+  NO_FATALS(AppendReplicateBatch(replicate));
+
+  // This appends Mutate(1) with op 10.11. The operation would try to update
+  // a row having an extra column. This should fail since there hasn't been
+  // corresponding DDL operation committed yet.
+  const OpId mutate_opid = MakeOpId(10, 11);
+  batch_request->mutable_row_operations()->Clear();
+  replicate->get()->mutable_id()->CopyFrom(mutate_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  {
+// Modify the existing schema to add an extra row.
+SchemaBuilder builder(schema_);
+ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING));
+

[1/2] kudu git commit: KUDU-2509 fix use-after-free in case of WAL replay error

2018-07-23 Thread alexey
Repository: kudu
Updated Branches:
  refs/heads/master ec1e47f41 -> 5b09a693d


KUDU-2509 fix use-after-free in case of WAL replay error

Fixed use-after-free mistake in case of a failure to apply a pending
commit message from the WAL while bootstrapping a tablet.

Also, a repro scenario to expose the use-after-free condition is added.
Prior to the fix, the repro scenario would crash with SIGSEGV on Linux
or with SIGBUS on OS X (at least for DEBUG builds).

Change-Id: I11373b1cc34d9e2e0181bee2d3841b49022218ed
Reviewed-on: http://gerrit.cloudera.org:8080/10997
Tested-by: Alexey Serbin 
Reviewed-by: Adar Dembo 


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/6b429e8a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/6b429e8a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/6b429e8a

Branch: refs/heads/master
Commit: 6b429e8a42ad9fb12a97cc26e33ca19ac2626533
Parents: ec1e47f
Author: Alexey Serbin 
Authored: Thu Jul 19 21:05:54 2018 -0700
Committer: Alexey Serbin 
Committed: Tue Jul 24 04:40:20 2018 +

--
 src/kudu/tablet/tablet_bootstrap-test.cc | 75 ++-
 src/kudu/tablet/tablet_bootstrap.cc  |  2 +-
 2 files changed, 74 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/kudu/blob/6b429e8a/src/kudu/tablet/tablet_bootstrap-test.cc
--
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc 
b/src/kudu/tablet/tablet_bootstrap-test.cc
index 0b80c20..f5b2668 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "kudu/consensus/log-test-base.h"
+#include "kudu/tablet/tablet_bootstrap.h"
 
 #include 
 #include 
@@ -35,7 +35,9 @@
 #include "kudu/clock/logical_clock.h"
 #include "kudu/common/common.pb.h"
 #include "kudu/common/iterator.h"
+#include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
+#include "kudu/common/row_operations.h"
 #include "kudu/common/schema.h"
 #include "kudu/common/timestamp.h"
 #include "kudu/common/wire_protocol-test-util.h"
@@ -45,6 +47,7 @@
 #include "kudu/consensus/consensus.pb.h"
 #include "kudu/consensus/consensus_meta.h"
 #include "kudu/consensus/consensus_meta_manager.h"
+#include "kudu/consensus/log-test-base.h"
 #include "kudu/consensus/log.h"
 #include "kudu/consensus/log_anchor_registry.h"
 #include "kudu/consensus/log_reader.h"
@@ -67,7 +70,6 @@
 #include "kudu/tablet/tablet-test-util.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/tablet/tablet.pb.h"
-#include "kudu/tablet/tablet_bootstrap.h"
 #include "kudu/tablet/tablet_metadata.h"
 #include "kudu/tablet/tablet_replica.h"
 #include "kudu/tserver/tserver.pb.h"
@@ -675,5 +677,74 @@ TEST_F(BootstrapTest, 
TestConsensusOnlyOperationOutOfOrderTimestamp) {
   ASSERT_EQ(1, results.size());
 }
 
+// Regression test for KUDU-2509. There was a use-after-free bug that sometimes
+// lead to SIGSEGV while replaying the WAL. This scenario would crash or
+// at least UB sanitizer would report a warning if such condition exists.
+TEST_F(BootstrapTest, TestKudu2509) {
+  ASSERT_OK(BuildLog());
+
+  consensus::ReplicateRefPtr replicate = 
consensus::make_scoped_refptr_replicate(
+  new consensus::ReplicateMsg());
+  replicate->get()->set_op_type(consensus::WRITE_OP);
+  tserver::WriteRequestPB* batch_request = 
replicate->get()->mutable_write_request();
+  ASSERT_OK(SchemaToPB(schema_, batch_request->mutable_schema()));
+  batch_request->set_tablet_id(log::kTestTablet);
+
+  // This appends Insert(1) with op 10.10
+  const OpId insert_opid = MakeOpId(10, 10);
+  replicate->get()->mutable_id()->CopyFrom(insert_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  AddTestRowToPB(RowOperationsPB::INSERT, schema_, 10, 1,
+ "this is a test insert", 
batch_request->mutable_row_operations());
+  NO_FATALS(AppendReplicateBatch(replicate));
+
+  // This appends Mutate(1) with op 10.11. The operation would try to update
+  // a row having an extra column. This should fail since there hasn't been
+  // corresponding DDL operation committed yet.
+  const OpId mutate_opid = MakeOpId(10, 11);
+  batch_request->mutable_row_operations()->Clear();
+  replicate->get()->mutable_id()->CopyFrom(mutate_opid);
+  replicate->get()->set_timestamp(clock_->Now().ToUint64());
+  {
+// Modify the existing schema to add an extra row.
+SchemaBuilder builder(schema_);
+ASSERT_OK(builder.AddNullableColumn("string_val_extra", STRING));
+const auto schema = builder.BuildWithoutIds();
+ASSERT_OK(SchemaToPB(schema, batch_request->mutable_schema()));
+
+KuduPartialRow row();
+ASSERT_OK(row.SetInt32("key",