KUDU-1551. Ignore log segments which were preallocated but have no header. This fixes a TS startup crash in the case that it finds a log segment which is preallocated but has no valid magic or header at the start.
The new test injects such a crash and verifies that the server can restart and replay its logs. Change-Id: I843cf483b93823cbcc5506958f62cbb076569ca8 Reviewed-on: http://gerrit.cloudera.org:8080/5276 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b4764f98 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b4764f98 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b4764f98 Branch: refs/heads/master Commit: b4764f98cc99a8b4f37e0be2c1ad90dc3c645903 Parents: 4aacaf6 Author: Todd Lipcon <[email protected]> Authored: Wed Nov 30 11:15:52 2016 -0800 Committer: Todd Lipcon <[email protected]> Committed: Wed Nov 30 21:31:18 2016 +0000 ---------------------------------------------------------------------- src/kudu/consensus/log-test.cc | 3 +- src/kudu/consensus/log_reader.cc | 13 ++++++-- src/kudu/consensus/log_util.cc | 18 +++++------ src/kudu/consensus/log_util.h | 13 ++++++++ src/kudu/integration-tests/ts_recovery-itest.cc | 33 ++++++++++++++++++++ 5 files changed, 66 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/b4764f98/src/kudu/consensus/log-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/log-test.cc b/src/kudu/consensus/log-test.cc index f6ee85d..e819909 100644 --- a/src/kudu/consensus/log-test.cc +++ b/src/kudu/consensus/log-test.cc @@ -257,8 +257,7 @@ TEST_F(LogTest, TestLogNotTrimmed) { // Test that the reader will not fail if a log file is completely blank. // This happens when it's opened but nothing has been written. -// The reader should gracefully handle this situation, but somehow expose that -// the segment is uninitialized. See KUDU-140. +// The reader should gracefully handle this situation. See KUDU-140. TEST_F(LogTest, TestBlankLogFile) { ASSERT_OK(BuildLog()); http://git-wip-us.apache.org/repos/asf/kudu/blob/b4764f98/src/kudu/consensus/log_reader.cc ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/log_reader.cc b/src/kudu/consensus/log_reader.cc index 4a86505..14b63c7 100644 --- a/src/kudu/consensus/log_reader.cc +++ b/src/kudu/consensus/log_reader.cc @@ -142,8 +142,17 @@ Status LogReader::Init(const string& tablet_wal_path) { if (HasPrefixString(log_file, FsManager::kWalFileNamePrefix)) { string fqp = JoinPathSegments(tablet_wal_path, log_file); scoped_refptr<ReadableLogSegment> segment; - RETURN_NOT_OK_PREPEND(ReadableLogSegment::Open(env, fqp, &segment), - "Unable to open readable log segment"); + Status s = ReadableLogSegment::Open(env, fqp, &segment); + if (s.IsUninitialized()) { + // This indicates that the segment was created but the writer + // crashed before the header was successfully written. In this + // case, we should skip it. + LOG(WARNING) << "Ignoring log segment " << log_file << " since it was uninitialized " + << "(probably left after a prior tablet server crash)"; + continue; + } + + RETURN_NOT_OK_PREPEND(s, "Unable to open readable log segment"); DCHECK(segment); CHECK(segment->IsInitialized()) << "Uninitialized segment at: " << segment->path(); http://git-wip-us.apache.org/repos/asf/kudu/blob/b4764f98/src/kudu/consensus/log_util.cc ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/log_util.cc b/src/kudu/consensus/log_util.cc index 780b517..a7cd07d 100644 --- a/src/kudu/consensus/log_util.cc +++ b/src/kudu/consensus/log_util.cc @@ -38,6 +38,7 @@ #include "kudu/util/crc.h" #include "kudu/util/debug/trace_event.h" #include "kudu/util/env_util.h" +#include "kudu/util/fault_injection.h" #include "kudu/util/flag_tags.h" #include "kudu/util/pb_util.h" #include "kudu/util/scoped_cleanup.h" @@ -58,6 +59,10 @@ DEFINE_bool(log_async_preallocate_segments, true, "Whether the WAL segments preallocation should happen asynchronously"); TAG_FLAG(log_async_preallocate_segments, advanced); +DEFINE_double(fault_crash_before_write_log_segment_header, 0.0, + "Fraction of the time we will crash just before writing the log segment header"); +TAG_FLAG(fault_crash_before_write_log_segment_header, unsafe); + namespace kudu { namespace log { @@ -382,14 +387,6 @@ Status ReadableLogSegment::ReadFileSize() { Status ReadableLogSegment::ReadHeader() { uint32_t header_size; RETURN_NOT_OK(ReadHeaderMagicAndHeaderLength(&header_size)); - if (header_size == 0) { - // If a log file has been pre-allocated but not initialized, then - // 'header_size' will be 0 even the file size is > 0; in this - // case, 'is_initialized_' remains set to false and return - // Status::OK() early. LogReader ignores segments where - // IsInitialized() returns false. - return Status::OK(); - } if (header_size > kLogSegmentMaxHeaderOrFooterSize) { return Status::Corruption( @@ -473,8 +470,7 @@ Status ReadableLogSegment::ParseHeaderMagicAndHeaderLength(const Slice &data, LOG(WARNING) << "Log segment file " << path() << " has 12 initial NULL bytes instead of " << "magic and header length: " << data.ToDebugString() << " and will be treated as a blank segment."; - *parsed_len = 0; - return Status::OK(); + return Status::Uninitialized("log magic and header length are all NULL bytes"); } // If no magic and not uninitialized, the file is considered corrupt. return Status::Corruption(Substitute("Invalid log segment file $0: Bad magic. $1", @@ -713,6 +709,8 @@ WritableLogSegment::WritableLogSegment(string path, written_offset_(0) {} Status WritableLogSegment::WriteHeaderAndOpen(const LogSegmentHeaderPB& new_header) { + MAYBE_FAULT(FLAGS_fault_crash_before_write_log_segment_header); + DCHECK(!IsHeaderWritten()) << "Can only call WriteHeader() once"; DCHECK(new_header.IsInitialized()) << "Log segment header must be initialized" << new_header.InitializationErrorString(); http://git-wip-us.apache.org/repos/asf/kudu/blob/b4764f98/src/kudu/consensus/log_util.h ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/log_util.h b/src/kudu/consensus/log_util.h index 8fd61d6..59b4826 100644 --- a/src/kudu/consensus/log_util.h +++ b/src/kudu/consensus/log_util.h @@ -265,10 +265,23 @@ class ReadableLogSegment : public RefCountedThreadSafe<ReadableLogSegment> { Status ReadFileSize(); + // Read the log file magic and header protobuf into 'header_'. Sets 'first_entry_offset_' + // to indicate the start of the actual log data. + // + // Returns Uninitialized() if the file appears to be preallocated but never + // written. Status ReadHeader(); + // Read the magic and header length from the top of the file, returning + // the header length in 'len'. + // + // Returns Uninitialized() if the file appears to be preallocated but never + // written. Status ReadHeaderMagicAndHeaderLength(uint32_t *len); + // Parse the magic and the PB-header length prefix from 'data'. + // In the case that 'data' is all '\0' bytes, indicating a preallocated + // but never-written segment, returns Status::Uninitialized(). Status ParseHeaderMagicAndHeaderLength(const Slice &data, uint32_t *parsed_len); Status ReadFooter(); http://git-wip-us.apache.org/repos/asf/kudu/blob/b4764f98/src/kudu/integration-tests/ts_recovery-itest.cc ---------------------------------------------------------------------- diff --git a/src/kudu/integration-tests/ts_recovery-itest.cc b/src/kudu/integration-tests/ts_recovery-itest.cc index a3cdebf..3480cec 100644 --- a/src/kudu/integration-tests/ts_recovery-itest.cc +++ b/src/kudu/integration-tests/ts_recovery-itest.cc @@ -191,6 +191,39 @@ TEST_F(TsRecoveryITest, TestCrashDuringLogReplay) { MonoDelta::FromSeconds(30))); } +// Regression test for KUDU-1551: if the tserver crashes after preallocating a segment +// but before writing its header, the TS would previously crash on restart. +// Instead, it should ignore the uninitialized segment. +TEST_F(TsRecoveryITest, TestCrashBeforeWriteLogSegmentHeader) { + NO_FATALS(StartCluster({ "--log_segment_size_mb=1" })); + + TestWorkload work(cluster_.get()); + work.set_num_replicas(1); + work.set_write_timeout_millis(100); + work.set_timeout_allowed(true); + work.Setup(); + + // Enable the fault point after creating the table, but before writing any data. + // Otherwise, we'd crash during creation of the tablet. + ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(0), + "fault_crash_before_write_log_segment_header", "0.5")); + work.Start(); + + // Wait for the process to crash during log roll. + ASSERT_OK(cluster_->tablet_server(0)->WaitForCrash(MonoDelta::FromSeconds(60))); + work.StopAndJoin(); + + cluster_->tablet_server(0)->Shutdown(); + ignore_result(cluster_->tablet_server(0)->Restart()); + + ClusterVerifier v(cluster_.get()); + NO_FATALS(v.CheckRowCountWithRetries(work.table_name(), + ClusterVerifier::AT_LEAST, + work.rows_inserted(), + MonoDelta::FromSeconds(60))); +} + + // A set of threads which pick rows which are known to exist in the table // and issue random updates against them. class UpdaterThreads {
