This is an automated email from the ASF dual-hosted git repository.

hulk pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 5cdbb195 Try to recover automatically from  SST corruption background 
error(#1667)
5cdbb195 is described below

commit 5cdbb1954657ba387490adeef2af361b744b21be
Author: Myth <[email protected]>
AuthorDate: Tue Aug 15 14:09:34 2023 +0800

    Try to recover automatically from  SST corruption background error(#1667)
    
    When an SST file is corrupted, which is an unrecoverable error for RocksDB,
RocksDB goes into read-only
mode (https://github.com/facebook/rocksdb/wiki/Background-Error-Handling). The
only way to try to recover from the error is to restart RocksDB.
    
    What does SST file corruption look like? The error message looks like one of these:
    ```
    1. Corruption: Corrupt or unsupported format_version: 1005 in 
/tmp/kvrocks/data/db/000038.sst
    2. Corruption: Bad table magic number: expected 9863518390377041911, found 
9863518390377041912 in /tmp/kvrocks_db/data/db/000038.sst
    3. Corruption: block checksum mismatch: stored = 3308200672, computed = 
51173877, type = 4 in /tmp/kvrocks_db/data/db/000038.sst offset 0 size 15715
    ```
    
    The cause of the error is usually a hardware issue or a problem with the 
network or cloud disk (when using the cloud disk).
    
    The most common place we see this error is when a file is generated by 
`Compaction` or `Flush` and the `Version` applies the result.
    
    In this case, the result of the compaction is not actually applied, so we
can ignore the error and avoid restarting RocksDB.
    
    TiKV introduced this check for SST file corruption; for details, refer
to:
    - https://github.com/tikv/tikv/issues/10578
    - https://github.com/tikv/tikv/pull/10961
    
    
    Let's try it on Kvrocks:
    1. Extract the sst file from the background error message
    2. Determine if it is a live file
    3. If not, we ignore the error and force recovery from the background error
    
    Regarding the RocksDB error message: before RocksDB v7.10.2, the error
message was incomplete and we could only recover from a limited number of
errors. Thanks to PR https://github.com/facebook/rocksdb/pull/11009, the
error message has been enriched and we can recover from more scenarios.
---
 src/common/string_util.cc         | 14 +++++++++
 src/common/string_util.h          |  1 +
 src/storage/event_listener.cc     | 65 ++++++++++++++++++++++++++-------------
 tests/cppunit/string_util_test.cc | 15 +++++++++
 4 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/src/common/string_util.cc b/src/common/string_util.cc
index 1b3eb39e..b3004a3a 100644
--- a/src/common/string_util.cc
+++ b/src/common/string_util.cc
@@ -22,6 +22,7 @@
 
 #include <fmt/format.h>
 
+#include <regex>
 #include <string>
 
 #include "parse_util.h"
@@ -214,6 +215,19 @@ int StringMatchLen(const char *pattern, size_t 
pattern_len, const char *string,
   return 0;
 }
 
+std::vector<std::string> RegexMatch(const std::string &str, const std::string 
&regex) {
+  std::regex base_regex(regex);
+  std::smatch pieces_match;
+  std::vector<std::string> out;
+
+  if (std::regex_match(str, pieces_match, base_regex)) {
+    for (const auto &piece : pieces_match) {
+      out.emplace_back(piece.str());
+    }
+  }
+  return out;
+}
+
 std::string StringToHex(const std::string &input) {
   static const char hex_digits[] = "0123456789ABCDEF";
   std::string output;
diff --git a/src/common/string_util.h b/src/common/string_util.h
index dfb00706..2ebd7639 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -34,6 +34,7 @@ std::vector<std::string> Split2KV(const std::string &in, 
const std::string &deli
 bool HasPrefix(const std::string &str, const std::string &prefix);
 int StringMatch(const std::string &pattern, const std::string &in, int nocase);
 int StringMatchLen(const char *p, size_t plen, const char *s, size_t slen, int 
nocase);
+std::vector<std::string> RegexMatch(const std::string &str, const std::string 
&regex);
 std::string StringToHex(const std::string &input);
 std::vector<std::string> TokenizeRedisProtocol(const std::string &value);
 std::string EscapeString(const std::string &s);
diff --git a/src/storage/event_listener.cc b/src/storage/event_listener.cc
index 46f399fc..6e054945 100644
--- a/src/storage/event_listener.cc
+++ b/src/storage/event_listener.cc
@@ -20,10 +20,20 @@
 
 #include "event_listener.h"
 
-#include <map>
 #include <string>
 #include <vector>
 
+#include "fmt/format.h"
+
+std::string BackgroundErrorReason2String(const rocksdb::BackgroundErrorReason 
reason) {
+  std::vector<std::string> background_error_reason = {
+      "flush", "compaction", "write_callback", "memtable", "manifest_write", 
"flush_no_wal", "manifest_write_no_wal"};
+  if (static_cast<size_t>(reason) < background_error_reason.size()) {
+    return background_error_reason[static_cast<size_t>(reason)];
+  }
+  return "unknown";
+}
+
 std::string FileCreatedReason2String(const rocksdb::TableFileCreationReason 
reason) {
   std::vector<std::string> file_created_reason = {"flush", "compaction", 
"recovery", "misc"};
   if (static_cast<size_t>(reason) < file_created_reason.size()) {
@@ -49,6 +59,14 @@ std::string CompressType2String(const 
rocksdb::CompressionType type) {
   return "unknown";
 }
 
+std::string ExtractSSTFileNameFromError(const std::string &error) {
+  auto match_results = util::RegexMatch(error, ".*(/\\w*\\.sst).*");
+  if (match_results.size() == 2) {
+    return match_results[1];
+  }
+  return {};
+}
+
 bool IsDiskQuotaExceeded(const rocksdb::Status &bg_error) {
   // EDQUOT: Disk quota exceeded (POSIX.1-2001)
   std::string exceeded_quota_str = "Disk quota exceeded";
@@ -58,7 +76,7 @@ bool IsDiskQuotaExceeded(const rocksdb::Status &bg_error) {
 }
 
 void EventListener::OnCompactionCompleted(rocksdb::DB *db, const 
rocksdb::CompactionJobInfo &ci) {
-  LOG(INFO) << "[event_listener/compaction_completed] column family: " << 
ci.cf_name
+  LOG(INFO) << "[event_listener/compaction_completed] column family: " << 
ci.cf_name << ", job_id: " << ci.job_id
             << ", compaction reason: " << 
static_cast<int>(ci.compaction_reason)
             << ", output compression type: " << 
CompressType2String(ci.compression)
             << ", base input level(files): " << ci.base_input_level << "(" << 
ci.input_files.size() << ")"
@@ -87,30 +105,35 @@ void EventListener::OnFlushCompleted(rocksdb::DB *db, 
const rocksdb::FlushJobInf
 }
 
 void EventListener::OnBackgroundError(rocksdb::BackgroundErrorReason reason, 
rocksdb::Status *bg_error) {
-  std::string reason_str;
-  switch (reason) {
-    case rocksdb::BackgroundErrorReason::kCompaction:
-      reason_str = "compact";
-      break;
-    case rocksdb::BackgroundErrorReason::kFlush:
-      reason_str = "flush";
-      break;
-    case rocksdb::BackgroundErrorReason::kMemTable:
-      reason_str = "memtable";
-      break;
-    case rocksdb::BackgroundErrorReason::kWriteCallback:
-      reason_str = "writecallback";
-      break;
-    default:
-      // Should not arrive here
-      break;
+  auto reason_str = BackgroundErrorReason2String(reason);
+  auto error_str = bg_error->ToString();
+  if (bg_error->IsCorruption() || bg_error->IsIOError()) {
+    // Background error may occur when SST are generated during 
flush/compaction. If those files are not applied
+    // to Version, we consider them non-fatal background error. We can 
override bg_error to recover from
+    // background error.
+    // Note that we cannot call Resume() manually because the error severity 
is unrecoverable.
+    auto corrupt_sst = ExtractSSTFileNameFromError(error_str);
+    if (!corrupt_sst.empty()) {
+      std::vector<std::string> live_files;
+      uint64_t manifest_size = 0;
+      auto s = storage_->GetDB()->GetLiveFiles(live_files, &manifest_size, 
false /* flush_memtable */);
+      if (s.ok() && std::find(live_files.begin(), live_files.end(), 
corrupt_sst) == live_files.end()) {
+        *bg_error = rocksdb::Status::OK();
+        LOG(WARNING) << fmt::format(
+            "[event_listener/background_error] ignore no-fatal background 
error about sst file, reason: {}, bg_error: "
+            "{}",
+            reason_str, error_str);
+        return;
+      }
+    }
   }
+
   if ((bg_error->IsNoSpace() || IsDiskQuotaExceeded(*bg_error)) &&
       bg_error->severity() < rocksdb::Status::kFatalError) {
     storage_->SetDBInRetryableIOError(true);
   }
 
-  LOG(ERROR) << "[event_listener/background_error] reason: " << reason_str << 
", bg_error: " << bg_error->ToString();
+  LOG(ERROR) << fmt::format("[event_listener/background_error] reason: {}, 
bg_error: {}", reason_str, error_str);
 }
 
 void EventListener::OnTableFileDeleted(const rocksdb::TableFileDeletionInfo 
&info) {
@@ -126,6 +149,6 @@ void EventListener::OnStallConditionsChanged(const 
rocksdb::WriteStallInfo &info
 
 void EventListener::OnTableFileCreated(const rocksdb::TableFileCreationInfo 
&info) {
   LOG(INFO) << "[event_listener/table_file_created] column family: " << 
info.cf_name
-            << ", file path: " << info.file_path << ", file size: " << 
info.file_size << ", job id: " << info.job_id
+            << ", file path: " << info.file_path << ", file size: " << 
info.file_size << ", job_id: " << info.job_id
             << ", reason: " << FileCreatedReason2String(info.reason) << ", 
status: " << info.status.ToString();
 }
diff --git a/tests/cppunit/string_util_test.cc 
b/tests/cppunit/string_util_test.cc
index a3092fed..f95ccbff 100644
--- a/tests/cppunit/string_util_test.cc
+++ b/tests/cppunit/string_util_test.cc
@@ -97,3 +97,18 @@ TEST(StringUtil, EscapeString) {
     ASSERT_TRUE(util::EscapeString(origin) == escaped);
   }
 }
+
+TEST(StringUtil, RegexMatchExtractSSTFile) {
+  // Test for ExtractSSTFileNameFromError() in event_listener.cc
+  auto bg_error_str = {"Corruption: Corrupt or unsupported format_version: 
1005 in /tmp/kvrocks/data/db/000038.sst",
+                       "Corruption: Bad table magic number: expected 
9863518390377041911, found 9863518390377041912 in "
+                       "/tmp/kvrocks_db/data/db/000038.sst",
+                       "Corruption: block checksum mismatch: stored = 
3308200672, computed = 51173877, type = 4  in "
+                       "/tmp/kvrocks_db/data/db/000038.sst offset 0 size 
15715"};
+
+  for (const auto &str : bg_error_str) {
+    auto match_results = util::RegexMatch(str, ".*(/\\w*\\.sst).*");
+    ASSERT_TRUE(match_results.size() == 2);
+    ASSERT_TRUE(match_results[1] == "/000038.sst");
+  }
+}

Reply via email to