This is an automated email from the ASF dual-hosted git repository.
wangdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
The following commit(s) were added to refs/heads/master by this push:
new ee369d380 fix: parse timestamp from the name of data dir for gc
instead of the last update time (#1667)
ee369d380 is described below
commit ee369d380cb871173983f04d7b877ae5cc913496
Author: Dan Wang <[email protected]>
AuthorDate: Tue Nov 7 15:16:49 2023 +0800
fix: parse timestamp from the name of data dir for gc instead of the last
update time (#1667)
https://github.com/apache/incubator-pegasus/issues/1673
It's found that sometimes the data directories of replicas are removed
immediately after they are renamed with postfixes `.err/.gar`, though
actually both of `gc_disk_error_replica_interval_seconds` and
`gc_disk_garbage_replica_interval_seconds` have been configured
with at least one day.
The reason is that the base time for expiration time is **the last write
time**, that is, `st_mtime` within `struct stat` returned by `stat()`.
Once a long time has passed since the last write time, the data directory
will be removed immediately after it is renamed with postfixes `.err/.gar`.
To fix this problem, just use the timestamp within the directory name as
the base time that is generated when the data directory is renamed with
postfixes `.err/.gar`. The last update time would be used iff the timestamp
is NOT found within the directory name.
---
src/replica/disk_cleaner.cpp | 200 +++++++++++++++++++++++++++------
src/replica/disk_cleaner.h | 23 +---
src/replica/replica_stub.cpp | 2 +-
src/replica/test/replica_disk_test.cpp | 80 +++++++++----
src/test_util/test_util.h | 14 ++-
src/utils/macros.h | 28 +++++
src/utils/metrics.h | 3 +-
7 files changed, 275 insertions(+), 75 deletions(-)
diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp
index 9488c32a6..cc9ffeb66 100644
--- a/src/replica/disk_cleaner.cpp
+++ b/src/replica/disk_cleaner.cpp
@@ -19,11 +19,13 @@
#include "disk_cleaner.h"
+#include <boost/algorithm/string/predicate.hpp>
#include <fmt/core.h>
#include <stdint.h>
#include <sys/types.h>
#include <algorithm>
#include <atomic>
+#include <cctype>
#include "common/fs_manager.h"
#include "metadata_types.h"
@@ -32,6 +34,9 @@
#include "utils/filesystem.h"
#include "utils/flags.h"
#include "utils/fmt_logging.h"
+#include "utils/macros.h"
+#include "utils/string_conv.h"
+#include "utils/string_view.h"
namespace dsn {
namespace replication {
@@ -70,6 +75,108 @@ const std::string kFolderSuffixBak = ".bak";
const std::string kFolderSuffixOri = ".ori";
const std::string kFolderSuffixTmp = ".tmp";
+namespace {
+
+// TODO(wangdan): we could study later whether ctime (i.e. `st_ctime` within
`struct stat`,
+// the time of last status change) could be used instead of mtime (i.e.
`st_ctime` within
+// `struct stat`, the last write time), since ctime of the new directory would
be updated
+// to the current time once rename() is called, while mtime would not be
updated.
+bool get_expiration_timestamp_by_last_write_time(const std::string &path,
+ uint64_t delay_seconds,
+ uint64_t
&expiration_timestamp_s)
+{
+ time_t last_write_time_s;
+ if (!dsn::utils::filesystem::last_write_time(path, last_write_time_s)) {
+ LOG_WARNING("gc_disk: failed to get last write time of {}", path);
+ return false;
+ }
+
+ expiration_timestamp_s = static_cast<uint64_t>(last_write_time_s) +
delay_seconds;
+ return true;
+}
+
+// Unix timestamp in microseconds for 2010-01-01 00:00:00 GMT+0000.
+// This timestamp could be used as the minimum, since it's far earlier than
the time when
+// Pegasus was born.
+#define MIN_TIMESTAMP_US 1262304000000000
+#define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1)
+
+// Parse timestamp from the directory name.
+//
+// There are only 2 kinds of directory names that could include timestamp: one
is the faulty
+// replicas whose name has suffix ".err"; another is the dropped replicas
whose name has
+// suffix ".gar". The examples for both kinds of directory names:
+// 1.1.pegasus.1698843209235962.err
+// 1.2.pegasus.1698843214240709.gar
+//
+// Specify the size of suffix by `suffix_size`. For both kinds of names (.err
and .gar),
+// `suffix_size` is 4.
+//
+// The timestamp is the number just before the suffix, between the 2 dots. For
example, in
+// 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp in
microseconds,
+// generated by dsn_now_us().
+//
+// `timestamp_us` is parsed result while returning true; otherwise, it would
never be assigned.
+bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t
×tamp_us)
+{
+ CHECK_GE(name.size(), suffix_size);
+
+ if (suffix_size == name.size()) {
+ return false;
+ }
+
+ const size_t end_idx = name.size() - suffix_size;
+ auto begin_idx = name.find_last_of('.', end_idx - 1);
+ if (begin_idx == std::string::npos || ++begin_idx >= end_idx) {
+ return false;
+ }
+
+ const auto length = end_idx - begin_idx;
+ if (length < MIN_TIMESTAMP_US_LENGTH) {
+ return false;
+ }
+
+ // std::isdigit() is not an addressable standard library function, thus it
can't be used
+ // directly as an algorithm predicate.
+ //
+ // See following docs for details.
+ //
https://stackoverflow.com/questions/75868796/differences-between-isdigit-and-stdisdigit
+ // https://en.cppreference.com/w/cpp/string/byte/isdigit
+ const auto begin_itr = name.cbegin() + begin_idx;
+ if (!std::all_of(
+ begin_itr, begin_itr + length, [](unsigned char c) { return
std::isdigit(c); })) {
+ return false;
+ }
+
+ const auto ok =
+ dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, length),
timestamp_us);
+ return ok ? timestamp_us > MIN_TIMESTAMP_US : false;
+}
+
+bool get_expiration_timestamp(const std::string &name,
+ const std::string &path,
+ size_t suffix_size,
+ uint64_t delay_seconds,
+ uint64_t &expiration_timestamp_s)
+{
+ uint64_t timestamp_us = 0;
+ if (!parse_timestamp_us(name, suffix_size, timestamp_us)) {
+ // Once the timestamp could not be extracted from the directory name,
the last write time
+ // would be used as the base time to compute the expiration time.
+ LOG_WARNING("gc_disk: failed to parse timestamp from {}, turn to "
+ "the last write time for {}",
+ name,
+ path);
+ return get_expiration_timestamp_by_last_write_time(
+ path, delay_seconds, expiration_timestamp_s);
+ }
+
+ expiration_timestamp_s = timestamp_us / 1000000 + delay_seconds;
+ return true;
+}
+
+} // anonymous namespace
+
error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>>
&dir_nodes,
/*output*/ disk_cleaning_report &report)
{
@@ -87,59 +194,85 @@ error_s disk_remove_useless_dirs(const
std::vector<std::shared_ptr<dir_node>> &d
}
sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end());
}
- for (auto &fpath : sub_list) {
- auto name = dsn::utils::filesystem::get_file_name(fpath);
- if (!is_data_dir_removable(name)) {
- continue;
- }
- std::string folder_suffix = name.substr(name.length() - 4);
-
- time_t mt;
- if (!dsn::utils::filesystem::last_write_time(fpath, mt)) {
- LOG_WARNING("gc_disk: failed to get last write time of {}", fpath);
- continue;
- }
- auto last_write_time = (uint64_t)mt;
- uint64_t current_time_ms = dsn_now_ms();
- uint64_t remove_interval_seconds = current_time_ms / 1000;
+ for (const auto &path : sub_list) {
+ uint64_t expiration_timestamp_s = 0;
- // don't delete ".bak" directory because it is backed by administrator.
- if (folder_suffix == kFolderSuffixErr) {
+ // Note: don't delete ".bak" directory since it could be did by
administrator.
+ const auto name = dsn::utils::filesystem::get_file_name(path);
+ if (boost::algorithm::ends_with(name, kFolderSuffixErr)) {
report.error_replica_count++;
- remove_interval_seconds =
FLAGS_gc_disk_error_replica_interval_seconds;
- } else if (folder_suffix == kFolderSuffixGar) {
+ if (!get_expiration_timestamp(name,
+ path,
+ kFolderSuffixErr.size(),
+
FLAGS_gc_disk_error_replica_interval_seconds,
+ expiration_timestamp_s)) {
+ continue;
+ }
+ } else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) {
report.garbage_replica_count++;
- remove_interval_seconds =
FLAGS_gc_disk_garbage_replica_interval_seconds;
- } else if (folder_suffix == kFolderSuffixTmp) {
+ if (!get_expiration_timestamp(name,
+ path,
+ kFolderSuffixGar.size(),
+
FLAGS_gc_disk_garbage_replica_interval_seconds,
+ expiration_timestamp_s)) {
+ continue;
+ }
+ } else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) {
report.disk_migrate_tmp_count++;
- remove_interval_seconds =
FLAGS_gc_disk_migration_tmp_replica_interval_seconds;
- } else if (folder_suffix == kFolderSuffixOri) {
+ if (!get_expiration_timestamp_by_last_write_time(
+ path,
+ FLAGS_gc_disk_migration_tmp_replica_interval_seconds,
+ expiration_timestamp_s)) {
+ continue;
+ }
+ } else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) {
report.disk_migrate_origin_count++;
- remove_interval_seconds =
FLAGS_gc_disk_migration_origin_replica_interval_seconds;
+ if (!get_expiration_timestamp_by_last_write_time(
+ path,
+ FLAGS_gc_disk_migration_origin_replica_interval_seconds,
+ expiration_timestamp_s)) {
+ continue;
+ }
+ } else {
+ continue;
}
- if (last_write_time + remove_interval_seconds <= current_time_ms /
1000) {
- if (!dsn::utils::filesystem::remove_path(fpath)) {
- LOG_WARNING("gc_disk: failed to delete directory '{}',
time_used_ms = {}",
- fpath,
- dsn_now_ms() - current_time_ms);
- } else {
+ const auto current_time_ms = dsn_now_ms();
+ if (expiration_timestamp_s <= current_time_ms / 1000) {
+ if (dsn::utils::filesystem::remove_path(path)) {
LOG_WARNING("gc_disk: replica_dir_op succeed to delete
directory '{}'"
", time_used_ms = {}",
- fpath,
+ path,
dsn_now_ms() - current_time_ms);
report.remove_dir_count++;
+ } else {
+ LOG_WARNING("gc_disk: failed to delete directory '{}',
time_used_ms = {}",
+ path,
+ dsn_now_ms() - current_time_ms);
}
} else {
LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}",
- fpath,
- last_write_time + remove_interval_seconds -
current_time_ms / 1000);
+ path,
+ expiration_timestamp_s - current_time_ms / 1000);
}
}
return error_s::ok();
}
+bool is_data_dir_removable(const std::string &dir)
+{
+ return boost::algorithm::ends_with(dir, kFolderSuffixErr) ||
+ boost::algorithm::ends_with(dir, kFolderSuffixGar) ||
+ boost::algorithm::ends_with(dir, kFolderSuffixTmp) ||
+ boost::algorithm::ends_with(dir, kFolderSuffixOri);
+}
+
+bool is_data_dir_invalid(const std::string &dir)
+{
+ return is_data_dir_removable(dir) || boost::algorithm::ends_with(dir,
kFolderSuffixBak);
+}
+
void move_to_err_path(const std::string &path, const std::string &log_prefix)
{
const std::string new_path = fmt::format("{}.{}{}", path, dsn_now_us(),
kFolderSuffixErr);
@@ -150,5 +283,6 @@ void move_to_err_path(const std::string &path, const
std::string &log_prefix)
new_path);
LOG_WARNING("{}: succeed to move directory from '{}' to '{}'", log_prefix,
path, new_path);
}
+
} // namespace replication
} // namespace dsn
diff --git a/src/replica/disk_cleaner.h b/src/replica/disk_cleaner.h
index 7961d084c..9e2edb752 100644
--- a/src/replica/disk_cleaner.h
+++ b/src/replica/disk_cleaner.h
@@ -55,26 +55,13 @@ struct disk_cleaning_report
extern error_s disk_remove_useless_dirs(const
std::vector<std::shared_ptr<dir_node>> &dir_nodes,
/*output*/ disk_cleaning_report
&report);
-inline bool is_data_dir_removable(const std::string &dir)
-{
- if (dir.length() < 4) {
- return false;
- }
- const std::string folder_suffix = dir.substr(dir.length() - 4);
- return (folder_suffix == kFolderSuffixErr || folder_suffix ==
kFolderSuffixGar ||
- folder_suffix == kFolderSuffixTmp || folder_suffix ==
kFolderSuffixOri);
-}
+bool is_data_dir_removable(const std::string &dir);
-// Note: ".bak" is invalid but not allow delete, because it can be backed by
administrator.
-inline bool is_data_dir_invalid(const std::string &dir)
-{
- if (dir.length() < 4) {
- return false;
- }
- const std::string folder_suffix = dir.substr(dir.length() - 4);
- return is_data_dir_removable(dir) || folder_suffix == kFolderSuffixBak;
-}
+// Note: ".bak" is invalid but not allowed to be deleted, because it could be
did by
+// administrator on purpose.
+bool is_data_dir_invalid(const std::string &dir);
void move_to_err_path(const std::string &path, const std::string &log_prefix);
+
} // namespace replication
} // namespace dsn
diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp
index c5bd5000a..e1815b48b 100644
--- a/src/replica/replica_stub.cpp
+++ b/src/replica/replica_stub.cpp
@@ -1754,7 +1754,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_,
gpid id)
CHECK(
dsn::utils::filesystem::directory_exists(replica_path), "dir({}) not
exist", replica_path);
LOG_INFO("start to move replica({}) as garbage, path: {}", id,
replica_path);
- const auto rename_path = fmt::format("{}.{}.gar", replica_path,
dsn_now_us());
+ const auto rename_path = fmt::format("{}.{}{}", replica_path,
dsn_now_us(), kFolderSuffixGar);
if (!dsn::utils::filesystem::rename_path(replica_path, rename_path)) {
LOG_WARNING("gc_replica: failed to move directory '{}' to '{}'",
replica_path, rename_path);
diff --git a/src/replica/test/replica_disk_test.cpp
b/src/replica/test/replica_disk_test.cpp
index 3bf745934..6717ffd40 100644
--- a/src/replica/test/replica_disk_test.cpp
+++ b/src/replica/test/replica_disk_test.cpp
@@ -17,6 +17,7 @@
* under the License.
*/
+#include <fmt/core.h>
// IWYU pragma: no_include <gtest/gtest-param-test.h>
// IWYU pragma: no_include <gtest/gtest-message.h>
// IWYU pragma: no_include <gtest/gtest-test-part.h>
@@ -42,6 +43,7 @@
#include "replica/test/mock_utils.h"
#include "replica_admin_types.h"
#include "replica_disk_test_base.h"
+#include "runtime/api_layer1.h"
#include "runtime/rpc/rpc_holder.h"
#include "test_util/test_util.h"
#include "utils/autoref_ptr.h"
@@ -198,46 +200,86 @@ TEST_P(replica_disk_test, on_query_disk_info_one_app)
}
}
+TEST_P(replica_disk_test, check_data_dir_removable)
+{
+ struct test_case
+ {
+ std::string path;
+ bool expected_removable;
+ bool expected_invalid;
+ } tests[] = {{"./replica.0.err", true, true},
+ {"./replica.1.gar", true, true},
+ {"./replica.2.tmp", true, true},
+ {"./replica.3.ori", true, true},
+ {"./replica.4.bak", false, true},
+ {"./replica.5.abcde", false, false},
+ {"./replica.6.x", false, false},
+ {"./replica.7.8", false, false}};
+
+ for (const auto &test : tests) {
+ EXPECT_EQ(test.expected_removable, is_data_dir_removable(test.path));
+ EXPECT_EQ(test.expected_invalid, is_data_dir_invalid(test.path));
+ }
+}
+
TEST_P(replica_disk_test, gc_disk_useless_dir)
{
+ PRESERVE_FLAG(gc_disk_error_replica_interval_seconds);
+ PRESERVE_FLAG(gc_disk_garbage_replica_interval_seconds);
+ PRESERVE_FLAG(gc_disk_migration_origin_replica_interval_seconds);
+ PRESERVE_FLAG(gc_disk_migration_tmp_replica_interval_seconds);
+
FLAGS_gc_disk_error_replica_interval_seconds = 1;
FLAGS_gc_disk_garbage_replica_interval_seconds = 1;
FLAGS_gc_disk_migration_origin_replica_interval_seconds = 1;
FLAGS_gc_disk_migration_tmp_replica_interval_seconds = 1;
- std::vector<std::string> tests{
- "./replica1.err",
- "./replica2.err",
- "./replica.gar",
- "./replica.tmp",
- "./replica.ori",
- "./replica.bak",
- "./replica.1.1",
- };
+ struct test_case
+ {
+ std::string path;
+ bool expected_exists;
+ } tests[] = {{"./replica1.err", false},
+ {"./replica2.err", false},
+ {"./replica.gar", false},
+ {"./replica.tmp", false},
+ {"./replica.ori", false},
+ {"./replica.bak", true},
+ {"./replica.1.1", true},
+ {"./1.1.pegasus.1234567890.err", false},
+ {"./1.2.pegasus.0123456789.gar", false},
+ {"./2.1.pegasus.1234567890123456.err", false},
+ {"./2.2.pegasus.1234567890abcdef.gar", false},
+ {fmt::format("./1.1.pegasus.{}.err", dsn_now_us()), false},
+ {fmt::format("./2.1.pegasus.{}.gar", dsn_now_us()), false},
+ {fmt::format("./1.2.pegasus.{}.gar", dsn_now_us() + 1000 *
1000 * 1000), true},
+ {fmt::format("./2.2.pegasus.{}.err", dsn_now_us() + 1000 *
1000 * 1000), true}};
for (const auto &test : tests) {
- utils::filesystem::create_directory(test);
- ASSERT_TRUE(utils::filesystem::directory_exists(test));
+ // Ensure that every directory does not exist and should be created.
+ CHECK_TRUE(utils::filesystem::create_directory(test.path));
+ ASSERT_TRUE(utils::filesystem::directory_exists(test.path));
}
sleep(5);
disk_cleaning_report report{};
-
dsn::replication::disk_remove_useless_dirs({std::make_shared<dir_node>("test",
"./")}, report);
+ ASSERT_TRUE(dsn::replication::disk_remove_useless_dirs(
+ {std::make_shared<dir_node>("test", "./")}, report));
for (const auto &test : tests) {
- if (!dsn::replication::is_data_dir_removable(test)) {
- ASSERT_TRUE(utils::filesystem::directory_exists(test));
- continue;
+ ASSERT_EQ(test.expected_exists,
utils::filesystem::directory_exists(test.path));
+ if (test.expected_exists) {
+ // Delete existing directories, in case that they are mixed with
later test cases
+ // to affect test results.
+ CHECK_TRUE(dsn::utils::filesystem::remove_path(test.path));
}
- ASSERT_FALSE(utils::filesystem::directory_exists(test));
}
- ASSERT_EQ(report.remove_dir_count, 5);
+ ASSERT_EQ(report.remove_dir_count, 11);
ASSERT_EQ(report.disk_migrate_origin_count, 1);
ASSERT_EQ(report.disk_migrate_tmp_count, 1);
- ASSERT_EQ(report.garbage_replica_count, 1);
- ASSERT_EQ(report.error_replica_count, 2);
+ ASSERT_EQ(report.garbage_replica_count, 5);
+ ASSERT_EQ(report.error_replica_count, 6);
}
TEST_P(replica_disk_test, disk_status_test)
diff --git a/src/test_util/test_util.h b/src/test_util/test_util.h
index ba253e7b3..90930c226 100644
--- a/src/test_util/test_util.h
+++ b/src/test_util/test_util.h
@@ -19,16 +19,20 @@
#pragma once
+#include <fmt/core.h>
+#include <gtest/gtest.h>
#include <chrono>
#include <cstdint>
#include <cstdio>
-#include <fmt/core.h>
#include <functional>
-#include <gtest/gtest.h>
#include <string>
#include "runtime/api_layer1.h"
#include "utils/env.h"
+// IWYU refused to include "utils/defer.h" everywhere, both in .h and .cpp
files.
+// However, once "utils/defer.h" is not included, it is inevitable that
compilation
+// will fail since dsn::defer is referenced. Thus force IWYU to keep it.
+#include "utils/defer.h" // IWYU pragma: keep
#include "utils/flags.h"
#include "utils/test_macros.h"
@@ -40,6 +44,12 @@ class file_meta;
DSN_DECLARE_bool(encrypt_data_at_rest);
+// Save the current value of a flag and restore it at the end of the function.
+#define PRESERVE_FLAG(name)
\
+ auto PRESERVED_FLAGS_##name = FLAGS_##name;
\
+ auto PRESERVED_FLAGS_##name##_cleanup =
\
+ dsn::defer([PRESERVED_FLAGS_##name]() { FLAGS_##name =
PRESERVED_FLAGS_##name; })
+
namespace pegasus {
// A base parameterized test class for testing enable/disable encryption at
rest.
diff --git a/src/utils/macros.h b/src/utils/macros.h
new file mode 100644
index 000000000..c44c45e25
--- /dev/null
+++ b/src/utils/macros.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+// Suppose there is a macro defined as:
+// #define FOO 123
+//
+// Once we need the value represented by FOO to be a string, i.e. "123", just
do:
+// STRINGIFY(FOO)
+//
+// See https://gcc.gnu.org/onlinedocs/gcc-4.8.5/cpp/Stringification.html for
details.
+#define STRINGIFY_HELPER(x) #x
+#define STRINGIFY(x) STRINGIFY_HELPER(x)
diff --git a/src/utils/metrics.h b/src/utils/metrics.h
index 27c6355f3..75afc31d8 100644
--- a/src/utils/metrics.h
+++ b/src/utils/metrics.h
@@ -46,6 +46,7 @@
#include "utils/enum_helper.h"
#include "utils/fmt_logging.h"
#include "utils/long_adder.h"
+#include "utils/macros.h"
#include "utils/nth_element.h"
#include "utils/ports.h"
#include "utils/singleton.h"
@@ -973,8 +974,6 @@ struct kth_percentile_property
double decimal;
};
-#define STRINGIFY_HELPER(x) #x
-#define STRINGIFY(x) STRINGIFY_HELPER(x)
#define STRINGIFY_KTH_PERCENTILE_NAME(kth) STRINGIFY(KTH_PERCENTILE_NAME(kth))
#define KTH_TO_DECIMAL(kth) 0.##kth
#define KTH_PERCENTILE_PROPERTY_LIST(kth)
\
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]