env_util: Factor out helper DeleteExcessFilesByPattern() The logic contained in here is generally useful for log rotation purposes, and we can reuse it for minidump rotation in a later patch.
Change-Id: I6e76911b0a68f7c1397d93eb027b6a5c99e2fbed Reviewed-on: http://gerrit.cloudera.org:8080/5740 Tested-by: Kudu Jenkins Reviewed-by: Adar Dembo <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/fb3bbbc8 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/fb3bbbc8 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/fb3bbbc8 Branch: refs/heads/master Commit: fb3bbbc8ce8527a63144b268cbad4aee92da4c1f Parents: 6985a54 Author: Mike Percy <[email protected]> Authored: Wed Jan 18 17:49:05 2017 -0800 Committer: Adar Dembo <[email protected]> Committed: Fri Jan 20 21:10:47 2017 +0000 ---------------------------------------------------------------------- src/kudu/util/env_util-test.cc | 44 ++++++++++++++++++++++++++++++++++--- src/kudu/util/env_util.cc | 31 ++++++++++++++++++++++++++ src/kudu/util/env_util.h | 7 ++++++ src/kudu/util/logging.cc | 25 ++------------------- 4 files changed, 81 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/env_util-test.cc b/src/kudu/util/env_util-test.cc index 90b308d..7c79068 100644 --- a/src/kudu/util/env_util-test.cc +++ b/src/kudu/util/env_util-test.cc @@ -15,15 +15,20 @@ // specific language governing permissions and limitations // under the License. +#include <sys/statvfs.h> +#include <sys/time.h> #include <unistd.h> -#include "kudu/util/env_util.h" +#include <memory> +#include <unordered_set> #include <gflags/gflags.h> -#include <memory> -#include <sys/statvfs.h> +#include <glog/stl_logging.h> +#include "kudu/gutil/map-util.h" #include "kudu/gutil/strings/substitute.h" +#include "kudu/gutil/walltime.h" +#include "kudu/util/env_util.h" #include "kudu/util/path_util.h" #include "kudu/util/test_macros.h" #include "kudu/util/test_util.h" @@ -33,6 +38,7 @@ DECLARE_int64(disk_reserved_bytes_free_for_testing); using std::string; using std::unique_ptr; +using std::unordered_set; using strings::Substitute; namespace kudu { @@ -108,5 +114,37 @@ TEST_F(EnvUtilTest, TestCreateDirsRecursively) { ASSERT_TRUE(is_dir); } +// Ensure that DeleteExcessFilesByPattern() works. +// We ensure that the number of files remaining after running it is the number +// expected, and we manually set the modification times on the relevant files +// to allow us to test that files are deleted oldest-first. +TEST_F(EnvUtilTest, TestDeleteExcessFilesByPattern) { + string dir = JoinPathSegments(test_dir_, "excess"); + ASSERT_OK(env_->CreateDir(dir)); + vector<string> filenames = {"a", "b", "c", "d"}; + int now_sec = GetCurrentTimeMicros() / 1000; + for (int i = 0; i < filenames.size(); i++) { + const string& filename = filenames[i]; + string path = JoinPathSegments(dir, filename); + unique_ptr<WritableFile> file; + ASSERT_OK(env_->NewWritableFile(path, &file)); + ASSERT_OK(file->Close()); + + // Set the last-modified time of the file. + struct timeval target_time { .tv_sec = now_sec + (i * 2), .tv_usec = 0 }; + struct timeval times[2] = { target_time, target_time }; + ASSERT_EQ(0, utimes(path.c_str(), times)) << errno; + } + vector<string> children; + ASSERT_OK(env_->GetChildren(dir, &children)); + ASSERT_EQ(6, children.size()); // 4 files plus "." and "..". + ASSERT_OK(DeleteExcessFilesByPattern(env_, dir + "/*", 2)); + ASSERT_OK(env_->GetChildren(dir, &children)); + ASSERT_EQ(4, children.size()); // 2 files plus "." and "..". + unordered_set<string> children_set(children.begin(), children.end()); + unordered_set<string> expected_set({".", "..", "c", "d"}); + ASSERT_EQ(expected_set, children_set) << children; +} + } // namespace env_util } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/env_util.cc b/src/kudu/util/env_util.cc index e398831..178d391 100644 --- a/src/kudu/util/env_util.cc +++ b/src/kudu/util/env_util.cc @@ -236,6 +236,37 @@ Status CopyFile(Env* env, const string& source_path, const string& dest_path, return Status::OK(); } +Status DeleteExcessFilesByPattern(Env* env, const string& pattern, int max_matches) { + // Negative numbers don't make sense for our interface. + DCHECK_GE(max_matches, 0); + + vector<string> matching_files; + RETURN_NOT_OK(env->Glob(pattern, &matching_files)); + + if (matching_files.size() <= max_matches) { + return Status::OK(); + } + + vector<pair<time_t, string>> matching_file_mtimes; + for (string& matching_file_path : matching_files) { + int64_t mtime; + RETURN_NOT_OK(env->GetFileModifiedTime(matching_file_path, &mtime)); + matching_file_mtimes.emplace_back(mtime, std::move(matching_file_path)); + } + + // Use mtime to determine which matching files to delete. This could + // potentially be ambiguous, depending on the resolution of last-modified + // timestamp in the filesystem, but that is part of the contract. + std::sort(matching_file_mtimes.begin(), matching_file_mtimes.end()); + matching_file_mtimes.resize(matching_file_mtimes.size() - max_matches); + + for (const auto& matching_file : matching_file_mtimes) { + RETURN_NOT_OK(env->DeleteFile(matching_file.second)); + } + + return Status::OK(); +} + ScopedFileDeleter::ScopedFileDeleter(Env* env, std::string path) : env_(DCHECK_NOTNULL(env)), path_(std::move(path)), should_delete_(true) {} http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/env_util.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/env_util.h b/src/kudu/util/env_util.h index c54bfa8..884b17c 100644 --- a/src/kudu/util/env_util.h +++ b/src/kudu/util/env_util.h @@ -81,6 +81,13 @@ Status CreateDirsRecursively(Env* env, const std::string& path); Status CopyFile(Env* env, const std::string& source_path, const std::string& dest_path, WritableFileOptions opts); +// Deletes files matching 'pattern' in excess of 'max_matches' files. +// 'max_matches' must be greater than or equal to 0. +// The oldest files are deleted first, as determined by last modified time. +// In the case that multiple files have the same last modified time, it is not +// defined which file will be deleted first. +Status DeleteExcessFilesByPattern(Env* env, const std::string& pattern, int max_matches); + // Deletes a file or directory when this object goes out of scope. // // The deletion may be cancelled by calling .Cancel(). http://git-wip-us.apache.org/repos/asf/kudu/blob/fb3bbbc8/src/kudu/util/logging.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/logging.cc b/src/kudu/util/logging.cc index 4438380..8332349 100644 --- a/src/kudu/util/logging.cc +++ b/src/kudu/util/logging.cc @@ -38,6 +38,7 @@ #include "kudu/util/debug-util.h" #include "kudu/util/debug/leakcheck_disabler.h" #include "kudu/util/env.h" +#include "kudu/util/env_util.h" #include "kudu/util/flag_tags.h" #include "kudu/util/status.h" @@ -359,23 +360,10 @@ Status DeleteExcessLogFiles(Env* env) { if (max_log_files <= 0) return Status::OK(); for (int severity = 0; severity < google::NUM_SEVERITIES; ++severity) { - vector<string> logfiles; // Build glob pattern for input // e.g. /var/log/kudu/kudu-master.*.INFO.* string pattern = strings::Substitute("$0/$1.*.$2.*", FLAGS_log_dir, FLAGS_log_filename, google::GetLogSeverityName(severity)); - RETURN_NOT_OK(env->Glob(pattern, &logfiles)); - - if (logfiles.size() <= max_log_files) { - continue; - } - - vector<pair<time_t, string>> logfile_mtimes; - for (string& logfile : logfiles) { - int64_t mtime; - RETURN_NOT_OK(env->GetFileModifiedTime(logfile, &mtime)); - logfile_mtimes.emplace_back(mtime, std::move(logfile)); - } // Keep the 'max_log_files' most recent log files, as compared by // modification time. Glog files contain a second-granularity timestamp in @@ -383,16 +371,7 @@ Status DeleteExcessLogFiles(Env* env) { // guaranteed by glob, however this code has been adapted from Impala which // uses mtime to determine which files to delete, and there haven't been any // issues in production settings. - std::sort(logfile_mtimes.begin(), logfile_mtimes.end()); - logfile_mtimes.resize(logfile_mtimes.size() - max_log_files); - - VLOG(2) << "Deleting " << logfile_mtimes.size() - << " excess glog files at " << google::GetLogSeverityName(severity) - << " severity"; - - for (const auto& logfile: logfile_mtimes) { - RETURN_NOT_OK(env->DeleteFile(logfile.second)); - } + RETURN_NOT_OK(env_util::DeleteExcessFilesByPattern(env, pattern, max_log_files)); } return Status::OK(); }
