This is an automated email from the ASF dual-hosted git repository.
zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new f130bd3 Use Env function to operate directory (#1980)
f130bd3 is described below
commit f130bd3e7b29729eabe32964b92922f68a9eb680
Author: ZHAO Chun <[email protected]>
AuthorDate: Tue Oct 15 09:25:12 2019 +0800
Use Env function to operate directory (#1980)
Now Env has unify all environment operation, such as file operation.
However some of our old functions don't leverage it. This change unify
FileUtils::scan_dir to use Env's function.
---
be/src/agent/task_worker_pool.cpp | 8 +--
be/src/common/status.h | 2 +
be/src/env/env.h | 16 ++++++
be/src/env/env_posix.cpp | 17 +++++++
be/src/http/action/restore_tablet_action.cpp | 3 +-
be/src/http/download_action.cpp | 3 +-
be/src/olap/data_dir.cpp | 3 +-
be/src/runtime/load_path_mgr.cpp | 18 ++++---
be/src/runtime/small_file_mgr.cpp | 20 +++++---
be/src/runtime/snapshot_loader.cpp | 3 +-
be/src/runtime/user_function_cache.cpp | 21 +++++---
be/src/util/doris_metrics.cpp | 6 ++-
be/src/util/file_utils.cpp | 76 ++++++----------------------
be/src/util/file_utils.h | 30 +++++++----
be/test/env/env_posix_test.cpp | 39 ++++++++++++++
15 files changed, 164 insertions(+), 101 deletions(-)
diff --git a/be/src/agent/task_worker_pool.cpp
b/be/src/agent/task_worker_pool.cpp
index db3bcad..be36334 100644
--- a/be/src/agent/task_worker_pool.cpp
+++ b/be/src/agent/task_worker_pool.cpp
@@ -29,10 +29,12 @@
#include <string>
#include <sys/stat.h>
-#include "boost/filesystem.hpp"
-#include "boost/lexical_cast.hpp"
+#include <boost/filesystem.hpp>
+#include <boost/lexical_cast.hpp>
+
#include "agent/status.h"
#include "agent/utils.h"
+#include "env/env.h"
#include "gen_cpp/FrontendService.h"
#include "gen_cpp/Types_types.h"
#include "http/http_client.h"
@@ -1552,7 +1554,7 @@ void*
TaskWorkerPool::_make_snapshot_thread_callback(void* arg_this) {
std::stringstream ss;
ss << snapshot_path << "/" << snapshot_request.tablet_id
<< "/" << snapshot_request.schema_hash << "/";
- Status st = FileUtils::scan_dir(ss.str(), &snapshot_files);
+ Status st = FileUtils::list_files(Env::Default(), ss.str(),
&snapshot_files);
if (!st.ok()) {
status_code = TStatusCode::RUNTIME_ERROR;
OLAP_LOG_WARNING("make_snapshot failed. tablet_id: %ld,
schema_hash: %ld, version: %d,"
diff --git a/be/src/common/status.h b/be/src/common/status.h
index 655b3c8..e095f88 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -129,6 +129,8 @@ public:
bool is_thrift_rpc_error() const { return code() ==
TStatusCode::THRIFT_RPC_ERROR; }
bool is_end_of_file() const { return code() == TStatusCode::END_OF_FILE; }
+
+ bool is_not_found() const { return code() == TStatusCode::NOT_FOUND; }
// Convert into TStatus. Call this if 'status_container' contains an
optional
// TStatus field named 'status'. This also sets __isset.status.
template <typename T>
diff --git a/be/src/env/env.h b/be/src/env/env.h
index 338fb08..704c860 100644
--- a/be/src/env/env.h
+++ b/be/src/env/env.h
@@ -118,6 +118,22 @@ public:
virtual Status get_children(const std::string& dir,
std::vector<std::string>* result) = 0;
+ // Iterate the specified directory and call given callback function with
child's
+ // name. This function continues execution until all children have been
iterated
+ // or callback function return false.
+ // The names are relative to "dir".
+ //
+ // The function call extra cost is acceptable. Compared with returning all
children
+ // into a given vector, the performance of this method is 5% worse.
However this
+ // approach is more flexiable and efficient in fulfilling other
requirements.
+ //
+ // Returns OK if "dir" exists.
+ // NotFound if "dir" does not exist, the calling process does not
have
+ // permission to access "dir", or if "dir" is invalid.
+ // IOError if an IO Error was encountered
+ virtual Status iterate_dir(const std::string& dir,
+ const std::function<bool(const char*)>& cb) = 0;
+
// Delete the named file.
virtual Status delete_file(const std::string& fname) = 0;
diff --git a/be/src/env/env_posix.cpp b/be/src/env/env_posix.cpp
index 4350016..4bb087b 100644
--- a/be/src/env/env_posix.cpp
+++ b/be/src/env/env_posix.cpp
@@ -565,6 +565,23 @@ public:
return Status::OK();
}
+ Status iterate_dir(const std::string& dir,
+ const std::function<bool(const char*)>& cb) override {
+ DIR* d = opendir(dir.c_str());
+ if (d == nullptr) {
+ return io_error(dir, errno);
+ }
+ struct dirent* entry;
+ while ((entry = readdir(d)) != nullptr) {
+ // callback returning false means to terminate iteration
+ if (!cb(entry->d_name)) {
+ break;
+ }
+ }
+ closedir(d);
+ return Status::OK();
+ }
+
Status delete_file(const std::string& fname) override {
if (unlink(fname.c_str()) != 0) {
return io_error(fname, errno);
diff --git a/be/src/http/action/restore_tablet_action.cpp
b/be/src/http/action/restore_tablet_action.cpp
index 6d0e1a7..bb24881 100644
--- a/be/src/http/action/restore_tablet_action.cpp
+++ b/be/src/http/action/restore_tablet_action.cpp
@@ -24,6 +24,7 @@
#include "boost/lexical_cast.hpp"
#include "agent/cgroups_mgr.h"
+#include "env/env.h"
#include "http/http_channel.h"
#include "http/http_headers.h"
#include "http/http_request.h"
@@ -189,7 +190,7 @@ Status RestoreTabletAction::_restore(const std::string&
key, int64_t tablet_id,
Status RestoreTabletAction::_create_hard_link_recursive(const std::string&
src, const std::string& dst) {
std::vector<std::string> files;
- RETURN_IF_ERROR(FileUtils::scan_dir(src, &files));
+ RETURN_IF_ERROR(FileUtils::list_files(Env::Default(), src, &files));
for (auto& file : files) {
std::string from = src + "/" + file;
std::string to = dst + "/" + file;
diff --git a/be/src/http/download_action.cpp b/be/src/http/download_action.cpp
index 1d4db19..4a2afad 100644
--- a/be/src/http/download_action.cpp
+++ b/be/src/http/download_action.cpp
@@ -29,6 +29,7 @@
#include <boost/filesystem.hpp>
#include "agent/cgroups_mgr.h"
+#include "env/env.h"
#include "http/http_channel.h"
#include "http/http_headers.h"
#include "http/http_request.h"
@@ -138,7 +139,7 @@ void DownloadAction::handle(HttpRequest *req) {
void DownloadAction::do_dir_response(
const std::string& dir_path, HttpRequest *req) {
std::vector<std::string> files;
- Status status = FileUtils::scan_dir(dir_path, &files);
+ Status status = FileUtils::list_files(Env::Default(), dir_path, &files);
if (!status.ok()) {
LOG(WARNING) << "Failed to scan dir. dir=" << dir_path;
HttpChannel::send_error(req, HttpStatus::INTERNAL_SERVER_ERROR);
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index dba6d76..acd8da4 100755
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -37,6 +37,7 @@
#include <boost/filesystem.hpp>
#include <boost/interprocess/sync/file_lock.hpp>
+#include "env/env.h"
#include "olap/file_helper.h"
#include "olap/olap_define.h"
#include "olap/olap_snapshot_converter.h"
@@ -472,7 +473,7 @@ void DataDir::find_tablet_in_trash(int64_t tablet_id,
std::vector<std::string>*
// path: /root_path/trash/time_label/tablet_id/schema_hash
std::string trash_path = _path + TRASH_PREFIX;
std::vector<std::string> sub_dirs;
- FileUtils::scan_dir(trash_path, &sub_dirs);
+ FileUtils::list_files(Env::Default(), trash_path, &sub_dirs);
for (auto& sub_dir : sub_dirs) {
// sub dir is time_label
std::string sub_path = trash_path + "/" + sub_dir;
diff --git a/be/src/runtime/load_path_mgr.cpp b/be/src/runtime/load_path_mgr.cpp
index b992c92..a794ae7 100644
--- a/be/src/runtime/load_path_mgr.cpp
+++ b/be/src/runtime/load_path_mgr.cpp
@@ -24,6 +24,7 @@
#include <boost/algorithm/string/join.hpp>
+#include "env/env.h"
#include "olap/olap_define.h"
#include "olap/storage_engine.h"
#include "util/file_utils.h"
@@ -168,9 +169,12 @@ void LoadPathMgr::process_path(time_t now, const
std::string& path, int64_t rese
}
void LoadPathMgr::clean_one_path(const std::string& path) {
+ Env* env = Env::Default();
+
std::vector<std::string> dbs;
- Status status = FileUtils::scan_dir(path, &dbs);
- if (!status.ok()) {
+ Status status = FileUtils::list_files(env, path, &dbs);
+ // path may not exist
+ if (!status.ok() && !status.is_not_found()) {
LOG(WARNING) << "scan one path to delete directory failed. path=" <<
path;
return;
}
@@ -179,7 +183,7 @@ void LoadPathMgr::clean_one_path(const std::string& path) {
for (auto& db : dbs) {
std::string db_dir = path + "/" + db;
std::vector<std::string> sub_dirs;
- status = FileUtils::scan_dir(db_dir, &sub_dirs);
+ status = FileUtils::list_files(env, db_dir, &sub_dirs);
if (!status.ok()) {
LOG(WARNING) << "scan db of trash dir failed, continue. dir=" <<
db_dir;
continue;
@@ -192,7 +196,7 @@ void LoadPathMgr::clean_one_path(const std::string& path) {
// sub_dir starts with SHARD_PREFIX
// process shard sub dir
std::vector<std::string> labels;
- Status status = FileUtils::scan_dir(sub_path, &labels);
+ Status status = FileUtils::list_files(env, sub_path, &labels);
if (!status.ok()) {
LOG(WARNING) << "scan one path to delete directory failed.
path=" << sub_path;
continue;
@@ -217,9 +221,11 @@ void LoadPathMgr::clean() {
}
void LoadPathMgr::clean_error_log() {
+ Env* env = Env::Default();
+
time_t now = time(nullptr);
std::vector<std::string> sub_dirs;
- Status status = FileUtils::scan_dir(_error_log_dir, &sub_dirs);
+ Status status = FileUtils::list_files(env, _error_log_dir, &sub_dirs);
if (!status.ok()) {
LOG(WARNING) << "scan error_log dir failed. dir=" << _error_log_dir;
return;
@@ -232,7 +238,7 @@ void LoadPathMgr::clean_error_log() {
// sub_dir starts with SHARD_PREFIX
// process shard sub dir
std::vector<std::string> error_log_files;
- Status status = FileUtils::scan_dir(sub_path, &error_log_files);
+ Status status = FileUtils::list_files(env, sub_path,
&error_log_files);
if (!status.ok()) {
LOG(WARNING) << "scan one path to delete directory failed.
path=" << sub_path;
continue;
diff --git a/be/src/runtime/small_file_mgr.cpp
b/be/src/runtime/small_file_mgr.cpp
index ced27a3..7b6c80f 100644
--- a/be/src/runtime/small_file_mgr.cpp
+++ b/be/src/runtime/small_file_mgr.cpp
@@ -21,17 +21,18 @@
#include <stdio.h>
#include <sstream>
+#include <boost/algorithm/string/split.hpp> // boost::split
+#include <boost/algorithm/string/predicate.hpp> //
boost::algorithm::starts_with
+#include <boost/algorithm/string/classification.hpp> // boost::is_any_of
+
#include "common/status.h"
+#include "env/env.h"
+#include "gen_cpp/HeartbeatService.h"
#include "http/http_client.h"
#include "runtime/exec_env.h"
#include "util/file_utils.h"
#include "util/md5.h"
-#include <boost/algorithm/string/split.hpp> // boost::split
-#include <boost/algorithm/string/predicate.hpp> //
boost::algorithm::starts_with
-#include <boost/algorithm/string/classification.hpp> // boost::is_any_of
-
-#include "gen_cpp/HeartbeatService.h"
namespace doris {
@@ -53,15 +54,18 @@ Status SmallFileMgr::init() {
Status SmallFileMgr::_load_local_files() {
RETURN_IF_ERROR(FileUtils::create_dir(_local_path));
- auto scan_cb = [this] (const std::string& dir, const std::string& file) {
- auto st = _load_single_file(dir, file);
+ auto scan_cb = [this] (const char* file) {
+ if (is_dot_or_dotdot(file)) {
+ return true;
+ }
+ auto st = _load_single_file(_local_path, file);
if (!st.ok()) {
LOG(WARNING) << "load small file failed: " << st.get_error_msg();
}
return true;
};
- RETURN_IF_ERROR(FileUtils::scan_dir(_local_path, scan_cb));
+ RETURN_IF_ERROR(Env::Default()->iterate_dir(_local_path, scan_cb));
return Status::OK();
}
diff --git a/be/src/runtime/snapshot_loader.cpp
b/be/src/runtime/snapshot_loader.cpp
index 8f560e7..cf2fc0b 100644
--- a/be/src/runtime/snapshot_loader.cpp
+++ b/be/src/runtime/snapshot_loader.cpp
@@ -26,6 +26,7 @@
#include "gen_cpp/HeartbeatService_types.h"
#include "common/logging.h"
+#include "env/env.h"
#include "exec/broker_reader.h"
#include "exec/broker_writer.h"
#include "olap/file_helper.h"
@@ -771,7 +772,7 @@ Status SnapshotLoader::_get_existing_files_from_local(
const std::string& local_path,
std::vector<std::string>* local_files) {
- Status status = FileUtils::scan_dir(local_path, local_files);
+ Status status = FileUtils::list_files(Env::Default(), local_path,
local_files);
if (!status.ok()) {
std::stringstream ss;
ss << "failed to list files in local path: " << local_path << ", msg: "
diff --git a/be/src/runtime/user_function_cache.cpp
b/be/src/runtime/user_function_cache.cpp
index 26b6ebd..9e15435 100644
--- a/be/src/runtime/user_function_cache.cpp
+++ b/be/src/runtime/user_function_cache.cpp
@@ -24,6 +24,7 @@
#include <boost/algorithm/string/predicate.hpp> // boost::algorithm::ends_with
#include <boost/algorithm/string/classification.hpp> // boost::is_any_of
+#include "env/env.h"
#include "http/http_client.h"
#include "util/dynamic_util.h"
#include "util/file_utils.h"
@@ -155,17 +156,21 @@ Status UserFunctionCache::_load_cached_lib() {
// create library directory if not exist
RETURN_IF_ERROR(FileUtils::create_dir(_lib_dir));
- auto scan_cb = [this] (const std::string& dir, const std::string& file) {
- auto st = _load_entry_from_lib(dir, file);
- if (!st.ok()) {
- LOG(WARNING) << "load a library failed, dir=" << dir << ", file="
<< file;
- }
- return true;
- };
for (int i = 0; i < kLibShardNum; ++i) {
std::string sub_dir = _lib_dir + "/" + std::to_string(i);
RETURN_IF_ERROR(FileUtils::create_dir(sub_dir));
- RETURN_IF_ERROR(FileUtils::scan_dir(sub_dir, scan_cb));
+
+ auto scan_cb = [this, &sub_dir] (const char* file) {
+ if (is_dot_or_dotdot(file)) {
+ return true;
+ }
+ auto st = _load_entry_from_lib(sub_dir, file);
+ if (!st.ok()) {
+ LOG(WARNING) << "load a library failed, dir=" << sub_dir << ",
file=" << file;
+ }
+ return true;
+ };
+ RETURN_IF_ERROR(Env::Default()->iterate_dir(sub_dir, scan_cb));
}
return Status::OK();
}
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index 8e71f42..a4c08ea 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -20,6 +20,8 @@
#include "util/doris_metrics.h"
+#include "env/env.h"
+
#include "util/debug_util.h"
#include "util/file_utils.h"
#include "util/system_metrics.h"
@@ -303,7 +305,7 @@ void DorisMetrics::_update_process_thread_num() {
ss << "/proc/" << pid << "/task/";
int64_t count = 0;
- Status st = FileUtils::scan_dir(ss.str(), nullptr, &count);
+ Status st = FileUtils::get_children_count(Env::Default(), ss.str(),
&count);
if (!st.ok()) {
LOG(WARNING) << "failed to count thread num from: " << ss.str();
process_thread_num.set_value(0);
@@ -321,7 +323,7 @@ void DorisMetrics::_update_process_fd_num() {
std::stringstream ss;
ss << "/proc/" << pid << "/fd/";
int64_t count = 0;
- Status st = FileUtils::scan_dir(ss.str(), nullptr, &count);
+ Status st = FileUtils::get_children_count(Env::Default(), ss.str(),
&count);
if (!st.ok()) {
LOG(WARNING) << "failed to count fd from: " << ss.str();
process_fd_num_used.set_value(0);
diff --git a/be/src/util/file_utils.cpp b/be/src/util/file_utils.cpp
index 8073ee3..7caa90e 100644
--- a/be/src/util/file_utils.cpp
+++ b/be/src/util/file_utils.cpp
@@ -33,6 +33,7 @@
#include <openssl/md5.h>
+#include "env/env.h"
#include "olap/file_helper.h"
#include "util/defer_op.h"
@@ -81,70 +82,25 @@ Status FileUtils::remove_all(const std::string& file_path) {
return Status::OK();
}
-Status FileUtils::scan_dir(
- const std::string& dir_path, std::vector<std::string>* files,
- int64_t* file_count) {
-
- DIR* dir = opendir(dir_path.c_str());
- if (dir == nullptr) {
- char buf[64];
- std::stringstream ss;
- ss << "opendir(" << dir_path << ") failed, because: " <<
strerror_r(errno, buf, 64);
- return Status::InternalError(ss.str());
- }
- DeferOp close_dir(std::bind<void>(&closedir, dir));
-
- int64_t count = 0;
- while (true) {
- auto result = readdir(dir);
- if (result == nullptr) {
- break;
- }
- std::string file_name = result->d_name;
- if (file_name == "." || file_name == "..") {
- continue;
+Status FileUtils::list_files(Env* env, const std::string& dir,
+ std::vector<std::string>* files) {
+ auto cb = [files](const char* name) -> bool {
+ if (!is_dot_or_dotdot(name)) {
+ files->push_back(name);
}
-
- if (files != nullptr) {
- files->emplace_back(std::move(file_name));
- }
- count++;
- }
-
- if (file_count != nullptr) {
- *file_count = count;
- }
-
- return Status::OK();
+ return true;
+ };
+ return env->iterate_dir(dir, cb);
}
-Status FileUtils::scan_dir(
- const std::string& dir_path,
- const std::function<bool(const std::string&, const std::string&)>&
callback) {
- auto dir_closer = [] (DIR* dir) { closedir(dir); };
- std::unique_ptr<DIR, decltype(dir_closer)> dir(opendir(dir_path.c_str()),
dir_closer);
- if (dir == nullptr) {
- char buf[64];
- LOG(WARNING) << "fail to open dir, dir=" << dir_path << ", errmsg=" <<
strerror_r(errno, buf, 64);
- return Status::InternalError("fail to opendir");
- }
-
- while (true) {
- auto result = readdir(dir.get());
- if (result == nullptr) {
- break;
- }
- std::string file_name = result->d_name;
- if (file_name == "." || file_name == "..") {
- continue;
+Status FileUtils::get_children_count(Env* env, const std::string& dir,
int64_t* count) {
+ auto cb = [count](const char* name) -> bool {
+ if (!is_dot_or_dotdot(name)) {
+ *count += 1;
}
- auto is_continue = callback(dir_path, file_name);
- if (!is_continue) {
- break;
- }
- }
-
- return Status::OK();
+ return true;
+ };
+ return env->iterate_dir(dir, cb);
}
bool FileUtils::is_dir(const std::string& path) {
diff --git a/be/src/util/file_utils.h b/be/src/util/file_utils.h
index dd0343f..8043841 100644
--- a/be/src/util/file_utils.h
+++ b/be/src/util/file_utils.h
@@ -25,8 +25,16 @@
namespace doris {
+class Env;
+
+// Return true if file is '.' or '..'
+inline bool is_dot_or_dotdot(const char* name) {
+ return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] ==
'\0'));
+}
+
class FileUtils {
public:
+
// Create directory of dir_path,
// This function will create directory recursively,
// if dir's parent directory doesn't exist
@@ -38,16 +46,18 @@ public:
// Delete file recursively.
static Status remove_all(const std::string& dir_path);
- // Scan dir path and return all files in this path without '.' and '..'
- // Item in files is the filename in 'dir_path', which is not absolute path
- // if files == nullptr, no file names will be returned.
- // if file_count != nullptr, it will save the number of files.
- static Status scan_dir(
- const std::string& dir_path, std::vector<std::string>* files,
- int64_t* file_count = nullptr);
- static Status scan_dir(
- const std::string& dir_path,
- const std::function<bool(const std::string&, const std::string&)>&
callback);
+ // List all files in the specified directory without '.' and '..'.
+ // If you want retreive all files, you can use Env::iterate_dir.
+ // All valid files will be stored in given *files.
+ static Status list_files(
+ Env* env,
+ const std::string& dir,
+ std::vector<std::string>* files);
+
+ // Get the number of children belong to the specified directory, this
+ // funciton also exclude '.' and '..'.
+ // Return OK with *count is set to the count, if execute successful.
+ static Status get_children_count(Env* env, const std::string& dir,
int64_t* count);
// If the file_path is not exist, or is not a dir, return false.
static bool is_dir(const std::string& file_path);
diff --git a/be/test/env/env_posix_test.cpp b/be/test/env/env_posix_test.cpp
index eef4e1b..d21eb68 100644
--- a/be/test/env/env_posix_test.cpp
+++ b/be/test/env/env_posix_test.cpp
@@ -19,7 +19,10 @@
#include <gtest/gtest.h>
+#include <algorithm>
+
#include "common/logging.h"
+#include "util/file_utils.h"
namespace doris {
@@ -201,6 +204,42 @@ TEST_F(EnvPosixTest, random_rw) {
}
}
+TEST_F(EnvPosixTest, iterate_dir) {
+ std::string dir_path = "./ut_dir/env_posix/iterate_dir";
+ FileUtils::remove_all(dir_path);
+ auto st = Env::Default()->create_dir_if_missing(dir_path);
+ ASSERT_TRUE(st.ok());
+
+ st = Env::Default()->create_dir_if_missing(dir_path + "/abc");
+ ASSERT_TRUE(st.ok());
+
+ st = Env::Default()->create_dir_if_missing(dir_path + "/123");
+ ASSERT_TRUE(st.ok());
+
+ {
+ std::vector<std::string> children;
+ st = Env::Default()->get_children(dir_path, &children);
+ ASSERT_EQ(4, children.size());
+ std::sort(children.begin(), children.end());
+
+ ASSERT_STREQ(".", children[0].c_str());
+ ASSERT_STREQ("..", children[1].c_str());
+ ASSERT_STREQ("123", children[2].c_str());
+ ASSERT_STREQ("abc", children[3].c_str());
+ }
+ {
+ std::vector<std::string> children;
+ st = FileUtils::list_files(Env::Default(), dir_path, &children);
+ ASSERT_EQ(2, children.size());
+ std::sort(children.begin(), children.end());
+
+ ASSERT_STREQ("123", children[0].c_str());
+ ASSERT_STREQ("abc", children[1].c_str());
+ }
+
+ FileUtils::remove_all(dir_path);
+}
+
}
int main(int argc, char* argv[]) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]