gavinchou commented on code in PR #59269:
URL: https://github.com/apache/doris/pull/59269#discussion_r2681071202
##########
be/src/io/cache/fs_file_cache_storage.cpp:
##########
@@ -996,82 +1063,632 @@ FSFileCacheStorage::~FSFileCacheStorage() {
if (_cache_background_load_thread.joinable()) {
_cache_background_load_thread.join();
}
+ stop_leak_cleaner();
}
-size_t FSFileCacheStorage::estimate_file_count_from_statfs() const {
- struct statvfs vfs;
- if (statvfs(_cache_base_path.c_str(), &vfs) != 0) {
- LOG(WARNING) << "Failed to get filesystem statistics for path: " <<
_cache_base_path
- << ", error: " << strerror(errno);
+size_t FSFileCacheStorage::estimate_file_count_from_inode() const {
+ int64_t duration_ns = 0;
+ size_t cache_files = 0;
+ {
+ SCOPED_RAW_TIMER(&duration_ns);
+ do {
+ struct statvfs vfs {};
+ int statvfs_res = 0;
+#ifdef BE_TEST
+ if (auto* hooks = inode_test_hooks(); hooks &&
hooks->statvfs_override) {
+ statvfs_res = hooks->statvfs_override(_cache_base_path, &vfs);
+ } else
+#endif
+ {
+ statvfs_res = statvfs(_cache_base_path.c_str(), &vfs);
+ }
+ if (statvfs_res != 0) {
+ LOG(WARNING) << "Failed to get filesystem statistics for path:
" << _cache_base_path
+ << ", error: " << strerror(errno);
+ break;
+ }
+
+ if (vfs.f_files == 0) {
+ LOG(WARNING) << "Filesystem returned zero total inodes for
path "
+ << _cache_base_path;
+ break;
+ }
+
+ struct stat cache_stat {};
+ int lstat_res = 0;
+#ifdef BE_TEST
+ if (auto* hooks = inode_test_hooks(); hooks &&
hooks->lstat_override) {
+ lstat_res = hooks->lstat_override(_cache_base_path,
&cache_stat);
+ } else
+#endif
+ {
+ lstat_res = lstat(_cache_base_path.c_str(), &cache_stat);
+ }
+ if (lstat_res != 0) {
+ LOG(WARNING) << "Failed to stat cache base path " <<
_cache_base_path << ": "
+ << strerror(errno);
+ break;
+ }
+
+ size_t total_inodes_used = vfs.f_files - vfs.f_ffree;
+ size_t non_cache_inodes = estimate_non_cache_inode_usage();
+ size_t directory_inodes = estimate_cache_directory_inode_usage();
+
+ if (total_inodes_used > non_cache_inodes + directory_inodes) {
+ cache_files = total_inodes_used - non_cache_inodes -
directory_inodes;
+ } else {
+ LOG(WARNING) << fmt::format(
+ "Inode subtraction underflow: total={} non_cache={}
directory={}",
+ total_inodes_used, non_cache_inodes, directory_inodes);
+ }
+
+ LOG(INFO) << fmt::format(
+ "Cache inode estimation: total_used={}, non_cache={},
directories≈{}, files≈{}",
+ total_inodes_used, non_cache_inodes, directory_inodes,
cache_files);
+ } while (false);
+ }
+ const double duration_ms = static_cast<double>(duration_ns) / 1'000'000.0;
+ LOG(INFO) << fmt::format("estimate_file_count_from_inode
duration_ms={:.3f}, files={}",
+ duration_ms, cache_files);
+ return cache_files;
+}
+
+size_t FSFileCacheStorage::count_inodes_for_path(
+ const std::filesystem::path& path, dev_t target_dev,
+ const std::filesystem::path& excluded_root,
+ std::unordered_set<InodeKey, InodeKeyHash>& visited) const {
+#ifdef BE_TEST
+ if (auto* hooks = inode_test_hooks(); hooks &&
hooks->count_inodes_override) {
+ return hooks->count_inodes_override(*this, path, target_dev,
excluded_root, visited);
+ }
+#endif
+ if (!excluded_root.empty()) {
+ std::error_code eq_ec;
+ bool is_excluded = std::filesystem::equivalent(path, excluded_root,
eq_ec);
+ if (eq_ec) {
+ LOG(WARNING) << "Failed to compare " << path << " with " <<
excluded_root << ": "
+ << eq_ec.message();
+ } else if (is_excluded) {
+ return 0;
+ }
+ }
+
+ struct stat st {};
+ if (lstat(path.c_str(), &st) != 0) {
+ LOG(WARNING) << "Failed to stat path " << path << ": " <<
strerror(errno);
+ return 0;
+ }
+ if (st.st_dev != target_dev) {
+ return 0;
+ }
+ InodeKey key {st.st_dev, st.st_ino};
+ if (!visited.insert(key).second) {
return 0;
}
- // Get total size of cache directory to estimate file count
+ size_t count = 1;
+ if (S_ISDIR(st.st_mode)) {
+ std::error_code ec;
+ for (std::filesystem::directory_iterator it {path, ec};
+ !ec && it != std::filesystem::directory_iterator(); ++it) {
+ count += count_inodes_for_path(it->path(), target_dev,
excluded_root, visited);
+ }
+ if (ec) {
+ LOG(WARNING) << "Failed to iterate directory " << path << ": " <<
ec.message();
+ }
+ }
+ return count;
+}
+
+bool FSFileCacheStorage::is_cache_prefix_directory(
+ const std::filesystem::directory_entry& entry) const {
+ if (!entry.is_directory()) {
+ return false;
+ }
+ auto name = entry.path().filename().native();
+ if (name == META_DIR_NAME || name.empty()) {
+ return false;
+ }
+ if (name.size() != KEY_PREFIX_LENGTH) {
+ return false;
+ }
+ return std::all_of(name.begin(), name.end(), [](unsigned char c) { return
std::isxdigit(c); });
+}
+
+std::filesystem::path FSFileCacheStorage::find_mount_root(dev_t cache_dev)
const {
+#ifdef BE_TEST
+ if (auto* hooks = inode_test_hooks(); hooks &&
hooks->find_mount_root_override) {
+ return hooks->find_mount_root_override(*this, cache_dev);
+ }
+#endif
std::error_code ec;
- uintmax_t total_size = 0;
- std::vector<std::filesystem::path> pending_dirs
{std::filesystem::path(_cache_base_path)};
- while (!pending_dirs.empty()) {
- auto current_dir = pending_dirs.back();
- pending_dirs.pop_back();
+ std::filesystem::path current =
std::filesystem::absolute(_cache_base_path, ec);
+ if (ec) {
+ LOG(WARNING) << "Failed to resolve absolute cache base path " <<
_cache_base_path << ": "
+ << ec.message();
+ current = _cache_base_path;
+ }
- std::filesystem::directory_iterator it(current_dir, ec);
- if (ec) {
- LOG(WARNING) << "Failed to list directory while estimating file
count, dir="
- << current_dir << ", err=" << ec.message();
- ec.clear();
+ std::filesystem::path result = current;
+ while (result.has_parent_path()) {
+ auto parent = result.parent_path();
+ if (parent.empty() || parent == result) {
+ break;
+ }
+ struct stat st {};
+ if (lstat(parent.c_str(), &st) != 0) {
+ LOG(WARNING) << "Failed to stat parent path " << parent << ": " <<
strerror(errno);
+ break;
+ }
+ if (st.st_dev != cache_dev) {
+ break;
+ }
+ result = parent;
+ }
+ return result;
+}
+
+size_t FSFileCacheStorage::estimate_non_cache_inode_usage() const {
+#ifdef BE_TEST
+ if (auto* hooks = inode_test_hooks(); hooks && hooks->non_cache_override) {
+ return hooks->non_cache_override(*this);
+ }
+#endif
+ struct stat cache_stat {};
+ if (lstat(_cache_base_path.c_str(), &cache_stat) != 0) {
+ LOG(WARNING) << "Failed to stat cache base path " << _cache_base_path
<< ": "
+ << strerror(errno);
+ return 0;
+ }
+
+ auto mount_root = find_mount_root(cache_stat.st_dev);
+ if (mount_root.empty()) {
+ LOG(WARNING) << "Failed to determine mount root for cache path " <<
_cache_base_path;
+ return 0;
+ }
+
+ std::unordered_set<InodeKey, InodeKeyHash> visited;
+ std::error_code abs_ec;
+ std::filesystem::path excluded =
std::filesystem::absolute(_cache_base_path, abs_ec);
+ if (abs_ec) {
+ LOG(WARNING) << "Failed to get absolute cache base path " <<
_cache_base_path << ": "
+ << abs_ec.message();
+ excluded = _cache_base_path;
+ }
+
+ return count_inodes_for_path(mount_root, cache_stat.st_dev, excluded,
visited);
+}
+
+size_t FSFileCacheStorage::estimate_cache_directory_inode_usage() const {
Review Comment:
Is this a real "inode" count?
It would be better to log how much time this function takes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]