This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f0ee17bc9f3 [fix](cloud) fix read cache block file return stat 
NOT_FOUND (#43220)
f0ee17bc9f3 is described below

commit f0ee17bc9f3966024e0c67f2a1a5b8799d06b6af
Author: zhengyu <[email protected]>
AuthorDate: Fri Nov 8 15:39:54 2024 +0800

    [fix](cloud) fix read cache block file return stat NOT_FOUND (#43220)
    
    Previously, cache blocks could be downloaded as different types than the
    target type we intended to read, leading to false cache misses. E.g., a
    block might be downloaded as an 'idx' type when the current context
    expected a 'ttl' type.
    
    The root cause of this problem is the original design encoding meta info
    such as type and expiration time into cache block file path and readers
    of this cache block file have inconsistent view of the type so they use
    different name to locate file and run into error in the end.
    
    This commit tries other type if the initial type failed to locate the
    file (return NOT_FOUND). Be ware that this is a nasty quick fix. We will
    elimite the metadate encoded in the file path in the near future to get
    rid of all the path related problems.
---
 be/src/common/config.cpp                      |  2 -
 be/src/common/config.h                        |  2 -
 be/src/io/cache/cached_remote_file_reader.cpp |  2 +
 be/src/io/cache/fs_file_cache_storage.cpp     | 55 ++++++++++++++++++---------
 be/src/io/cache/fs_file_cache_storage.h       |  3 ++
 5 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 32604a65e58..7c8abfeb8f4 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1016,8 +1016,6 @@ DEFINE_mInt64(file_cache_ttl_valid_check_interval_second, 
"0"); // zero for not
 // If true, evict the ttl cache using LRU when full.
 // Otherwise, only expiration can evict ttl and new data won't add to cache 
when full.
 DEFINE_Bool(enable_ttl_cache_evict_using_lru, "true");
-// rename ttl filename to new format during read, with some performance cost
-DEFINE_mBool(translate_to_new_ttl_format_during_read, "false");
 DEFINE_mBool(enbale_dump_error_file, "true");
 // limit the max size of error log on disk
 DEFINE_mInt64(file_cache_error_log_limit_bytes, "209715200"); // 200MB
diff --git a/be/src/common/config.h b/be/src/common/config.h
index c4fa4f94672..d6a581a7614 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1063,8 +1063,6 @@ 
DECLARE_mInt64(file_cache_ttl_valid_check_interval_second);
 // If true, evict the ttl cache using LRU when full.
 // Otherwise, only expiration can evict ttl and new data won't add to cache 
when full.
 DECLARE_Bool(enable_ttl_cache_evict_using_lru);
-// rename ttl filename to new format during read, with some performance cost
-DECLARE_Bool(translate_to_new_ttl_format_during_read);
 DECLARE_mBool(enbale_dump_error_file);
 // limit the max size of error log on disk
 DECLARE_mInt64(file_cache_error_log_limit_bytes);
diff --git a/be/src/io/cache/cached_remote_file_reader.cpp 
b/be/src/io/cache/cached_remote_file_reader.cpp
index 0a46c98390e..c9a273c5d36 100644
--- a/be/src/io/cache/cached_remote_file_reader.cpp
+++ b/be/src/io/cache/cached_remote_file_reader.cpp
@@ -292,6 +292,8 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset, 
Slice result, size_t*
                                  file_offset);
             }
             if (!st || block_state != FileBlock::State::DOWNLOADED) {
+                LOG(WARNING) << "Read data failed from file cache downloaded 
by others. err="
+                             << st.msg() << ", block state=" << block_state;
                 size_t bytes_read {0};
                 stats.hit_cache = false;
                 s3_read_counter << 1;
diff --git a/be/src/io/cache/fs_file_cache_storage.cpp 
b/be/src/io/cache/fs_file_cache_storage.cpp
index d99869c1c8f..cf1cd41a537 100644
--- a/be/src/io/cache/fs_file_cache_storage.cpp
+++ b/be/src/io/cache/fs_file_cache_storage.cpp
@@ -160,30 +160,36 @@ Status FSFileCacheStorage::read(const FileCacheKey& key, 
size_t value_offset, Sl
                 get_path_in_local_cache(get_path_in_local_cache(key.hash, 
key.meta.expiration_time),
                                         key.offset, key.meta.type);
         Status s = fs->open_file(file, &file_reader);
-        if (!s.ok()) {
-            if (!s.is<ErrorCode::NOT_FOUND>() || key.meta.type != 
FileCacheType::TTL) {
-                return s;
+
+        // handle the case that the file is not found but actually exists in 
other type format
+        // TODO(zhengyu): nasty! better eliminate the type encoding in file 
name in the future
+        if (!s.ok() && !s.is<ErrorCode::NOT_FOUND>()) {
+            LOG(WARNING) << "open file failed, file=" << file << ", error=" << 
s.to_string();
+            return s;                                         // return other 
error directly
+        } else if (!s.ok() && s.is<ErrorCode::NOT_FOUND>()) { // but handle 
NOT_FOUND error
+            auto candidates = get_path_in_local_cache_all_candidates(
+                    get_path_in_local_cache(key.hash, 
key.meta.expiration_time), key.offset);
+            for (auto& candidate : candidates) {
+                s = fs->open_file(candidate, &file_reader);
+                if (s.ok()) {
+                    break; // success with one of there candidates
+                }
             }
-            std::string file_old_format = 
get_path_in_local_cache_old_ttl_format(
-                    get_path_in_local_cache(key.hash, 
key.meta.expiration_time), key.offset,
-                    key.meta.type);
-            if (config::translate_to_new_ttl_format_during_read) {
-                // try to rename the file with old ttl format to new and retry
-                VLOG(7) << "try to rename the file with old ttl format to new 
and retry"
-                        << " oldformat=" << file_old_format << " original=" << 
file;
-                RETURN_IF_ERROR(fs->rename(file_old_format, file));
-                RETURN_IF_ERROR(fs->open_file(file, &file_reader));
-            } else {
-                // try to open the file with old ttl format
-                VLOG(7) << "try to open the file with old ttl format"
-                        << " oldformat=" << file_old_format << " original=" << 
file;
-                RETURN_IF_ERROR(fs->open_file(file_old_format, &file_reader));
+            if (!s.ok()) { // still not found, return error
+                LOG(WARNING) << "open file failed, file=" << file << ", 
error=" << s.to_string();
+                return s;
             }
-        }
+        } // else, s.ok() means open file success
+
         FDCache::instance()->insert_file_reader(fd_key, file_reader);
     }
     size_t bytes_read = 0;
-    RETURN_IF_ERROR(file_reader->read_at(value_offset, buffer, &bytes_read));
+    auto s = file_reader->read_at(value_offset, buffer, &bytes_read);
+    if (!s.ok()) {
+        LOG(WARNING) << "read file failed, file=" << file_reader->path()
+                     << ", error=" << s.to_string();
+        return s;
+    }
     DCHECK(bytes_read == buffer.get_size());
     return Status::OK();
 }
@@ -270,6 +276,17 @@ std::string 
FSFileCacheStorage::get_path_in_local_cache_old_ttl_format(const std
     return Path(dir) / (std::to_string(offset) + 
BlockFileCache::cache_type_to_string(type));
 }
 
+std::vector<std::string> 
FSFileCacheStorage::get_path_in_local_cache_all_candidates(
+        const std::string& dir, size_t offset) {
+    std::vector<std::string> candidates;
+    std::string base = get_path_in_local_cache(dir, offset, 
FileCacheType::NORMAL);
+    candidates.push_back(base);
+    candidates.push_back(base + "_idx");
+    candidates.push_back(base + "_ttl");
+    candidates.push_back(base + "_disposable");
+    return candidates;
+}
+
 std::string FSFileCacheStorage::get_path_in_local_cache(const UInt128Wrapper& 
value,
                                                         uint64_t 
expiration_time) const {
     auto str = value.to_string();
diff --git a/be/src/io/cache/fs_file_cache_storage.h 
b/be/src/io/cache/fs_file_cache_storage.h
index fb3490bcfe0..8a97aa109ad 100644
--- a/be/src/io/cache/fs_file_cache_storage.h
+++ b/be/src/io/cache/fs_file_cache_storage.h
@@ -102,6 +102,9 @@ private:
 
     void load_cache_info_into_memory(BlockFileCache* _mgr) const;
 
+    [[nodiscard]] std::vector<std::string> 
get_path_in_local_cache_all_candidates(
+            const std::string& dir, size_t offset);
+
     std::string _cache_base_path;
     std::thread _cache_background_load_thread;
     const std::shared_ptr<LocalFileSystem>& fs = global_local_filesystem();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to