This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 132df2d5876 [Improvement](inverted index) Remove the check for
inverted index file exists (#36929)
132df2d5876 is described below
commit 132df2d587692e2b4ff58a8837e57b84ddc9deec
Author: Sun Chenyang <[email protected]>
AuthorDate: Fri Jun 28 12:10:48 2024 +0800
[Improvement](inverted index) Remove the check for inverted index file
exists (#36929)
## Proposed changes
backport #36945
---
be/src/clucene | 2 +-
be/src/olap/olap_common.h | 1 -
.../rowset/segment_v2/inverted_index_cache.cpp | 19 ++++----
.../inverted_index_compound_directory.cpp | 45 +++++++------------
.../segment_v2/inverted_index_compound_reader.h | 2 +-
.../rowset/segment_v2/inverted_index_reader.cpp | 51 +++++++++-------------
be/src/vec/exec/scan/new_olap_scan_node.cpp | 2 -
be/src/vec/exec/scan/new_olap_scan_node.h | 1 -
be/src/vec/exec/scan/new_olap_scanner.cpp | 2 -
9 files changed, 48 insertions(+), 77 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene
index a28adab869f..51f15724f5b 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit a28adab869f1397aefd7c3636d977c406613617d
+Subproject commit 51f15724f5bdb7ae5f6f5e5d7072d43a5bda63f8
diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h
index e5f029d2a29..811e77590f9 100644
--- a/be/src/olap/olap_common.h
+++ b/be/src/olap/olap_common.h
@@ -357,7 +357,6 @@ struct OlapReaderStatistics {
int64_t inverted_index_query_timer = 0;
int64_t inverted_index_query_cache_hit = 0;
int64_t inverted_index_query_cache_miss = 0;
- int64_t inverted_index_query_file_exists_timer = 0;
int64_t inverted_index_query_null_bitmap_timer = 0;
int64_t inverted_index_query_bitmap_copy_timer = 0;
int64_t inverted_index_query_bitmap_op_timer = 0;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
index 035e28efabd..41e3b134a32 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
@@ -136,18 +136,17 @@ Status
InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr&
std::unique_ptr<MemTracker>(new
MemTracker("InvertedIndexSearcherCacheWithRead"));
#ifndef BE_TEST
{
- bool exists = false;
- {
- SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer);
- RETURN_IF_ERROR(fs->exists(file_path, &exists));
- }
- if (!exists) {
- return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
- "inverted index path: {} not exist.", file_path);
- }
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get());
- index_searcher = build_index_searcher(fs, index_dir, file_name);
+ try {
+ index_searcher = build_index_searcher(fs, index_dir, file_name);
+ } catch (CLuceneError& err) {
+ if (err.number() == CL_ERR_FileNotFound) {
+ return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
+ "inverted index path: {} not exist.", file_path);
+ }
+ throw err;
+ }
}
#endif
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
index 0eb7e31a027..1c1857f2ac2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_directory.cpp
@@ -266,30 +266,26 @@ bool DorisCompoundDirectory::FSIndexInput::open(const
io::FileSystemSPtr& fs, co
io::FileBlockCachePathPolicy cache_policy;
auto type = config::enable_file_cache ? config::file_cache_type : "";
io::FileReaderOptions reader_options(io::cache_type_from_string(type),
cache_policy);
- if (!fs->open_file(fd, reader_options, &h->_reader).ok()) {
- error.set(CL_ERR_IO, "open file error");
+ auto st = fs->open_file(fd, reader_options, &h->_reader);
+ if (st.is_not_found()) {
+ error.set(CL_ERR_FileNotFound, "File does not exist");
+ } else if (st.is_io_error()) {
+ error.set(CL_ERR_IO, "File open io error");
+ } else if (st.code() == ErrorCode::PERMISSION_DENIED) {
+ error.set(CL_ERR_IO, "File Access denied");
+ } else {
+ error.set(CL_ERR_IO, "Could not open file");
}
//Check if a valid handle was retrieved
- if (h->_reader) {
+ if (st.ok() && h->_reader) {
//Store the file length
h->_length = h->_reader->size();
h->_fpos = 0;
ret = _CLNEW FSIndexInput(h, buffer_size);
return true;
-
- } else {
- int err = errno;
- if (err == ENOENT) {
- error.set(CL_ERR_IO, "File does not exist");
- } else if (err == EACCES) {
- error.set(CL_ERR_IO, "File Access denied");
- } else if (err == EMFILE) {
- error.set(CL_ERR_IO, "Too many open files");
- } else {
- error.set(CL_ERR_IO, "Could not open file");
- }
}
+
delete h->_shared_lock;
_CLDECDELETE(h)
return false;
@@ -532,19 +528,6 @@ void DorisCompoundDirectory::init(const
io::FileSystemSPtr& _fs, const char* _pa
}
lucene::store::Directory::setLockFactory(lock_factory);
-
- // It's fail checking directory existence in S3.
- if (fs->type() == io::FileSystemType::S3) {
- return;
- }
- bool exists = false;
- LOG_AND_THROW_IF_ERROR(fs->exists(directory, &exists),
- "Doris compound directory init IO error");
- if (!exists) {
- auto e = "Doris compound directory init error: " + directory + " is
not a directory";
- LOG(WARNING) << e;
- _CLTHROWA(CL_ERR_IO, e.c_str());
- }
}
void DorisCompoundDirectory::priv_getFN(char* buffer, const char* name) const {
@@ -616,7 +599,11 @@ int64_t DorisCompoundDirectory::fileLength(const char*
name) const {
char buffer[CL_MAX_DIR];
priv_getFN(buffer, name);
int64_t size = -1;
- LOG_AND_THROW_IF_ERROR(fs->file_size(buffer, &size), "Get file size IO
error");
+ auto st = fs->file_size(buffer, &size);
+ if (st.is_not_found()) {
+ _CLTHROWA(CL_ERR_FileNotFound, "File does not exist");
+ }
+ LOG_AND_THROW_IF_ERROR(st, "Get file size IO error");
return size;
}
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
index c084141c656..d9daadb2b04 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.h
@@ -54,7 +54,7 @@ private:
lucene::store::RAMDirectory* ram_dir;
std::string directory;
std::string file_name;
- CL_NS(store)::IndexInput* stream;
+ CL_NS(store)::IndexInput* stream = nullptr;
using EntriesType =
lucene::util::CLHashMap<char*, ReaderFileEntry*,
lucene::util::Compare::Char,
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 7db21a96c65..b6b8f9c0441 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -99,13 +99,6 @@ bool
InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) {
query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY);
}
-bool InvertedIndexReader::indexExists(io::Path& index_file_path) {
- // SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer);
- bool exists = false;
- RETURN_IF_ERROR(_fs->exists(index_file_path, &exists));
- return exists;
-}
-
std::unique_ptr<lucene::analysis::Analyzer>
InvertedIndexReader::create_analyzer(
InvertedIndexCtx* inverted_index_ctx) {
std::unique_ptr<lucene::analysis::Analyzer> analyzer;
@@ -210,21 +203,18 @@ Status
InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
return Status::OK();
}
- bool exists = false;
- {
- SCOPED_RAW_TIMER(&stats->inverted_index_query_file_exists_timer);
- RETURN_IF_ERROR(_fs->exists(index_file_path, &exists));
- }
- if (!exists) {
- LOG(WARNING) << "inverted index: " << index_file_path.native() <<
" not exist.";
- return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
- "inverted index path: {} not exist.",
index_file_path.native());
- }
-
if (!dir) {
- dir = new DorisCompoundReader(
- DorisCompoundDirectoryFactory::getDirectory(_fs,
index_dir.c_str()),
- index_file_name.c_str(),
config::inverted_index_read_buffer_size);
+ try {
+ dir = new DorisCompoundReader(
+ DorisCompoundDirectoryFactory::getDirectory(_fs,
index_dir.c_str()),
+ index_file_name.c_str(),
config::inverted_index_read_buffer_size);
+ } catch (CLuceneError& err) {
+ if (err.number() == CL_ERR_FileNotFound) {
+ return
Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
+ "inverted index path: {} not exist.",
index_file_path.native());
+ }
+ throw err;
+ }
owned_dir = true;
}
@@ -763,17 +753,18 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs,
const std::string& path,
auto index_dir = io_path.parent_path();
auto index_file_name =
InvertedIndexDescriptor::get_index_file_name(io_path.filename(),
index_meta->index_id());
-
- // check index file existence
auto index_file = index_dir / index_file_name;
- if (!indexExists(index_file)) {
- LOG(WARNING) << "bkd index: " << index_file.string() << " not exist.";
- return;
- }
_file_full_path = index_file;
- _compoundReader = std::make_unique<DorisCompoundReader>(
- DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()),
- index_file_name.c_str(), config::inverted_index_read_buffer_size);
+ try {
+ _compoundReader = std::make_unique<DorisCompoundReader>(
+ DorisCompoundDirectoryFactory::getDirectory(fs,
index_dir.c_str()),
+ index_file_name.c_str(),
config::inverted_index_read_buffer_size);
+ } catch (CLuceneError& err) {
+ if (err.number() == CL_ERR_FileNotFound) {
+ LOG(WARNING) << "bkd index: " << index_file.string() << " not
exist.";
+ return;
+ }
+ }
}
Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState*
runtime_state,
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 61147cc77a8..98a2371fa68 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -173,8 +173,6 @@ Status NewOlapScanNode::_init_profile() {
_inverted_index_query_timer = ADD_TIMER(_segment_profile,
"InvertedIndexQueryTime");
_inverted_index_query_null_bitmap_timer =
ADD_TIMER(_segment_profile, "InvertedIndexQueryNullBitmapTime");
- _inverted_index_query_file_exists_timer =
- ADD_TIMER(_segment_profile, "InvertedIndexQueryFileExistsTime");
_inverted_index_query_bitmap_copy_timer =
ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime");
_inverted_index_query_bitmap_op_timer =
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h
b/be/src/vec/exec/scan/new_olap_scan_node.h
index 35b4290ecfa..a8e0f6dde7e 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -191,7 +191,6 @@ private:
RuntimeProfile::Counter* _inverted_index_query_cache_hit_counter = nullptr;
RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter =
nullptr;
RuntimeProfile::Counter* _inverted_index_query_timer = nullptr;
- RuntimeProfile::Counter* _inverted_index_query_file_exists_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_null_bitmap_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_bitmap_op_timer = nullptr;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index f80802cc2f7..ddecaccd195 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -587,8 +587,6 @@ void NewOlapScanner::_update_counters_before_close() {
COUNTER_UPDATE(olap_parent->_inverted_index_query_cache_miss_counter,
stats.inverted_index_query_cache_miss);
COUNTER_UPDATE(olap_parent->_inverted_index_query_timer,
stats.inverted_index_query_timer);
- COUNTER_UPDATE(olap_parent->_inverted_index_query_file_exists_timer,
- stats.inverted_index_query_file_exists_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_query_null_bitmap_timer,
stats.inverted_index_query_null_bitmap_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_query_bitmap_copy_timer,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]