This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 973bc9c0310 branch-3.1: [fix](inverted index) Fix compilation warnings #53359 (#53550)
973bc9c0310 is described below
commit 973bc9c0310dea3ab1ea1aa2d6d973354dc56813
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jul 18 23:19:06 2025 +0800
branch-3.1: [fix](inverted index) Fix compilation warnings #53359 (#53550)
Cherry-picked from #53359
Co-authored-by: zzzxl <[email protected]>
---
.../inverted_index/analyzer/basic/basic_tokenizer.cpp | 10 ++++++----
.../inverted_index/analyzer/ik/IKTokenizer.cpp | 6 ++++--
.../inverted_index/analyzer/ik/core/AnalyzeContext.cpp | 7 +++++--
.../inverted_index/analyzer/ik/core/CJKSegmenter.cpp | 3 +++
.../analyzer/ik/core/CN_QuantifierSegmenter.cpp | 9 ++++++---
.../inverted_index/analyzer/ik/core/CharacterUtil.h | 6 +++++-
.../inverted_index/analyzer/ik/core/IKSegmenter.cpp | 7 +++++--
.../inverted_index/analyzer/ik/core/IKSegmenter.h | 2 +-
.../inverted_index/analyzer/ik/core/LetterSegmenter.cpp | 17 ++++++++++-------
.../inverted_index/analyzer/ik/dic/Dictionary.cpp | 6 +++---
.../inverted_index/analyzer/ik/dic/Dictionary.h | 2 +-
.../inverted_index/query/conjunction_query.cpp | 2 ++
.../inverted_index/query/phrase_prefix_query.cpp | 2 ++
.../inverted_index/query/phrase_prefix_query.h | 2 +-
.../inverted_index/query/phrase_query/phrase_matcher.h | 4 +++-
.../query/phrase_query/sloppy_phrase_matcher.cpp | 12 +++++++-----
.../segment_v2/inverted_index/query/regexp_query.cpp | 10 ++++++----
.../token_filter/ascii_folding_filter.cpp | 8 +++++---
.../token_filter/word_delimiter_filter.cpp | 6 ++++--
.../token_filter/word_delimiter_filter_factory.h | 8 +++++---
.../tokenizer/keyword/keyword_tokenizer.h | 4 +++-
.../inverted_index/tokenizer/ngram/ngram_tokenizer.cpp | 6 ++++--
.../inverted_index/tokenizer/ngram/ngram_tokenizer.h | 4 +++-
.../tokenizer/standard/standard_tokenizer_impl.h | 5 ++++-
.../segment_v2/inverted_index/util/docid_set_iterator.h | 2 ++
.../segment_v2/inverted_index/util/fixed_bit_set.h | 10 ++++++----
.../segment_v2/inverted_index/util/mock_iterator.h | 2 +-
.../segment_v2/inverted_index/util/term_iterator.h | 2 +-
.../inverted_index/util/union_term_iterator.h | 4 ++--
29 files changed, 110 insertions(+), 58 deletions(-)
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp
index f1afba794d1..0679fdbdd26 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/basic/basic_tokenizer.cpp
@@ -20,6 +20,7 @@
#include <unicode/unistr.h>
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
#define IS_IN_RANGE(c, start, end) ((uint32_t)((c) - (start)) <= ((end) -
(start)))
@@ -45,7 +46,7 @@ Token* BasicTokenizer::next(Token* token) {
std::string_view& token_text = _tokens_text[_buffer_index++];
size_t size = std::min(token_text.size(),
static_cast<size_t>(LUCENE_MAX_WORD_LEN));
- token->setNoCopy(token_text.data(), 0, size);
+ token->setNoCopy(token_text.data(), 0, static_cast<int32_t>(size));
return token;
}
@@ -55,18 +56,18 @@ void BasicTokenizer::reset(lucene::util::Reader* reader) {
_tokens_text.clear();
_buffer.resize(reader->size());
- int32_t numRead = reader->readCopy(_buffer.data(), 0, _buffer.size());
+ size_t numRead = reader->readCopy(_buffer.data(), 0,
static_cast<int32_t>(_buffer.size()));
(void)numRead;
assert(_buffer.size() == numRead);
cut();
- _data_len = _tokens_text.size();
+ _data_len = static_cast<int32_t>(_tokens_text.size());
}
void BasicTokenizer::cut() {
auto* s = (uint8_t*)_buffer.data();
- int32_t length = _buffer.size();
+ auto length = static_cast<int32_t>(_buffer.size());
for (int32_t i = 0; i < length;) {
uint8_t firstByte = s[i];
@@ -104,4 +105,5 @@ void BasicTokenizer::cut() {
}
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/IKTokenizer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/IKTokenizer.cpp
index 72b906fa4e1..2355f099b82 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/IKTokenizer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/IKTokenizer.cpp
@@ -18,6 +18,7 @@
#include "IKTokenizer.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
IKTokenizer::IKTokenizer(std::shared_ptr<Configuration> config, bool
lower_case, bool own_reader) {
this->lowercase = lower_case;
@@ -36,7 +37,7 @@ Token* IKTokenizer::next(Token* token) {
// TODO(ryan19929): do regularizeString in fillBuffer.
CharacterUtil::regularizeString(token_text, this->lowercase);
size_t size = std::min(token_text.size(),
static_cast<size_t>(LUCENE_MAX_WORD_LEN));
- token->setNoCopy(token_text.data(), 0, size);
+ token->setNoCopy(token_text.data(), 0, static_cast<int32_t>(size));
return token;
}
@@ -60,7 +61,8 @@ void IKTokenizer::reset(lucene::util::Reader* reader) {
_CLTHROWT(CL_ERR_Runtime,
("Uncaught exception in IKTokenizer: " +
std::string(e.what())).c_str());
}
- data_length_ = tokens_text_.size();
+ data_length_ = static_cast<int32_t>(tokens_text_.size());
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/AnalyzeContext.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/AnalyzeContext.cpp
index 3356210f20c..d1bfaf07e15 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/AnalyzeContext.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/AnalyzeContext.cpp
@@ -18,6 +18,7 @@
#include "AnalyzeContext.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
AnalyzeContext::AnalyzeContext(IKMemoryPool<Cell>& pool,
std::shared_ptr<Configuration> config)
: segment_buff_(),
@@ -76,8 +77,8 @@ size_t AnalyzeContext::fillBuffer(lucene::util::Reader*
reader) {
std::memmove(segment_buff_.data(),
segment_buff_.data() +
typed_runes_[cursor_].getNextBytePosition(),
offset);
- readCount = std::max(
- 0, reader->readCopy(segment_buff_.data() + offset, 0,
BUFF_SIZE - offset));
+ readCount = std::max(0, reader->readCopy(segment_buff_.data()
+ offset, 0,
+
static_cast<int32_t>(BUFF_SIZE - offset)));
readCount += offset;
} else {
readCount = std::max(0, reader->readCopy(segment_buff_.data(),
0, BUFF_SIZE));
@@ -293,4 +294,6 @@ void AnalyzeContext::outputSingleCJK(size_t index) {
index, index);
}
}
+
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CJKSegmenter.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CJKSegmenter.cpp
index 217df37eca8..b7bf7973db3 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CJKSegmenter.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CJKSegmenter.cpp
@@ -18,6 +18,7 @@
#include "CJKSegmenter.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
CJKSegmenter::CJKSegmenter() = default;
@@ -79,4 +80,6 @@ void CJKSegmenter::analyze(AnalyzeContext& context) {
void CJKSegmenter::reset() {
tmp_hits_.clear();
}
+
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CN_QuantifierSegmenter.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CN_QuantifierSegmenter.cpp
index cd6def585df..009ae4a20c8 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CN_QuantifierSegmenter.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CN_QuantifierSegmenter.cpp
@@ -18,6 +18,7 @@
#include "CN_QuantifierSegmenter.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
const std::u32string CN_QuantifierSegmenter::CHINESE_NUMBERS =
U"一二两三四五六七八九十零壹贰叁肆伍陆柒捌玖拾百千万亿拾佰仟萬億兆卅廿";
@@ -58,15 +59,15 @@ void CN_QuantifierSegmenter::processCNumber(AnalyzeContext&
context) {
if (CharacterUtil::CHAR_CHINESE == context.getCurrentCharType() &&
CHINESE_NUMBER_CHARS.find(currentChar) !=
CHINESE_NUMBER_CHARS.end()) {
// Record the starting and ending positions of numeral words.
- number_start_ = context.getCursor();
- number_end_ = context.getCursor();
+ number_start_ = static_cast<int32_t>(context.getCursor());
+ number_end_ = static_cast<int32_t>(context.getCursor());
}
} else {
// Processing status
if (CharacterUtil::CHAR_CHINESE == context.getCurrentCharType() &&
CHINESE_NUMBER_CHARS.find(context.getCurrentChar()) !=
CHINESE_NUMBER_CHARS.end()) {
// Record the end position of numeral words
- number_end_ = context.getCursor();
+ number_end_ = static_cast<int32_t>(context.getCursor());
} else {
// Output numeral
outputNumLexeme(context);
@@ -161,4 +162,6 @@ void
CN_QuantifierSegmenter::outputNumLexeme(AnalyzeContext& context) {
context.addLexeme(newLexeme);
}
}
+
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CharacterUtil.h
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CharacterUtil.h
index c60f8bb30ce..a43ba05b630 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CharacterUtil.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/CharacterUtil.h
@@ -27,6 +27,7 @@
#include "CLucene/analysis/jieba/Unicode.hpp"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
class CharacterUtil {
public:
@@ -47,7 +48,9 @@ public:
TypedRune() : RuneStr(), char_type(0) {}
TypedRune(int32_t in_rune, size_t in_offset, size_t in_len, size_t
in_unicode_offset,
size_t in_unicode_length)
- : RuneStr(in_rune, in_offset, in_len, in_unicode_offset,
in_unicode_length),
+ : RuneStr(in_rune, static_cast<uint32_t>(in_offset),
static_cast<uint32_t>(in_len),
+ static_cast<uint32_t>(in_unicode_offset),
+ static_cast<uint32_t>(in_unicode_length)),
char_type(CharacterUtil::identifyCharType(rune)) {}
void init(const RuneStr& runeStr) {
@@ -85,4 +88,5 @@ public:
static void regularizeString(std::string& input, bool use_lowercase =
true);
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.cpp
index 674f22dfd2d..48c740c7d3e 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.cpp
@@ -18,6 +18,7 @@
#include "IKSegmenter.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
constexpr size_t DEFAULT_MEMORY_POOL_SIZE = 512;
@@ -40,7 +41,7 @@ std::vector<std::unique_ptr<ISegmenter>>
IKSegmenter::loadSegmenters() {
bool IKSegmenter::next(Lexeme& lexeme) {
while (!context_->getNextLexeme(lexeme)) {
// Read data from the reader and fill the buffer
- int available = context_->fillBuffer(input_);
+ auto available = static_cast<int32_t>(context_->fillBuffer(input_));
if (available <= 0) {
context_->reset();
return false;
@@ -74,7 +75,9 @@ void IKSegmenter::reset(lucene::util::Reader* newInput) {
}
}
-int IKSegmenter::getLastUselessCharNum() {
+size_t IKSegmenter::getLastUselessCharNum() {
return context_->getLastUselessCharNum();
}
+
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.h
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.h
index 33defbbe31a..87975377d4d 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/IKSegmenter.h
@@ -36,7 +36,7 @@ public:
IKSegmenter(std::shared_ptr<Configuration> config);
bool next(Lexeme& lexeme);
void reset(lucene::util::Reader* newInput);
- int getLastUselessCharNum();
+ size_t getLastUselessCharNum();
private:
std::vector<std::unique_ptr<ISegmenter>> loadSegmenters();
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/LetterSegmenter.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/LetterSegmenter.cpp
index c593a1ec63d..ef1394bfce9 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/LetterSegmenter.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/core/LetterSegmenter.cpp
@@ -18,6 +18,7 @@
#include "LetterSegmenter.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
LetterSegmenter::LetterSegmenter()
: letter_connectors_ {'#', '&', '+', '-', '.', '@', '_'},
num_connectors_ {',', '.'} {
@@ -57,14 +58,14 @@ bool LetterSegmenter::processEnglishLetter(AnalyzeContext&
context) {
// The current tokenizer has not yet started processing English
characters
if (context.getCurrentCharType() == CharacterUtil::CHAR_ENGLISH) {
// Record the starting pointer position, indicate that the
tokenizer enters the processing state
- english_start_ = context.getCursor();
+ english_start_ = static_cast<int32_t>(context.getCursor());
english_end_ = english_start_;
}
} else {
// The current tokenizer is processing English characters
if (context.getCurrentCharType() == CharacterUtil::CHAR_ENGLISH) {
// Record the current pointer position as the end position
- english_end_ = context.getCursor();
+ english_end_ = static_cast<int32_t>(context.getCursor());
} else {
// Encounter non-English characters, output tokens
Lexeme newLexeme =
@@ -98,14 +99,14 @@ bool LetterSegmenter::processArabicLetter(AnalyzeContext&
context) {
// The current tokenizer has not yet started processing numeric
characters
if (context.getCurrentCharType() == CharacterUtil::CHAR_ARABIC) {
// Record the starting pointer position, indicate that the
tokenizer enters the processing state
- arabic_start_ = context.getCursor();
+ arabic_start_ = static_cast<int32_t>(context.getCursor());
arabic_end_ = arabic_start_;
}
} else {
// The current tokenizer is processing numeric characters
if (context.getCurrentCharType() == CharacterUtil::CHAR_ARABIC) {
// Record the current pointer position as the end position
- arabic_end_ = context.getCursor();
+ arabic_end_ = static_cast<int32_t>(context.getCursor());
} else if (context.getCurrentCharType() == CharacterUtil::CHAR_USELESS
&&
isNumConnector(context.getCurrentChar())) {
// Do not output numbers, but do not mark the end
@@ -141,7 +142,7 @@ bool LetterSegmenter::processMixLetter(AnalyzeContext&
context) {
// The current tokenizer has not yet started processing characters.
if (context.getCurrentCharType() == CharacterUtil::CHAR_ARABIC ||
context.getCurrentCharType() == CharacterUtil::CHAR_ENGLISH) {
- start_ = context.getCursor();
+ start_ = static_cast<int32_t>(context.getCursor());
end_ = start_;
}
} else {
@@ -149,11 +150,11 @@ bool LetterSegmenter::processMixLetter(AnalyzeContext&
context) {
if (context.getCurrentCharType() == CharacterUtil::CHAR_ARABIC ||
context.getCurrentCharType() == CharacterUtil::CHAR_ENGLISH) {
// Record the possible end positions
- end_ = context.getCursor();
+ end_ = static_cast<int32_t>(context.getCursor());
} else if (context.getCurrentCharType() == CharacterUtil::CHAR_USELESS
&&
isLetterConnector(context.getCurrentChar())) {
// Record the possible end positions
- end_ = context.getCursor();
+ end_ = static_cast<int32_t>(context.getCursor());
} else {
// Encounter non-letter characters, output a token
Lexeme newLexeme = createLexeme(context, start_, end_,
Lexeme::Type::Letter);
@@ -197,4 +198,6 @@ Lexeme LetterSegmenter::createLexeme(AnalyzeContext&
context, int start, int end
typed_runes[end].getNextBytePosition() -
typed_runes[start].getBytePosition(),
type, start, end);
}
+
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.cpp
index 86c648101d3..e0abb60adaf 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.cpp
@@ -151,9 +151,9 @@ Hit Dictionary::matchInQuantifierDict(const
CharacterUtil::TypedRuneArray& typed
return result;
}
-void Dictionary::matchWithHit(const CharacterUtil::TypedRuneArray&
typed_runes, int current_index,
- Hit& hit) {
- if (auto matchedSegment = hit.getMatchedDictSegment()) {
+void Dictionary::matchWithHit(const CharacterUtil::TypedRuneArray& typed_runes,
+ size_t current_index, Hit& hit) {
+ if (auto* matchedSegment = hit.getMatchedDictSegment()) {
matchedSegment->match(typed_runes, current_index, 1, hit);
return;
}
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.h
b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.h
index ed48f7bf1ad..fc4dd3d6924 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/analyzer/ik/dic/Dictionary.h
@@ -149,7 +149,7 @@ public:
size_t length);
Hit matchInQuantifierDict(const CharacterUtil::TypedRuneArray& typed_runes,
size_t unicode_offset, size_t length);
- void matchWithHit(const CharacterUtil::TypedRuneArray& typed_runes, int
current_index,
+ void matchWithHit(const CharacterUtil::TypedRuneArray& typed_runes, size_t
current_index,
Hit& hit);
bool isStopWord(const CharacterUtil::TypedRuneArray& typed_runes, size_t
unicode_offset,
size_t length);
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
index ba7790a3ed5..a6b63f6edc5 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp
@@ -18,6 +18,7 @@
#include "conjunction_query.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
ConjunctionQuery::ConjunctionQuery(const
std::shared_ptr<lucene::search::IndexSearcher>& searcher,
const TQueryOptions& query_options, const
io::IOContext* io_ctx)
@@ -155,4 +156,5 @@ int32_t ConjunctionQuery::do_next(int32_t doc) {
}
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
index 2b2531587cb..01b71cb24ee 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp
@@ -20,6 +20,7 @@
#include "olap/rowset/segment_v2/inverted_index/query/query.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
PhrasePrefixQuery::PhrasePrefixQuery(const
std::shared_ptr<lucene::search::IndexSearcher>& searcher,
const TQueryOptions& query_options,
@@ -76,4 +77,5 @@ void PhrasePrefixQuery::search(roaring::Roaring& roaring) {
}
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
index 2320de2ac72..75478bec0d7 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h
@@ -36,7 +36,7 @@ public:
private:
std::shared_ptr<lucene::search::IndexSearcher> _searcher;
- int32_t _term_size = 0;
+ size_t _term_size = 0;
int32_t _max_expansions = 50;
PhraseQuery _phrase_query;
PrefixQuery _prefix_query;
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/phrase_matcher.h
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/phrase_matcher.h
index ebf19c3af9e..7e8ef995bd9 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/phrase_matcher.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/phrase_matcher.h
@@ -20,6 +20,7 @@
#include "olap/rowset/segment_v2/inverted_index/util/docid_set_iterator.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
class PostingsAndFreq {
public:
@@ -34,7 +35,7 @@ public:
DISI _postings;
int32_t _position = 0;
std::vector<std::string> _terms;
- int32_t _n_terms = 0;
+ size_t _n_terms = 0;
};
class PostingsAndPosition {
@@ -63,4 +64,5 @@ private:
const Derived* derived() const { return static_cast<const Derived*>(this);
}
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/sloppy_phrase_matcher.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/sloppy_phrase_matcher.cpp
index 5c441f588ed..342e04b1490 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/sloppy_phrase_matcher.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query/sloppy_phrase_matcher.cpp
@@ -18,9 +18,10 @@
#include
"olap/rowset/segment_v2/inverted_index/query/phrase_query/sloppy_phrase_matcher.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
SloppyPhraseMatcher::SloppyPhraseMatcher(const std::vector<PostingsAndFreq>&
postings, int32_t slop)
- : _slop(slop), _num_postings(postings.size()) {
+ : _slop(slop), _num_postings(static_cast<int32_t>(postings.size())) {
_pq = std::make_unique<PhraseQueue>(postings.size());
_phrase_positions.resize(postings.size());
for (size_t i = 0; i < postings.size(); i++) {
@@ -83,7 +84,7 @@ bool SloppyPhraseMatcher::advance_rpts(PhrasePositions* pp) {
return true;
}
const auto& rg = _rpt_groups[pp->_rpt_group];
- FixedBitSet bits(rg.size());
+ FixedBitSet bits(static_cast<int32_t>(rg.size()));
int32_t k0 = pp->_rpt_ind;
int32_t k = 0;
while ((k = collide(pp)) >= 0) {
@@ -187,7 +188,7 @@ LinkedHashMap<std::string, int32_t>
SloppyPhraseMatcher::repeating_terms() {
for (const auto& t : pp->_terms) {
tcnt[t]++;
if (tcnt[t] == 2) {
- tord.insert(t, tord.size());
+ tord.insert(t, static_cast<int32_t>(tord.size()));
}
}
}
@@ -220,7 +221,7 @@ std::vector<std::vector<PhrasePositions*>>
SloppyPhraseMatcher::gather_rpt_group
}
int32_t g = pp->_rpt_group;
if (g < 0) {
- g = res.size();
+ g = static_cast<int32_t>(res.size());
pp->_rpt_group = g;
std::vector<PhrasePositions*> rl;
rl.reserve(2);
@@ -262,7 +263,7 @@ void
SloppyPhraseMatcher::sort_rpt_groups(std::vector<std::vector<PhrasePosition
});
_rpt_groups[i] = rg;
for (size_t j = 0; j < _rpt_groups[i].size(); ++j) {
- _rpt_groups[i][j]->_rpt_ind = j;
+ _rpt_groups[i][j]->_rpt_ind = static_cast<int32_t>(j);
}
}
}
@@ -314,4 +315,5 @@ bool SloppyPhraseMatcher::init_complex() {
return true;
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
index 5e665d0f532..6829bef3c21 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp
@@ -25,6 +25,7 @@
#include "util/debug_points.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
RegexpQuery::RegexpQuery(const std::shared_ptr<lucene::search::IndexSearcher>&
searcher,
const TQueryOptions& query_options, const
io::IOContext* io_ctx)
@@ -129,8 +130,8 @@ void RegexpQuery::collect_matching_terms(const
std::wstring& field_name,
try {
if (prefix) {
std::wstring ws_prefix = StringUtil::string_to_wstring(*prefix);
- Term prefix(field_name.c_str(), ws_prefix.c_str());
- enumerator = _searcher->getReader()->terms(&prefix, _io_ctx);
+ Term prefix_term(field_name.c_str(), ws_prefix.c_str());
+ enumerator = _searcher->getReader()->terms(&prefix_term, _io_ctx);
} else {
enumerator = _searcher->getReader()->terms(nullptr, _io_ctx);
enumerator->next();
@@ -147,8 +148,8 @@ void RegexpQuery::collect_matching_terms(const
std::wstring& field_name,
}
bool is_match = false;
- if (hs_scan(database, input.data(), input.size(), 0, scratch,
on_match,
- (void*)&is_match) != HS_SUCCESS) {
+ if (hs_scan(database, input.data(),
static_cast<uint32_t>(input.size()), 0, scratch,
+ on_match, (void*)&is_match) != HS_SUCCESS) {
LOG(ERROR) << "hyperscan match failed: " << input;
break;
}
@@ -174,4 +175,5 @@ void RegexpQuery::collect_matching_terms(const
std::wstring& field_name,
})
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter.cpp
index b11a45b2d2d..26ff26d2821 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter.cpp
@@ -21,6 +21,7 @@
#include <string_view>
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
ASCIIFoldingFilter::ASCIIFoldingFilter(const TokenStreamPtr& in, bool
preserve_original)
: DorisTokenFilter(in), _preserve_original(preserve_original),
_output(512, 0) {}
@@ -34,7 +35,7 @@ Token* ASCIIFoldingFilter::next(Token* t) {
}
if (_in->next(t)) {
const char* buffer = t->termBuffer<char>();
- int32_t length = t->termLength<char>();
+ auto length = static_cast<int32_t>(t->termLength<char>());
for (int32_t i = 0; i < length;) {
UChar32 c = U_UNASSIGNED;
U8_NEXT(buffer, i, length, c);
@@ -96,7 +97,7 @@ int32_t ASCIIFoldingFilter::fold_to_ascii(const char* in,
int32_t input_pos, cha
// Quick test: if it's not in range then just keep current character
if (c < 0x0080) {
- out[output_pos++] = c;
+ out[output_pos++] = static_cast<char>(c);
} else {
switch (c) {
case 0x00C0: // À [LATIN CAPITAL LETTER A WITH GRAVE]
@@ -2007,7 +2008,7 @@ int32_t ASCIIFoldingFilter::fold_to_ascii(const char* in,
int32_t input_pos, cha
out[output_pos++] = '~';
break;
default: {
- for (int32_t i = prev_pos; i < pos; i++) {
+ for (size_t i = prev_pos; i < pos; i++) {
out[output_pos++] = in[i];
}
} break;
@@ -2017,4 +2018,5 @@ int32_t ASCIIFoldingFilter::fold_to_ascii(const char* in,
int32_t input_pos, cha
return output_pos;
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter.cpp
index 2e700b4f5b9..ea37bd7fd5f 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter.cpp
@@ -24,6 +24,7 @@
#include "olap/rowset/segment_v2/inverted_index/token_filter/token_filter.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
WordDelimiterFilter::WordDelimiterFilter(const TokenStreamPtr& in,
std::vector<char> char_type_table,
@@ -48,11 +49,11 @@ Token* WordDelimiterFilter::next(Token* t) {
}
// todo: has(IGNORE_KEYWORDS)
char* term_buffer = t->termBuffer<char>();
- int32_t term_length = t->termLength<char>();
+ auto term_length = static_cast<int32_t>(t->termLength<char>());
std::string_view term(term_buffer, term_length);
_accum_pos_inc += get_position_increment(t);
- _iterator->set_text(term.data(), term.size());
+ _iterator->set_text(term.data(),
static_cast<int32_t>(term.size()));
_iterator->next();
if ((_iterator->_current == 0 && _iterator->_end == term_length) ||
@@ -246,4 +247,5 @@ bool WordDelimiterFilter::should_generate_parts(int32_t
word_type) {
(has(GENERATE_NUMBER_PARTS) && is_digit(word_type));
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h
b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h
index 659fc4a03c7..a125150bcad 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h
@@ -22,6 +22,7 @@
#include "word_delimiter_filter.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
class WordDelimiterFilterFactory : public TokenFilterFactory {
friend class WordDelimiterFilterFactoryTest;
@@ -115,7 +116,7 @@ public:
WordDelimiterIterator::DEFAULT_WORD_DELIM_TABLE.size());
std::vector<char> types(table_size, 0);
for (size_t i = 0; i < types.size(); ++i) {
- types[i] = WordDelimiterIterator::get_type(i);
+ types[i] =
WordDelimiterIterator::get_type(static_cast<int32_t>(i));
}
for (const auto& mapping : type_map) {
types[mapping.first] = mapping.second;
@@ -144,8 +145,8 @@ private:
static std::string parse_string(const std::string& s) {
std::string out;
- int32_t len = s.length();
- int32_t read_pos = 0;
+ size_t len = s.length();
+ size_t read_pos = 0;
while (read_pos < len) {
char c = s[read_pos++];
if (c == '\\') {
@@ -198,4 +199,5 @@ private:
std::unordered_set<std::string> _protected_words;
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/keyword/keyword_tokenizer.h
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/keyword/keyword_tokenizer.h
index 90b85f63ac2..dbe05087ca7 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/keyword/keyword_tokenizer.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/keyword/keyword_tokenizer.h
@@ -22,6 +22,7 @@
using namespace lucene::analysis;
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
class KeywordTokenizer : public DorisTokenizer {
public:
@@ -56,7 +57,7 @@ public:
DorisTokenizer::reset();
_done = false;
_char_buffer = nullptr;
- _char_length = _in->read((const void**)&_char_buffer, 0, _in->size());
+ _char_length = _in->read((const void**)&_char_buffer, 0,
static_cast<int32_t>(_in->size()));
}
static constexpr int32_t DEFAULT_BUFFER_SIZE = 256;
@@ -70,4 +71,5 @@ private:
int32_t _char_length = 0;
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.cpp
index d73c1764559..361ea69e3d6 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.cpp
@@ -20,6 +20,7 @@
#include "common/exception.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
NGramTokenizer::NGramTokenizer(int32_t min_gram, int32_t max_gram, bool
edges_only) {
init(min_gram, max_gram, edges_only);
@@ -82,7 +83,7 @@ Token* NGramTokenizer::next(Token* token) {
void NGramTokenizer::reset() {
DorisTokenizer::reset();
- _buffer_start = _buffer_end = _buffer.size();
+ _buffer_start = _buffer_end = static_cast<int32_t>(_buffer.size());
_last_non_token_char = _last_checked_char = _buffer_start - 1;
_offset = 0;
_gram_size = _min_gram;
@@ -90,7 +91,7 @@ void NGramTokenizer::reset() {
_char_buffer = nullptr;
_char_offset = 0;
- _char_length = _in->read((const void**)&_char_buffer, 0, _in->size());
+ _char_length = _in->read((const void**)&_char_buffer, 0,
static_cast<int32_t>(_in->size()));
}
void NGramTokenizer::init(int32_t min_gram, int32_t max_gram, bool edges_only)
{
@@ -145,4 +146,5 @@ void NGramTokenizer::to_chars(const std::vector<UChar32>&
buffer, int32_t start,
unistr.toUTF8String(_utf8_buffer);
}
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.h
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.h
index cae24a61242..39651e4697a 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.h
@@ -25,6 +25,7 @@
using namespace lucene::analysis;
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
class NGramTokenizer : public DorisTokenizer {
public:
@@ -44,7 +45,7 @@ private:
void update_last_non_token_char();
void consume() {
- uint8_t c = _buffer[_buffer_start++];
+ auto c = static_cast<uint8_t>(_buffer[_buffer_start++]);
_offset += U8_LENGTH(c);
}
@@ -75,4 +76,5 @@ private:
std::string _utf8_buffer;
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h
index 7301b5943d4..b707e9add33 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h
@@ -28,6 +28,7 @@
#include "CLucene/analysis/AnalysisHeader.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
class StandardTokenizerImpl {
public:
@@ -203,7 +204,8 @@ private:
_zz_start_read = 0;
}
- int32_t requested = _zz_buffer.size() - _zz_end_read -
_zz_final_partial_char;
+ int32_t requested =
+ static_cast<int32_t>(_zz_buffer.size()) - _zz_end_read -
_zz_final_partial_char;
if (requested == 0) {
return true;
}
@@ -296,4 +298,5 @@ private:
};
using StandardTokenizerImplPtr = std::unique_ptr<StandardTokenizerImpl>;
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/util/docid_set_iterator.h
b/be/src/olap/rowset/segment_v2/inverted_index/util/docid_set_iterator.h
index 8c3678aee64..7ebda6cbc82 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/util/docid_set_iterator.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/util/docid_set_iterator.h
@@ -24,6 +24,7 @@
#include "union_term_iterator.h"
namespace doris::segment_v2 {
+#include "common/compile_check_begin.h"
using DISI = std::variant<TermPositionsIterPtr, UnionTermIterPtr, MockIterPtr>;
@@ -78,4 +79,5 @@ struct NextPosition {
}
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/util/fixed_bit_set.h
b/be/src/olap/rowset/segment_v2/inverted_index/util/fixed_bit_set.h
index aeb7672a197..deb6a504866 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/util/fixed_bit_set.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/util/fixed_bit_set.h
@@ -24,6 +24,7 @@
#include "common/exception.h"
namespace doris::segment_v2::inverted_index {
+#include "common/compile_check_begin.h"
class FixedBitSet;
using FixedBitSetPtr = std::unique_ptr<FixedBitSet>;
@@ -34,7 +35,7 @@ public:
assert(num_bits >= 0 && num_bits <
std::numeric_limits<int32_t>::max());
_num_bits = num_bits;
_bits.resize(bits2words(_num_bits));
- _num_words = _bits.size();
+ _num_words = static_cast<int32_t>(_bits.size());
}
void clear() { std::fill(_bits.begin(), _bits.end(), 0ULL); }
@@ -90,13 +91,13 @@ public:
_bits[end_word] &= end_mask;
}
- void ensure_capacity(size_t num_bits) {
+ void ensure_capacity(int32_t num_bits) {
if (num_bits >= _num_bits) {
- size_t num_words = bits2words(num_bits);
+ int32_t num_words = bits2words(num_bits);
if (num_words >= _bits.size()) {
_bits.resize(num_words + 1, 0);
}
- reset(_bits.size() << 6);
+ reset(static_cast<int32_t>(_bits.size()) << 6);
}
}
@@ -190,4 +191,5 @@ private:
int32_t _num_words = 0;
};
+#include "common/compile_check_end.h"
} // namespace doris::segment_v2::inverted_index
\ No newline at end of file
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/util/mock_iterator.h
b/be/src/olap/rowset/segment_v2/inverted_index/util/mock_iterator.h
index ac16ec8dc3e..ec825caa8b3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/util/mock_iterator.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/util/mock_iterator.h
@@ -71,7 +71,7 @@ public:
return INT_MAX;
}
- int64_t doc_freq() const { return _impl->postings.size(); }
+ int32_t doc_freq() const { return
static_cast<int32_t>(_impl->postings.size()); }
int32_t next_position() {
auto& current_doc = _impl->current_doc;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/util/term_iterator.h
b/be/src/olap/rowset/segment_v2/inverted_index/util/term_iterator.h
index baf0984243b..4b18a132fb6 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/util/term_iterator.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/util/term_iterator.h
@@ -71,7 +71,7 @@ public:
return INT_MAX;
}
- int64_t doc_freq() const { return term_docs_->docFreq(); }
+ int32_t doc_freq() const { return term_docs_->docFreq(); }
bool read_range(DocRange* docRange) const { return
term_docs_->readRange(docRange); }
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/util/union_term_iterator.h
b/be/src/olap/rowset/segment_v2/inverted_index/util/union_term_iterator.h
index 8a9e2e61e4d..a82f34a08bf 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/util/union_term_iterator.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/util/union_term_iterator.h
@@ -139,10 +139,10 @@ public:
return top->doc_id();
}
- int64_t doc_freq() const { return _cost; }
+ int32_t doc_freq() const { return _cost; }
private:
- int64_t _cost = 0;
+ int32_t _cost = 0;
int32_t pos_queue_doc = -2;
DocsQueuePtr<T> _docs_queue;
PositionsQueuePtr _pos_queue;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]