This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch auto-pick-54479-branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
from 8031a86464b branch-3.1: [fix](variant) change session var for disable_variant_flatten_nested to enable_variant_flatten_nested #54413 (#54438)
add cd5212c6f08 [feature](inverted index) add char group tokenizer (#54479)
No new revisions were added by this update.
Summary of changes:
.../inverted_index/analysis_factory_mgr.cpp | 3 +
.../char/char_group_tokenizer_factory.cpp | 158 +++++++++++
.../char_group_tokenizer_factory.h} | 34 +--
.../tokenizer/char/char_tokenizer.cpp | 93 +++++++
.../char_tokenizer.h} | 35 +--
.../tokenizer/keyword/keyword_tokenizer.h | 2 -
.../char_group_tokenizer_factory_test.cpp | 292 +++++++++++++++++++++
.../indexpolicy/CharGroupTokenizerValidator.java | 92 +++++++
.../apache/doris/indexpolicy/IndexPolicyMgr.java | 3 +
.../doris/indexpolicy/PolicyValidatorTests.java | 20 ++
.../analyzer/test_char_group_tokenizer.out | Bin 0 -> 999 bytes
.../analyzer/test_char_group_tokenizer.groovy | 131 +++++++++
12 files changed, 831 insertions(+), 32 deletions(-)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/char/char_group_tokenizer_factory.cpp
copy be/src/olap/rowset/segment_v2/inverted_index/tokenizer/{standard/standard_tokenizer_factory.h => char/char_group_tokenizer_factory.h} (55%)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/char/char_tokenizer.cpp
copy be/src/olap/rowset/segment_v2/inverted_index/tokenizer/{keyword/keyword_tokenizer_factory.h => char/char_tokenizer.h} (57%)
create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/tokenizer/char_group_tokenizer_factory_test.cpp
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/indexpolicy/CharGroupTokenizerValidator.java
create mode 100644 regression-test/data/inverted_index_p0/analyzer/test_char_group_tokenizer.out
create mode 100644 regression-test/suites/inverted_index_p0/analyzer/test_char_group_tokenizer.groovy
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org