This is an automated email from the ASF dual-hosted git repository.

airborne pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


    from 7631f2b8de5 [feature](partition prune) Add variable skip_prune_predicate to skip prune predicate after partition prune (#57688)
     add 94fe6aae389 [feature](inverted index) add custom analyzer support with pinyin tokenizer and pinyin filter (#57097)

No new revisions were added by this update.

Summary of changes:
 .licenserc.yaml                                    |      1 +
 be/CMakeLists.txt                                  |      5 +-
 be/dict/pinyin/pinyin.txt                          |  20902 +
 be/dict/pinyin/pinyin_alphabet.dict                |    442 +
 be/dict/pinyin/polyphone.txt                       | 462490 ++++++++++++++++++
 .../inverted_index/analysis_factory_mgr.cpp        |      6 +
 .../inverted_index/token_filter/pinyin_filter.cpp  |    441 +
 .../inverted_index/token_filter/pinyin_filter.h    |    105 +
 .../token_filter/pinyin_filter_factory.cpp         |     76 +
 .../token_filter/pinyin_filter_factory.h           |     41 +
 .../tokenizer/pinyin/chinese_util.cpp              |     42 +
 .../inverted_index/tokenizer/pinyin/chinese_util.h |     34 +
 .../tokenizer/pinyin/pinyin_alphabet_tokenizer.cpp |    224 +
 .../tokenizer/pinyin/pinyin_alphabet_tokenizer.h   |     49 +
 .../tokenizer/pinyin/pinyin_config.h               |     46 +
 .../tokenizer/pinyin/pinyin_format.cpp             |     70 +
 .../tokenizer/pinyin/pinyin_format.h               |     78 +
 .../tokenizer/pinyin/pinyin_formatter.cpp          |    214 +
 .../tokenizer/pinyin/pinyin_formatter.h            |     38 +
 .../tokenizer/pinyin/pinyin_tokenizer.cpp          |    339 +
 .../tokenizer/pinyin/pinyin_tokenizer.h            |     84 +
 .../tokenizer/pinyin/pinyin_tokenizer_factory.cpp  |     43 +
 .../tokenizer/pinyin/pinyin_tokenizer_factory.h    |     39 +
 .../tokenizer/pinyin/pinyin_util.cpp               |    292 +
 .../inverted_index/tokenizer/pinyin/pinyin_util.h  |     55 +
 .../inverted_index/tokenizer/pinyin/rune.h         |     43 +
 .../tokenizer/pinyin/smart_forest.cpp              |    195 +
 .../inverted_index/tokenizer/pinyin/smart_forest.h |    143 +
 .../tokenizer/pinyin/smart_get_word.cpp            |    219 +
 .../tokenizer/pinyin/smart_get_word.h              |     95 +
 .../inverted_index/tokenizer/pinyin/term_item.h    |     73 +
 .../token_filter/pinyin_filter_test.cpp            |    409 +
 .../tokenizer/pinyin_alphabet_tokenizer_test.cpp   |    248 +
 .../tokenizer/pinyin_analysis_test.cpp             |   1372 +
 .../inverted_index/tokenizer/pinyin_util_test.cpp  |    685 +
 .../inverted_index/tokenizer/smart_forest_test.cpp |    204 +
 .../tokenizer/smart_get_word_test.cpp              |    361 +
 .../org/apache/doris/indexpolicy/IndexPolicy.java  |      4 +-
 .../apache/doris/indexpolicy/IndexPolicyMgr.java   |      6 +
 .../indexpolicy/PinyinTokenFilterValidator.java    |    159 +
 .../indexpolicy/PinyinTokenizerValidator.java      |    157 +
 .../analyzer/test_custom_analyzer.out              |    174 +
 .../analyzer/test_custom_analyzer.groovy           |    347 +-
 43 files changed, 491046 insertions(+), 4 deletions(-)
 create mode 100644 be/dict/pinyin/pinyin.txt
 create mode 100644 be/dict/pinyin/pinyin_alphabet.dict
 create mode 100644 be/dict/pinyin/polyphone.txt
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_filter/pinyin_filter.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_filter/pinyin_filter.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_filter/pinyin_filter_factory.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_filter/pinyin_filter_factory.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/chinese_util.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/chinese_util.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_alphabet_tokenizer.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_alphabet_tokenizer.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_config.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_format.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_format.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_formatter.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_formatter.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_tokenizer.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_tokenizer.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_tokenizer_factory.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_tokenizer_factory.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_util.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/pinyin_util.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/rune.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/smart_forest.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/smart_forest.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/smart_get_word.cpp
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/smart_get_word.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin/term_item.h
 create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/token_filter/pinyin_filter_test.cpp
 create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin_alphabet_tokenizer_test.cpp
 create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin_analysis_test.cpp
 create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/tokenizer/pinyin_util_test.cpp
 create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/tokenizer/smart_forest_test.cpp
 create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/tokenizer/smart_get_word_test.cpp
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/indexpolicy/PinyinTokenFilterValidator.java
 create mode 100644 fe/fe-core/src/main/java/org/apache/doris/indexpolicy/PinyinTokenizerValidator.java


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to