This is an automated email from the ASF dual-hosted git repository.
airborne pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
from 270b56780a9 [fix](close) Release resource once fragment was cancelled (#58909)
add 6a7e1e5f8c3 [feature](inverted index) Add inverted index NORMALIZER support (#58178)
No new revisions were added by this update.
Summary of changes:
be/src/olap/inverted_index_parser.cpp | 16 +-
be/src/olap/inverted_index_parser.h | 8 +-
.../inverted_index/analysis_factory_mgr.cpp | 7 +
.../inverted_index/analyzer/analyzer.cpp | 6 +-
.../analyzer/custom_analyzer_config.h | 1 -
.../char_filter/icu_normalizer_char_filter.cpp | 91 +++++++
..._char_filter.h => icu_normalizer_char_filter.h} | 21 +-
.../icu_normalizer_char_filter_factory.h | 119 +++++++++
.../custom_normalizer.cpp} | 101 +++----
.../custom_normalizer.h} | 23 +-
.../normalizer/custom_normalizer_config.cpp | 49 ++++
.../custom_normalizer_config.h} | 40 +--
.../token_filter/icu_normalizer_filter.cpp | 72 +++++
.../icu_normalizer_filter.h} | 21 +-
.../token_filter/icu_normalizer_filter_factory.h | 105 ++++++++
.../rowset/segment_v2/inverted_index_writer.cpp | 2 +-
be/src/olap/tablet_schema.h | 3 +-
be/src/runtime/index_policy/index_policy_mgr.cpp | 82 ++++--
be/src/runtime/index_policy/index_policy_mgr.h | 21 +-
be/src/vec/exprs/vmatch_predicate.cpp | 2 +-
be/src/vec/functions/function_string.cpp | 1 +
be/src/vec/functions/function_string.h | 146 ++++++++++
be/src/vec/functions/function_tokenize.cpp | 5 +-
be/src/vec/functions/match.cpp | 6 +-
be/test/olap/inverted_index_parser_test.cpp | 17 ++
.../inverted_index/ananlyzer/analyzer_test.cpp | 16 +-
.../icu_normalizer_char_filter_factory_test.cpp | 268 +++++++++++++++++++
.../icu_normalizer_filter_factory_test.cpp | 295 +++++++++++++++++++++
.../segment_v2/inverted_index/util/reader_test.cpp | 2 +-
be/test/olap/tablet_schema_index_test.cpp | 6 +-
be/test/vec/function/function_match_test.cpp | 4 +-
be/test/vec/function/function_string_test.cpp | 91 +++++++
fe/fe-core/pom.xml | 5 +
.../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 1 +
.../antlr4/org/apache/doris/nereids/DorisParser.g4 | 5 +
.../apache/doris/analysis/InvertedIndexUtil.java | 86 ++++--
.../org/apache/doris/analysis/MatchPredicate.java | 8 +-
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../main/java/org/apache/doris/catalog/Index.java | 14 +-
.../ICUNormalizerCharFilterValidator.java | 75 ++++++
...java => ICUNormalizerTokenFilterValidator.java} | 43 +--
.../org/apache/doris/indexpolicy/IndexPolicy.java | 7 +-
.../apache/doris/indexpolicy/IndexPolicyMgr.java | 159 ++++++++---
.../doris/indexpolicy/IndexPolicyTypeEnum.java | 3 +-
.../doris/nereids/parser/LogicalPlanBuilder.java | 29 ++
.../{XpathString.java => UnicodeNormalize.java} | 27 +-
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
.../apache/doris/nereids/trees/plans/PlanType.java | 3 +
...mand.java => CreateIndexNormalizerCommand.java} | 42 ++-
...ommand.java => DropIndexNormalizerCommand.java} | 15 +-
...ommand.java => ShowIndexNormalizerCommand.java} | 15 +-
.../trees/plans/commands/info/IndexDefinition.java | 14 +-
.../trees/plans/visitor/CommandVisitor.java | 18 ++
gensrc/thrift/AgentService.thrift | 3 +-
gensrc/thrift/Exprs.thrift | 2 +-
.../analyzer/test_custom_normalizer.out | 15 ++
.../analyzer/test_unicode_normalize.out | 19 ++
.../analyzer/test_custom_normalizer.groovy | 131 +++++++++
.../analyzer/test_unicode_normalize.groovy | 79 ++++++
59 files changed, 2148 insertions(+), 324 deletions(-)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/char_filter/icu_normalizer_char_filter.cpp
copy be/src/olap/rowset/segment_v2/inverted_index/char_filter/{char_replace_char_filter.h => icu_normalizer_char_filter.h} (70%)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/char_filter/icu_normalizer_char_filter_factory.h
copy be/src/olap/rowset/segment_v2/inverted_index/{analyzer/custom_analyzer.cpp => normalizer/custom_normalizer.cpp} (51%)
copy be/src/olap/rowset/segment_v2/inverted_index/{analyzer/custom_analyzer.h => normalizer/custom_normalizer.h} (75%)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/normalizer/custom_normalizer_config.cpp
copy be/src/olap/rowset/segment_v2/inverted_index/{analyzer/custom_analyzer_config.h => normalizer/custom_normalizer_config.h} (59%)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_filter/icu_normalizer_filter.cpp
copy be/src/olap/rowset/segment_v2/inverted_index/{tokenizer/pinyin/pinyin_formatter.h => token_filter/icu_normalizer_filter.h} (62%)
create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_filter/icu_normalizer_filter_factory.h
create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/char_filter/icu_normalizer_char_filter_factory_test.cpp
create mode 100644 be/test/olap/rowset/segment_v2/inverted_index/token_filter/icu_normalizer_filter_factory_test.cpp
create mode 100644 fe/fe-core/src/main/java/org/apache/doris/indexpolicy/ICUNormalizerCharFilterValidator.java
copy fe/fe-core/src/main/java/org/apache/doris/indexpolicy/{CharReplaceCharFilterValidator.java => ICUNormalizerTokenFilterValidator.java} (50%)
copy fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/{XpathString.java => UnicodeNormalize.java} (76%)
copy fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/{CreateIndexCharFilterCommand.java => CreateIndexNormalizerCommand.java} (66%)
copy fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/{DropIndexCharFilterCommand.java => DropIndexNormalizerCommand.java} (83%)
copy fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/{ShowIndexCharFilterCommand.java => ShowIndexNormalizerCommand.java} (84%)
create mode 100644 regression-test/data/inverted_index_p0/analyzer/test_custom_normalizer.out
create mode 100644 regression-test/data/inverted_index_p0/analyzer/test_unicode_normalize.out
create mode 100644 regression-test/suites/inverted_index_p0/analyzer/test_custom_normalizer.groovy
create mode 100644 regression-test/suites/inverted_index_p0/analyzer/test_unicode_normalize.groovy
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org