EvanLjp commented on a change in pull request #6249: URL: https://github.com/apache/skywalking/pull/6249#discussion_r563307684
########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/base/AnalyzerSetting.java ########## @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.skywalking.oap.server.storage.plugin.elasticsearch.base; + +import com.google.gson.annotations.SerializedName; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class AnalyzerSetting { + /** + * A built-in or customised tokenizer. + */ + private Map<String, Object> tokenizer = new HashMap<>(); + /** + * An optional array of built-in or customised character filters. + */ + @SerializedName("char_filter") + private Map<String, Object> charFilter = new HashMap<>(); + /** + * An optional array of built-in or customised token filters. + */ + private Map<String, Object> filter = new HashMap<>(); + /** + * The custom analyzers. 
+ */ + private Map<String, Object> analyzer = new HashMap<>(); + + public void combine(AnalyzerSetting analyzerSetting) { Review comment:  One index setting has multiple analyzers. ########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/base/AnalyzerSetting.java ########## @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.skywalking.oap.server.storage.plugin.elasticsearch.base; + +import com.google.gson.annotations.SerializedName; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class AnalyzerSetting { + /** + * A built-in or customised tokenizer. + */ + private Map<String, Object> tokenizer = new HashMap<>(); + /** + * An optional array of built-in or customised character filters. + */ + @SerializedName("char_filter") + private Map<String, Object> charFilter = new HashMap<>(); + /** + * An optional array of built-in or customised token filters. + */ + private Map<String, Object> filter = new HashMap<>(); + /** + * The custom analyzers. 
+ */ + private Map<String, Object> analyzer = new HashMap<>(); + + public void combine(AnalyzerSetting analyzerSetting) { Review comment:  One index setting has multiple analyzers. ########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/StorageModuleElasticsearchConfig.java ########## @@ -20,6 +20,7 @@ import lombok.Getter; import lombok.Setter; +import org.apache.skywalking.oap.server.core.storage.annotation.Column; Review comment: It's used in a link, so I may not understand your suggestion to modify this.  ########## File path: oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/annotation/Column.java ########## @@ -87,6 +86,27 @@ */ ValueDataType dataType() default ValueDataType.NOT_VALUE; + /** + * The storage analyzer mode. + * + * @since 8.4.0 + */ + AnalyzerType analyzer() default AnalyzerType.OAP_ANALYZER; + + /** + * The analyzer declares the text analysis mode. 
+ */ + enum AnalyzerType { Review comment: fixed ########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/base/StorageEsInstaller.java ########## @@ -135,17 +155,19 @@ protected void createTable(Model model) throws StorageException { String matchCName = MatchCNameBuilder.INSTANCE.build(columnDefine.getColumnName().getName()); Map<String, Object> originalColumn = new HashMap<>(); - originalColumn.put("type", columnTypeEsMapping.transform(columnDefine.getType(), columnDefine.getGenericType())); + originalColumn.put( + "type", columnTypeEsMapping.transform(columnDefine.getType(), columnDefine.getGenericType())); originalColumn.put("copy_to", matchCName); properties.put(columnDefine.getColumnName().getName(), originalColumn); Map<String, Object> matchColumn = new HashMap<>(); matchColumn.put("type", "text"); - matchColumn.put("analyzer", "oap_analyzer"); + matchColumn.put("analyzer", columnDefine.getAnalyzer().name().toLowerCase()); Review comment: fixed ########## File path: oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/annotation/Column.java ########## @@ -62,17 +62,16 @@ * @return the length of this column, this is only for {@link String} column. The usage of this depends on the * storage implementation. * - * Notice, different lengths may cause different types. - * Such as, over 16383 would make the type in MySQL to be MEDIUMTEXT, due to database varchar max=16383 - * + * Notice, different lengths may cause different types. Such as, over 16383 would make the type in MySQL to be + * MEDIUMTEXT, due to database varchar max=16383 * @since 7.1.0 */ int length() default 200; /** * The return name of system environment could provide an override value of the length limitation. - * @return the variable name of system environment. * + * @return the variable name of system environment. 
Review comment: fixed ########## File path: oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/annotation/Column.java ########## @@ -87,6 +86,27 @@ */ ValueDataType dataType() default ValueDataType.NOT_VALUE; + /** + * The storage analyzer mode. + * + * @since 8.4.0 + */ + AnalyzerType analyzer() default AnalyzerType.OAP_ANALYZER; + + /** + * The analyzer declares the text analysis mode. + */ + enum AnalyzerType { Review comment: I would add a functional interface to refactor it. ########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/base/StorageEsInstaller.java ########## @@ -113,14 +115,32 @@ protected void createTable(Model model) throws StorageException { setting.put("index.refresh_interval", model.isRecord() ? TimeValue.timeValueSeconds(10).toString() : TimeValue.timeValueSeconds(config.getFlushInterval()).toString()); - setting.put("analysis.analyzer.oap_analyzer.type", "stop"); + setting.put("analysis", getAnalyzerSetting(model.getAnalyzer())); Review comment: An analyzer is for columns, so it should be part of the column configuration. The index setting needs to define the configuration of multiple columns in combination at the same time, so I think the current structure is clear. 
this is a test example: ```json { "log" : { "order" : 0, "index_patterns" : [ "log-*" ], "settings" : { "index" : { "refresh_interval" : "10s", "analysis" : { "filter" : { "cjk_log_stop_filter" : { "type" : "stop", "stopwords" : "_none_" }, "cjk_log_bigram_filter" : { "output_unigrams" : "true", "type" : "cjk_bigram" }, "cjk_log_word_delimiter" : { "type" : "word_delimiter", "preserve_original" : "true", "split_on_numerics" : "false" } }, "analyzer" : { "oap_log_analyzer" : { "filter" : [ "cjk_width", "lowercase", "cjk_log_bigram_filter", "cjk_log_word_delimiter", "cjk_log_stop_filter" ], "type" : "custom", "tokenizer" : "standard" }, "oap_analyzer" : { "type" : "stop" } } }, "number_of_shards" : "5", "number_of_replicas" : "0" } }, "mappings" : { "properties" : { "trace_id" : { "type" : "keyword" }, "unique_id" : { "type" : "keyword" }, "content_match" : { "analyzer" : "oap_log_analyzer", "type" : "text" }, "span_id" : { "type" : "integer" }, "endpoint_name" : { "copy_to" : "endpoint_name_match", "type" : "keyword" }, "endpoint_id" : { "type" : "keyword" }, "service_instance_id" : { "type" : "keyword" }, "content" : { "copy_to" : "content_match", "type" : "keyword" }, "endpoint_name_match" : { "analyzer" : "oap_analyzer", "type" : "text" }, "tags" : { "type" : "keyword" }, "trace_segment_id" : { "type" : "keyword" }, "content_type" : { "index" : false, "type" : "integer" }, "tags_raw_data" : { "index" : false, "type" : "binary" }, "service_id" : { "type" : "keyword" }, "time_bucket" : { "type" : "long" }, "is_error" : { "type" : "integer" }, "timestamp" : { "type" : "long" } } }, "aliases" : { "log" : { } } } } ``` ########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/base/StorageEsInstaller.java ########## @@ -113,14 +115,32 @@ protected void createTable(Model model) throws StorageException { setting.put("index.refresh_interval", model.isRecord() ? 
TimeValue.timeValueSeconds(10).toString() : TimeValue.timeValueSeconds(config.getFlushInterval()).toString()); - setting.put("analysis.analyzer.oap_analyzer.type", "stop"); + setting.put("analysis", getAnalyzerSetting(model.getAnalyzer())); Review comment: It may only be used in the log mode. So you want to differentiate based on type? That means we may need a new annotation. Wouldn't that be a little heavier than the current approach? ########## File path: oap-server/server-storage-plugin/storage-elasticsearch-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/elasticsearch/base/AnalyzerSetting.java ########## @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.skywalking.oap.server.storage.plugin.elasticsearch.base; + +import com.google.gson.Gson; +import com.google.gson.annotations.SerializedName; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; +import org.apache.skywalking.oap.server.core.storage.StorageException; +import org.apache.skywalking.oap.server.core.storage.annotation.Column; +import org.apache.skywalking.oap.server.storage.plugin.elasticsearch.StorageModuleElasticsearchConfig; + +@Getter +@Setter +@Slf4j +public class AnalyzerSetting { + /** + * A built-in or customised tokenizer. + */ + private Map<String, Object> tokenizer = new HashMap<>(); + /** + * An optional array of built-in or customised character filters. + */ + @SerializedName("char_filter") + private Map<String, Object> charFilter = new HashMap<>(); + /** + * An optional array of built-in or customised token filters. + */ + private Map<String, Object> filter = new HashMap<>(); + /** + * The custom analyzers. 
+ */ + private Map<String, Object> analyzer = new HashMap<>(); + + public void combine(AnalyzerSetting analyzerSetting) { + this.analyzer.putAll(analyzerSetting.getAnalyzer()); + this.tokenizer.putAll(analyzerSetting.tokenizer); + this.filter.putAll(analyzerSetting.filter); + this.charFilter.putAll(analyzerSetting.charFilter); + } + + @Override + public boolean equals(final Object o) { + if (this == o) + return true; + if (!(o instanceof AnalyzerSetting)) + return false; + final AnalyzerSetting that = (AnalyzerSetting) o; + return getTokenizer().equals(that.getTokenizer()) && + getCharFilter().equals(that.getCharFilter()) && + getFilter().equals(that.getFilter()) && + getAnalyzer().equals(that.getAnalyzer()); + } + + @Override + public int hashCode() { + return Objects.hash(getTokenizer(), getCharFilter(), getFilter(), getAnalyzer()); + } + + public enum Generator { + OAP_ANALYZER_SETTING_GENERATOR( + Column.AnalyzerType.OAP_ANALYZER, + config -> new Gson().fromJson(config.getOapAnalyzer(), AnalyzerSetting.class) + ), + OAP_LOG_ANALYZER_SETTING_GENERATOR( + Column.AnalyzerType.OAP_LOG_ANALYZER, + config -> new Gson().fromJson(config.getOapLogAnalyzer(), AnalyzerSetting.class) + ); + + private final Column.AnalyzerType type; + private final GenerateAnalyzerSettingFunc func; + + Generator(final Column.AnalyzerType type, + final GenerateAnalyzerSettingFunc func) { + this.type = type; + this.func = func; + } + + public GenerateAnalyzerSettingFunc GetGenerateFunc() { Review comment: Fixed. I have been writing Golang recently, hence the Go-style capitalized method name. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
