This is an automated email from the ASF dual-hosted git repository.
jermy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph.git
The following commit(s) were added to refs/heads/master by this push:
new 88a8d782b improve analyzer code (#1990)
88a8d782b is described below
commit 88a8d782b88e7e4e11a6d9a783c982d24fe8fcc6
Author: Jade Peng <[email protected]>
AuthorDate: Wed Oct 26 22:57:41 2022 +0800
improve analyzer code (#1990)
Co-authored-by: jadepeng <[email protected]>
---
.../main/java/com/baidu/hugegraph/StandardHugeGraph.java | 2 +-
.../java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java | 2 +-
.../java/com/baidu/hugegraph/analyzer/AnsjAnalyzer.java | 2 +-
.../java/com/baidu/hugegraph/analyzer/HanLPAnalyzer.java | 4 ++--
.../main/java/com/baidu/hugegraph/analyzer/IKAnalyzer.java | 4 ++--
.../java/com/baidu/hugegraph/analyzer/JiebaAnalyzer.java | 2 +-
.../java/com/baidu/hugegraph/analyzer/MMSeg4JAnalyzer.java | 6 +++---
.../java/com/baidu/hugegraph/analyzer/SmartCNAnalyzer.java | 6 +-----
.../java/com/baidu/hugegraph/analyzer/WordAnalyzer.java | 2 +-
.../java/com/baidu/hugegraph/unit/core/AnalyzerTest.java | 13 +++++--------
10 files changed, 18 insertions(+), 25 deletions(-)
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/StandardHugeGraph.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/StandardHugeGraph.java
index 5e2c23acf..53baaa780 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/StandardHugeGraph.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/StandardHugeGraph.java
@@ -1395,7 +1395,7 @@ public class StandardHugeGraph implements HugeGraph {
private void setOpened() {
// The backend tx may be reused, here just set a flag
- assert this.opened.get() == false;
+ assert !this.opened.get();
this.opened.set(true);
this.transactions.get().openedTime(DateUtil.now().getTime());
this.refs.incrementAndGet();
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
index c8d05e6f4..fbd043be2 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnalyzerFactory.java
@@ -27,7 +27,7 @@ import com.baidu.hugegraph.backend.serializer.SerializerFactory;
public class AnalyzerFactory {
- private static Map<String, Class<? extends Analyzer>> analyzers;
+ private static final Map<String, Class<? extends Analyzer>> analyzers;
static {
analyzers = new ConcurrentHashMap<>();
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnsjAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnsjAnalyzer.java
index 4ef09cfce..179cc97e1 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnsjAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/AnsjAnalyzer.java
@@ -45,7 +45,7 @@ public class AnsjAnalyzer implements Analyzer {
"NlpAnalysis"
);
- private String analysis;
+ private final String analysis;
public AnsjAnalyzer(String mode) {
if (!SUPPORT_MODES.contains(mode)) {
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/HanLPAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/HanLPAnalyzer.java
index 36de2ae98..2e282065d 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/HanLPAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/HanLPAnalyzer.java
@@ -58,7 +58,7 @@ public class HanLPAnalyzer implements Analyzer {
.enablePlaceRecognize(true)
.enableOrganizationRecognize(true);
- private String tokenizer;
+ private final String tokenizer;
public HanLPAnalyzer(String mode) {
if (!SUPPORT_MODES.contains(mode)) {
@@ -71,7 +71,7 @@ public class HanLPAnalyzer implements Analyzer {
@Override
public Set<String> segment(String text) {
- List<Term> terms = null;
+ List<Term> terms;
switch (this.tokenizer) {
case "standard":
terms = StandardTokenizer.segment(text);
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/IKAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/IKAnalyzer.java
index acb577c8d..6cf2c0ff0 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/IKAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/IKAnalyzer.java
@@ -41,7 +41,7 @@ public class IKAnalyzer implements Analyzer {
"max_word"
);
- private boolean smartSegMode;
+ private final boolean smartSegMode;
public IKAnalyzer(String mode) {
if (!SUPPORT_MODES.contains(mode)) {
@@ -58,7 +58,7 @@ public class IKAnalyzer implements Analyzer {
IKSegmenter ik = new IKSegmenter(new StringReader(text),
this.smartSegMode);
try {
- Lexeme word = null;
+ Lexeme word;
while ((word = ik.next()) != null) {
result.add(word.getLexemeText());
}
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JiebaAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JiebaAnalyzer.java
index 6deb8015c..0e0a72b4c 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JiebaAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/JiebaAnalyzer.java
@@ -40,7 +40,7 @@ public class JiebaAnalyzer implements Analyzer {
private static final JiebaSegmenter JIEBA_SEGMENTER = new JiebaSegmenter();
- private JiebaSegmenter.SegMode segMode;
+ private final JiebaSegmenter.SegMode segMode;
public JiebaAnalyzer(String mode) {
if (!SUPPORT_MODES.contains(mode)) {
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/MMSeg4JAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/MMSeg4JAnalyzer.java
index 42c4924e6..1bc63f1ad 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/MMSeg4JAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/MMSeg4JAnalyzer.java
@@ -48,7 +48,7 @@ public class MMSeg4JAnalyzer implements Analyzer {
private static final Dictionary DIC = Dictionary.getInstance();
- private Seg seg;
+ private final Seg seg;
public MMSeg4JAnalyzer(String mode) {
if (!SUPPORT_MODES.contains(mode)) {
@@ -69,7 +69,7 @@ public class MMSeg4JAnalyzer implements Analyzer {
break;
default:
throw new AssertionError(String.format(
- "Unsupported segment mode '%s'", this.seg));
+ "Unsupported segment mode '%s'", mode));
}
}
@@ -78,7 +78,7 @@ public class MMSeg4JAnalyzer implements Analyzer {
Set<String> result = InsertionOrderUtil.newSet();
MMSeg mmSeg = new MMSeg(new StringReader(text), this.seg);
try {
- Word word = null;
+ Word word;
while ((word = mmSeg.next()) != null) {
result.add(word.getString());
}
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/SmartCNAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/SmartCNAnalyzer.java
index 9b0dc699c..ad5a07962 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/SmartCNAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/SmartCNAnalyzer.java
@@ -21,7 +21,6 @@ package com.baidu.hugegraph.analyzer;
import java.io.Reader;
import java.io.StringReader;
-import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
@@ -30,15 +29,12 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import com.baidu.hugegraph.HugeException;
import com.baidu.hugegraph.util.InsertionOrderUtil;
-import com.google.common.collect.ImmutableList;
/**
* Reference from https://my.oschina.net/apdplat/blog/412921
*/
public class SmartCNAnalyzer implements Analyzer {
- public static final List<String> SUPPORT_MODES = ImmutableList.of();
-
private static final SmartChineseAnalyzer ANALYZER =
new SmartChineseAnalyzer();
@@ -52,7 +48,7 @@ public class SmartCNAnalyzer implements Analyzer {
Reader reader = new StringReader(text);
try (TokenStream tokenStream = ANALYZER.tokenStream("text", reader)) {
tokenStream.reset();
- CharTermAttribute term = null;
+ CharTermAttribute term;
while (tokenStream.incrementToken()) {
term = tokenStream.getAttribute(CharTermAttribute.class);
result.add(term.toString());
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
index 3e86d3f98..48ba66d95 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/analyzer/WordAnalyzer.java
@@ -50,7 +50,7 @@ public class WordAnalyzer implements Analyzer {
.add("PureEnglish")
.build();
- private SegmentationAlgorithm algorithm;
+ private final SegmentationAlgorithm algorithm;
public WordAnalyzer(String mode) {
try {
diff --git a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
index e72f3ce9b..76273460a 100644
--- a/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
+++ b/hugegraph-test/src/main/java/com/baidu/hugegraph/unit/core/AnalyzerTest.java
@@ -19,6 +19,7 @@
package com.baidu.hugegraph.unit.core;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
@@ -32,9 +33,9 @@ import com.baidu.hugegraph.testutil.Assert;
public class AnalyzerTest {
- private static String text1 = "England wins World Cup";
- private static String text2 = "英格兰世界杯夺冠,中华人民共和国国歌," +
- "百度科技园位于北京市海淀区西北旺东路10号院";
+ private static final String text1 = "England wins World Cup";
+ private static final String text2 = "英格兰世界杯夺冠,中华人民共和国国歌," +
+ "百度科技园位于北京市海淀区西北旺东路10号院";
@Before
public void setup() {
@@ -216,11 +217,7 @@ public class AnalyzerTest {
}
private static Set<String> setOf(String... elems) {
- Set<String> result = new HashSet<>();
- for (String elem : elems) {
- result.add(elem);
- }
- return result;
+ return new HashSet<>(Arrays.asList(elems));
}
}