This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 76d263b Name constants according to JNC (#87)
76d263b is described below
commit 76d263b11ac7c85a40b2d8922de70cd5815591e0
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Feb 24 08:32:41 2023 +0100
Name constants according to JNC (#87)
- adjusts non-compliant naming of constant fields so that upper-case
(UPPER_SNAKE_CASE) names are used
- adjusts visibility of some fields to respect information hiding principle
- fixes incorrect indentation in some classes
- removes unused imports along the way
---
.../utils/cfg/ProbabilisticContextFreeGrammar.java | 30 +++---
.../tools/coref/mention/AbstractMentionFinder.java | 6 +-
.../opennlp/tools/coref/mention/DefaultParse.java | 12 +--
.../tools/coref/mention/JWNLDictionary.java | 4 +-
.../opennlp/tools/coref/mention/PTBHeadFinder.java | 18 ++--
.../tools/coref/resolver/MaxentResolver.java | 19 ++--
.../coref/resolver/PluralPronounResolver.java | 2 +-
.../tools/coref/resolver/ResolverUtils.java | 52 +++++-----
.../coref/resolver/SingularPronounResolver.java | 4 +-
.../coref/resolver/SpeechPronounResolver.java | 2 +-
.../java/opennlp/tools/coref/sim/GenderModel.java | 6 +-
.../java/opennlp/tools/coref/sim/NumberModel.java | 4 +-
.../formats/muc/MucMentionInserterStream.java | 4 +-
.../apps/object_dedup/SimilarityAccessorBase.java | 44 ++++-----
.../tools/apps/relevanceVocabs/SentimentVocab.java | 4 +-
.../tools/apps/utils/email/EmailSender.java | 14 +--
.../ClassifierTrainingSetIndexer.java | 24 ++---
.../tools/doc_classifier/DocClassifier.java | 37 +++----
...cClassifierTrainingSetMultilingualExtender.java | 9 +-
.../enron_email_recognizer/EmailNormalizer.java | 12 +--
.../EmailTrainingSetFormer.java | 17 ++--
.../main/java/opennlp/tools/nl2code/NL2Obj.java | 4 +-
.../opennlp/tools/nl2code/NL2ObjCreateAssign.java | 18 ++--
.../tools/parse_thicket/apps/WebPageExtractor.java | 4 +-
.../parse_thicket/matching/LemmaGeneralizer.java | 78 +++++++--------
.../tools/similarity/apps/ContentGenerator.java | 25 +++--
.../apps/GeneratedSentenceProcessor.java | 9 +-
.../similarity/apps/RelatedSentenceFinder.java | 18 ++--
.../similarity/apps/RelatedSentenceFinderML.java | 2 +-
.../similarity/apps/StoryDiscourseNavigator.java | 2 +-
.../tools/similarity/apps/YahooAnswersMiner.java | 12 +--
.../apps/solr/IterativeQueryComponent.java | 8 +-
.../apps/solr/NLProgram2CodeRequestHandler.java | 5 +-
.../solr/SearchResultsReRankerRequestHandler.java | 5 +-
.../apps/taxo_builder/DomainTaxonomyExtender.java | 10 +-
.../taxo_builder/TaxoQuerySnapshotMatcher.java | 5 -
.../taxo_builder/TaxonomyExtenderViaMebMining.java | 4 +-
.../tools/similarity/apps/utils/FileHandler.java | 1 -
.../tools/similarity/apps/utils/PageFetcher.java | 10 +-
.../opennlp/tools/similarity/apps/utils/Utils.java | 108 ++++++++++-----------
.../textsimilarity/ParseTreeChunkListScorer.java | 4 +-
.../textsimilarity/SentencePairMatchResult.java | 5 +-
.../tools/textsimilarity/TextProcessor.java | 4 +-
.../chunker2matcher/ParserCacheSerializer.java | 20 ++--
.../ParserChunker2MatcherProcessor.java | 5 +-
.../tools/disambiguator/FeaturesExtractor.java | 2 +-
.../disambiguator/IMSWSDContextGenerator.java | 2 +-
.../disambiguator/OSCCWSDContextGenerator.java | 2 +-
.../opennlp/tools/disambiguator/WSDHelper.java | 18 ++--
.../tools/disambiguator/WSDSampleStream.java | 6 +-
.../tools/disambiguator/WSDisambiguatorME.java | 8 +-
.../WordRelationshipDetermination.java | 4 +-
.../opennlp/summarization/meta/MetaSummarizer.java | 4 +-
.../preprocess/DefaultDocProcessor.java | 2 -
.../opennlp/summarization/textrank/TextRank.java | 4 +-
.../lexicalchaining/LexChainTest.java | 8 +-
.../LexChainingKeywordExtractorTest.java | 6 +-
57 files changed, 359 insertions(+), 397 deletions(-)
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
index 3a7d1d8..2a9d9e0 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
@@ -41,15 +41,15 @@ public class ProbabilisticContextFreeGrammar {
private final String startSymbol;
private final boolean randomExpansion;
- private static final Rule emptyRule = new Rule("EMPTY~", "");
+ private static final Rule EMPTY_RULE = new Rule("EMPTY~", "");
- private static final String nonTerminalMatcher =
"[\\w\\~\\*\\-\\.\\,\\'\\:\\_\\\"]";
- private static final String terminalMatcher =
"[\\*òàùìèé\\|\\w\\'\\.\\,\\:\\_Ù\\?È\\%\\;À\\-\\\"]";
+ private static final String NON_TERMINAL_MATCHER =
"[\\w\\~\\*\\-\\.\\,\\'\\:\\_\\\"]";
+ private static final String TERMINAL_MATCHER =
"[\\*òàùìèé\\|\\w\\'\\.\\,\\:\\_Ù\\?È\\%\\;À\\-\\\"]";
- private static final Pattern terminalPattern =
Pattern.compile("\\(("+nonTerminalMatcher+"+)\\s("+terminalMatcher+"+)\\)");
- private static final Pattern nonTerminalPattern = Pattern.compile(
- "\\(("+nonTerminalMatcher+"+)" + // source NT
-
"\\s("+nonTerminalMatcher+"+)((\\s"+nonTerminalMatcher+"+)*)\\)" // expansion
NTs
+ private static final Pattern TERMINAL_PATTERN = Pattern.compile("\\(("+
NON_TERMINAL_MATCHER +"+)\\s("+ TERMINAL_MATCHER +"+)\\)");
+ private static final Pattern NON_TERMINAL_PATTERN = Pattern.compile(
+ "\\(("+ NON_TERMINAL_MATCHER +"+)" + // source NT
+ "\\s("+ NON_TERMINAL_MATCHER +"+)((\\s"+
NON_TERMINAL_MATCHER +"+)*)\\)" // expansion NTs
);
public ProbabilisticContextFreeGrammar(Collection<String>
nonTerminalSymbols, Collection<String> terminalSymbols,
@@ -255,7 +255,7 @@ public class ProbabilisticContextFreeGrammar {
@Override
public String toString() {
- if (getRule() != emptyRule) {
+ if (getRule() != EMPTY_RULE) {
return "(" +
(rule != null ? rule.getEntry() : null) + " " +
(leftTree != null && rightTree != null ?
@@ -295,10 +295,10 @@ public class ProbabilisticContextFreeGrammar {
Collection<String> nonTerminals = new HashSet<>();
Collection<String> terminals = new HashSet<>();
- rules.put(emptyRule, 1d);
- rulesMap.put(emptyRule, 1d);
- nonTerminals.add(emptyRule.getEntry());
- terminals.add(emptyRule.getExpansion()[0]);
+ rules.put(EMPTY_RULE, 1d);
+ rulesMap.put(EMPTY_RULE, 1d);
+ nonTerminals.add(EMPTY_RULE.getEntry());
+ terminals.add(EMPTY_RULE.getExpansion()[0]);
for (String parseTreeString : parseStrings) {
@@ -308,7 +308,7 @@ public class ProbabilisticContextFreeGrammar {
String toConsume = String.valueOf(parseTreeString);
- Matcher m = terminalPattern.matcher(parseTreeString);
+ Matcher m = TERMINAL_PATTERN.matcher(parseTreeString);
while (m.find()) {
String nt = m.group(1);
String t = m.group(2);
@@ -321,7 +321,7 @@ public class ProbabilisticContextFreeGrammar {
}
while (toConsume.contains(" ") && !toConsume.trim().equals("( " +
startSymbol + " )")) {
- Matcher m2 = nonTerminalPattern.matcher(toConsume);
+ Matcher m2 = NON_TERMINAL_PATTERN.matcher(toConsume);
while (m2.find()) {
String nt = m2.group(1);
String t1 = m2.group(2);
@@ -370,7 +370,7 @@ public class ProbabilisticContextFreeGrammar {
if (!terminals.contains(firstExpansion)) {
if (nonTerminals.contains(firstExpansion)) {
// nt1 -> nt2 should be expanded in nt1 -> nt2,E
- Rule newRule = new Rule(rule.getEntry(), firstExpansion,
emptyRule.getEntry());
+ Rule newRule = new Rule(rule.getEntry(), firstExpansion,
EMPTY_RULE.getEntry());
addRule(newRule, rulesMap);
} else {
throw new RuntimeException("rule "+rule+" expands to neither a
terminal or non terminal");
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
index a574e50..43f1ce6 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
@@ -187,9 +187,9 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
}
private boolean handledPronoun(String tok) {
- return ResolverUtils.singularThirdPersonPronounPattern.matcher(tok).find()
||
-
ResolverUtils.pluralThirdPersonPronounPattern.matcher(tok).find() ||
- ResolverUtils.speechPronounPattern.matcher(tok).find();
+ return
ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(tok).find() ||
+
ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(tok).find() ||
+ ResolverUtils.SPEECH_PRONOUN_PATTERN.matcher(tok).find();
}
private void collectPossesivePronouns(Parse np, List<Mention> entities) {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
index 114b417..725a213 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java
@@ -41,7 +41,7 @@ public class DefaultParse extends AbstractParse {
private final Parse parse;
private final int sentenceNumber;
- private static final Set<String> entitySet = new
HashSet<>(Arrays.asList(NAME_TYPES));
+ private static final Set<String> ENTITY_SET = new
HashSet<>(Arrays.asList(NAME_TYPES));
/**
* Initializes the current instance.
@@ -65,7 +65,7 @@ public class DefaultParse extends AbstractParse {
List<Parse> kids = new LinkedList<>(Arrays.asList(parse.getChildren()));
while (kids.size() > 0) {
Parse p = kids.remove(0);
- if (entitySet.contains(p.getType())) {
+ if (ENTITY_SET.contains(p.getType())) {
names.add(p);
}
else {
@@ -83,7 +83,7 @@ public class DefaultParse extends AbstractParse {
List<Parse> kids = new ArrayList<>(Arrays.asList(parse.getChildren()));
for (int ci = 0; ci < kids.size(); ci++) {
Parse kid = kids.get(ci);
- if (entitySet.contains(kid.getType())) {
+ if (ENTITY_SET.contains(kid.getType())) {
kids.remove(ci);
kids.addAll(ci, Arrays.asList(kid.getChildren()));
ci--;
@@ -108,7 +108,7 @@ public class DefaultParse extends AbstractParse {
}
public String getSyntacticType() {
- if (entitySet.contains(parse.getType())) {
+ if (ENTITY_SET.contains(parse.getType())) {
return null;
}
else if (parse.getType().contains("#")) {
@@ -130,7 +130,7 @@ public class DefaultParse extends AbstractParse {
}
public String getEntityType() {
- if (entitySet.contains(parse.getType())) {
+ if (ENTITY_SET.contains(parse.getType())) {
return parse.getType();
}
else {
@@ -165,7 +165,7 @@ public class DefaultParse extends AbstractParse {
// the type can be extracted from. Then it just depends
// on the training data and not the values inside NAME_TYPES.
- if (entitySet.contains(parse.getType())) {
+ if (ENTITY_SET.contains(parse.getType())) {
return true;
}
else {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
index ae5b869..b6cdb79 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java
@@ -53,7 +53,7 @@ public class JWNLDictionary implements Dictionary {
private final net.didion.jwnl.dictionary.Dictionary dict;
private MorphologicalProcessor morphy;
- private static final String[] empty = new String[0];
+ private static final String[] EMPTY = new String[0];
public JWNLDictionary(String searchDirectory) throws IOException,
JWNLException {
PointerType.initialize();
@@ -162,7 +162,7 @@ public class JWNLDictionary implements Dictionary {
return parents.toArray(new String[parents.size()]);
}
else {
- return empty;
+ return EMPTY;
}
}
catch (JWNLException e) {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
index 4ce1982..dbbd25d 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
@@ -28,15 +28,15 @@ import java.util.Set;
public final class PTBHeadFinder implements HeadFinder {
private static PTBHeadFinder instance;
- private static final Set<String> skipSet = new HashSet<>();
+ private static final Set<String> SKIP_SET = new HashSet<>();
static {
- skipSet.add("POS");
- skipSet.add(",");
- skipSet.add(":");
- skipSet.add(".");
- skipSet.add("''");
- skipSet.add("-RRB-");
- skipSet.add("-RCB-");
+ SKIP_SET.add("POS");
+ SKIP_SET.add(",");
+ SKIP_SET.add(":");
+ SKIP_SET.add(".");
+ SKIP_SET.add("''");
+ SKIP_SET.add("-RRB-");
+ SKIP_SET.add("-RCB-");
}
private PTBHeadFinder() {}
@@ -133,7 +133,7 @@ public final class PTBHeadFinder implements HeadFinder {
}
for (int ti = toks.size() - tokenCount - 1; ti >= 0; ti--) {
Parse tok = toks.get(ti);
- if (!skipSet.contains(tok.getSyntacticType())) {
+ if (!SKIP_SET.contains(tok.getSyntacticType())) {
return ti;
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
index 8672cb2..f0ead35 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
@@ -52,8 +52,7 @@ public abstract class MaxentResolver extends AbstractResolver
{
/** Default feature value. */
public static final String DEFAULT = "default";
-
- private static final boolean debugOn = false;
+ private static final boolean DEBUG = false;
private String modelName;
private MaxentModel model;
@@ -84,7 +83,7 @@ public abstract class MaxentResolver extends AbstractResolver
{
/** The model for computing non-referential probabilities. */
protected NonReferentialResolver nonReferentialResolver;
- private static final String modelExtension = ".bin.gz";
+ private static final String MODEL_EXTENSION = ".bin.gz";
/**
* Creates a maximum-entropy-based resolver which will look the specified
number of
@@ -123,7 +122,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
this.modelName = modelDirectory + "/" + name;
if (ResolverMode.TEST == this.mode) {
try (DataInputStream dis = new DataInputStream(
- new BufferedInputStream(new FileInputStream(modelName +
modelExtension)))) {
+ new BufferedInputStream(new FileInputStream(modelName +
MODEL_EXTENSION)))) {
model = new BinaryGISModelReader(dis).getModel();
}
sameIndex = model.getIndex(SAME);
@@ -181,7 +180,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
DiscourseEntity de;
int ei = 0;
double nonReferentialProbability =
nonReferentialResolver.getNonReferentialProbability(ec);
- if (debugOn) {
+ if (DEBUG) {
System.err.println(this + ".resolve: " + ec.toText() + " -> " + "null "
+ nonReferentialProbability);
}
for (; ei < getNumEntities(dm); ei++) {
@@ -191,7 +190,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
}
if (excluded(ec, de)) {
candProbs[ei] = 0;
- if (debugOn) {
+ if (DEBUG) {
System.err.println("excluded " + this + ".resolve: " + ec.toText() +
" -> " + de + " "
+ candProbs[ei]);
}
@@ -206,7 +205,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
catch (ArrayIndexOutOfBoundsException e) {
candProbs[ei] = 0;
}
- if (debugOn) {
+ if (DEBUG) {
System.err.println(this + ".resolve: " + ec.toText() + " -> " + de +
" ("
+ ec.getGender() + "," + de.getGender() + ") " + candProbs[ei] +
" " + lfeatures);
}
@@ -284,7 +283,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
List<String> features = getFeatures(mention, cde);
//add Event to Model
- if (debugOn) {
+ if (DEBUG) {
System.err.println(this + ".retain: " + mention.getId() + " " +
mention.toText()
+ " -> " + entityMention.getId() + " " + cde);
}
@@ -339,7 +338,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
@Override
public void train() throws IOException {
if (ResolverMode.TRAIN == mode) {
- if (debugOn) {
+ if (DEBUG) {
System.err.println(this + " referential");
FileWriter writer = new FileWriter(modelName + ".events");
for (Event e : events) {
@@ -353,7 +352,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
GISTrainer trainer = new GISTrainer();
trainer.init(params, null);
GISModel trainedModel =
trainer.trainModel(ObjectStreamUtils.createObjectStream(events));
- new BinaryGISModelWriter(trainedModel, new File(modelName +
modelExtension)).persist();
+ new BinaryGISModelWriter(trainedModel, new File(modelName +
MODEL_EXTENSION)).persist();
nonReferentialResolver.train();
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
index 2592316..f6534f3 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
@@ -90,6 +90,6 @@ public class PluralPronounResolver extends MaxentResolver {
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
return (tag != null && tag.startsWith("PRP")
- &&
ResolverUtils.pluralThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches());
+ &&
ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches());
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
index b63627d..2cd657a 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
@@ -39,49 +39,49 @@ import opennlp.tools.coref.sim.TestSimilarityModel;
public class ResolverUtils {
private static final Pattern ENDS_WITH_PERIOD = Pattern.compile("\\.$");
- private static final Pattern initialCaps = Pattern.compile("^[A-Z]");
+ private static final Pattern INITIAL_CAPS = Pattern.compile("^[A-Z]");
/** Regular expression for English singular third-person pronouns. */
- public static final Pattern singularThirdPersonPronounPattern =
+ public static final Pattern SINGULAR_THIRD_PERSON_PRONOUN_PATTERN =
Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English plural third-person pronouns. */
- public static final Pattern pluralThirdPersonPronounPattern =
+ public static final Pattern PLURAL_THIRD_PERSON_PRONOUN_PATTERN =
Pattern.compile("^(they|their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English speech pronouns. */
- public static final Pattern speechPronounPattern =
+ public static final Pattern SPEECH_PRONOUN_PATTERN =
Pattern.compile("^(I|me|my|you|your|you|we|us|our|ours)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English female pronouns. */
- public static final Pattern femalePronounPattern =
+ public static final Pattern FEMALE_PRONOUN_PATTERN =
Pattern.compile("^(she|her|hers|herself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English neuter pronouns. */
- public static final Pattern neuterPronounPattern =
+ public static final Pattern NEUTER_PRONOUN_PATTERN =
Pattern.compile("^(it|its|itself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English first-person pronouns. */
- public static final Pattern firstPersonPronounPattern =
+ public static final Pattern FIRST_PERSON_PRONOUN_PATTERN =
Pattern.compile("^(I|me|my|we|our|us|ours)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English singular second-person pronouns. */
- public static final Pattern secondPersonPronounPattern =
+ public static final Pattern SECOND_PERSON_PRONOUN_PATTERN =
Pattern.compile("^(you|your|yours)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English third-person pronouns. */
- public static final Pattern thirdPersonPronounPattern =
+ public static final Pattern THIRD_PERSON_PRONOUN_PATTERN =
Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself|they|"
+
"their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English singular pronouns. */
- public static final Pattern singularPronounPattern =
+ public static final Pattern SINGULAR_PRONOUN_PATTERN =
Pattern.compile("^(I|me|my|he|she|it|him|her|his|hers|its|himself|herself|itself)$",
Pattern.CASE_INSENSITIVE);
/** Regular expression for English plural pronouns. */
- public static final Pattern pluralPronounPattern =
+ public static final Pattern PLURAL_PRONOUN_PATTERN =
Pattern.compile("^(we|us|our|ours|they|their|theirs|them|themselves)$",
Pattern.CASE_INSENSITIVE);
/** Regular expression for English male pronouns. */
- public static final Pattern malePronounPattern =
+ public static final Pattern MALE_PRONOUN_PATTERN =
Pattern.compile("^(he|him|his|himself)$",Pattern.CASE_INSENSITIVE);
/** Regular expression for English honorifics. */
- public static final Pattern honorificsPattern =
+ public static final Pattern HONORIFICS_PATTERN =
Pattern.compile("[A-Z][a-z]+\\.$|^[A-Z][b-df-hj-np-tv-xz]+$");
/** Regular expression for English corporate designators. */
- public static final Pattern designatorsPattern =
+ public static final Pattern DESIGNATORS_PATTERN =
Pattern.compile("[a-z]\\.$|^[A-Z][b-df-hj-np-tv-xz]+$|^Co(rp)?$");
@@ -185,7 +185,7 @@ public class ResolverUtils {
Object[] mtokens = ec.getTokens();
for (Object mtoken : mtokens) {
String token = mtoken.toString();
- if (!honorificsPattern.matcher(token).matches()) {
+ if (!HONORIFICS_PATTERN.matcher(token).matches()) {
if (!first) {
sb.append(" ");
}
@@ -387,7 +387,7 @@ public class ResolverUtils {
}
if (start + 1 != end) { // don't do this on head words, to keep "U.S."
//strip off honorifics in begining
- if (honorificsPattern.matcher(mtokens[start].toString()).find()) {
+ if (HONORIFICS_PATTERN.matcher(mtokens[start].toString()).find()) {
start++;
}
if (start == end) {
@@ -395,7 +395,7 @@ public class ResolverUtils {
return null;
}
//strip off and honerifics on the end
- if (designatorsPattern.matcher(mtokens[mtokens.length -
1].toString()).find()) {
+ if (DESIGNATORS_PATTERN.matcher(mtokens[mtokens.length -
1].toString()).find()) {
end--;
}
}
@@ -415,7 +415,7 @@ public class ResolverUtils {
for (Iterator<MentionContext> ei = de.getMentions(); ei.hasNext();) {
MentionContext xec = ei.next();
String xecHeadTag = xec.getHeadTokenTag();
- if (xecHeadTag.startsWith("NNP") ||
initialCaps.matcher(xec.getHeadTokenText()).find()) {
+ if (xecHeadTag.startsWith("NNP") ||
INITIAL_CAPS.matcher(xec.getHeadTokenText()).find()) {
return xec;
}
}
@@ -424,19 +424,19 @@ public class ResolverUtils {
private static Map<String, String> getPronounFeatureMap(String pronoun) {
Map<String, String> pronounMap = new HashMap<>();
- if (malePronounPattern.matcher(pronoun).matches()) {
+ if (MALE_PRONOUN_PATTERN.matcher(pronoun).matches()) {
pronounMap.put("gender","male");
}
- else if (femalePronounPattern.matcher(pronoun).matches()) {
+ else if (FEMALE_PRONOUN_PATTERN.matcher(pronoun).matches()) {
pronounMap.put("gender","female");
}
- else if (neuterPronounPattern.matcher(pronoun).matches()) {
+ else if (NEUTER_PRONOUN_PATTERN.matcher(pronoun).matches()) {
pronounMap.put("gender","neuter");
}
- if (singularPronounPattern.matcher(pronoun).matches()) {
+ if (SINGULAR_PRONOUN_PATTERN.matcher(pronoun).matches()) {
pronounMap.put("number","singular");
}
- else if (pluralPronounPattern.matcher(pronoun).matches()) {
+ else if (PLURAL_PRONOUN_PATTERN.matcher(pronoun).matches()) {
pronounMap.put("number","plural");
}
/*
@@ -651,13 +651,13 @@ public class ResolverUtils {
* @return the gender of the specified pronoun.
*/
public static String getPronounGender(String pronoun) {
- if (malePronounPattern.matcher(pronoun).matches()) {
+ if (MALE_PRONOUN_PATTERN.matcher(pronoun).matches()) {
return "m";
}
- else if (femalePronounPattern.matcher(pronoun).matches()) {
+ else if (FEMALE_PRONOUN_PATTERN.matcher(pronoun).matches()) {
return "f";
}
- else if (neuterPronounPattern.matcher(pronoun).matches()) {
+ else if (NEUTER_PRONOUN_PATTERN.matcher(pronoun).matches()) {
return "n";
}
else {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
index 459bf10..7cfd887 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
@@ -50,7 +50,7 @@ public class SingularPronounResolver extends MaxentResolver {
//System.err.println("MaxentSingularPronounResolver.canResolve: ec=
("+mention.id+") "+ mention.toText());
String tag = mention.getHeadTokenTag();
return tag != null && tag.startsWith("PRP")
- &&
ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches();
+ &&
ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches();
}
@Override
@@ -111,7 +111,7 @@ public class SingularPronounResolver extends MaxentResolver
{
MentionContext entityMention = ei.next();
String tag = entityMention.getHeadTokenTag();
if (tag != null && tag.startsWith("PRP")
- &&
ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches())
{
+ &&
ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches())
{
if (mentionGender == null) { //lazy initialization
mentionGender =
ResolverUtils.getPronounGender(mention.getHeadTokenText());
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
index be4e5dc..b853b34 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
@@ -80,7 +80,7 @@ public class SpeechPronounResolver extends MaxentResolver {
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
boolean fpp = tag != null && tag.startsWith("PRP")
- &&
ResolverUtils.speechPronounPattern.matcher(mention.getHeadTokenText()).matches();
+ &&
ResolverUtils.SPEECH_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches();
boolean pn = tag != null && tag.startsWith("NNP");
return (fpp || pn);
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
index 2c06836..c1ffb6e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
@@ -138,13 +138,13 @@ public class GenderModel implements TestGenderModel,
TrainSimilarityModel {
* @return The heuristically determined gender or unknown.
*/
private GenderEnum getGender(Context mention) {
- if
(ResolverUtils.malePronounPattern.matcher(mention.getHeadTokenText()).matches())
{
+ if
(ResolverUtils.MALE_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches())
{
return GenderEnum.MALE;
}
- else if
(ResolverUtils.femalePronounPattern.matcher(mention.getHeadTokenText()).matches())
{
+ else if
(ResolverUtils.FEMALE_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches())
{
return GenderEnum.FEMALE;
}
- else if
(ResolverUtils.neuterPronounPattern.matcher(mention.getHeadTokenText()).matches())
{
+ else if
(ResolverUtils.NEUTER_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches())
{
return GenderEnum.NEUTER;
}
Object[] mtokens = mention.getTokens();
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
index 2ee8481..d305e8e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
@@ -91,10 +91,10 @@ public class NumberModel implements TestNumberModel,
TrainSimilarityModel {
}
public NumberEnum getNumber(Context ec) {
- if
(ResolverUtils.singularPronounPattern.matcher(ec.getHeadTokenText()).matches())
{
+ if
(ResolverUtils.SINGULAR_PRONOUN_PATTERN.matcher(ec.getHeadTokenText()).matches())
{
return NumberEnum.SINGULAR;
}
- else if
(ResolverUtils.pluralPronounPattern.matcher(ec.getHeadTokenText()).matches()) {
+ else if
(ResolverUtils.PLURAL_PRONOUN_PATTERN.matcher(ec.getHeadTokenText()).matches())
{
return NumberEnum.PLURAL;
}
else {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
index 398a6f5..487816f 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
@@ -42,7 +42,7 @@ import opennlp.tools.util.Span;
*/
public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample, CorefSample> {
- private static final Set<String> entitySet = new
HashSet<>(Arrays.asList(DefaultParse.NAME_TYPES));
+ private static final Set<String> ENTITY_SET = new
HashSet<>(Arrays.asList(DefaultParse.NAME_TYPES));
private final MentionFinder mentionFinder;
@@ -95,7 +95,7 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
if (commonParent != null) {
// Span mentionSpan = new Span(startToken.getSpan().getStart(),
endToken.getSpan().getEnd());
- if (entitySet.contains(commonParent.getType())) {
+ if (ENTITY_SET.contains(commonParent.getType())) {
commonParent.getParent().setType("NP#" + id);
}
else if (commonParent.getType().equals("NML")) {
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
index 7a79f18..16d9637 100755
---
a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java
@@ -43,7 +43,7 @@ public class SimilarityAccessorBase {
private List<String> namesBothSides;
- protected static final String[] englishPrepositions = new String[] {
"a", "aboard", "about", "above", "absent",
+ private static final String[] ENGLISH_PREPOSITIONS = new String[] {
"a", "aboard", "about", "above", "absent",
"across", "after", "against", "along", "alongside", "among",
"around", "as", "at", "before", "behind", "below",
"beneath", "between", "beyond", "but", "by", "despite", "down",
"during", "except", "excluding", "failing",
"following", "for", "from", "in", "including", "inside",
"into", "like", "near", "next", "of", "off", "on",
@@ -51,20 +51,20 @@ public class SimilarityAccessorBase {
"thru", "till", "to", "toward", "under", "up", "upon",
"versus", "with", "within", "you", "must", "know",
"when" };
- protected final List<String> commonWordsInEventTitles =
Arrays.asList(new String[] { "community", "party", "film",
- "music", "exhibition", "kareoke", "guitar", "quartet",
"reggae", "r&b", "band", "dj ", "piano", "pray",
- "worship", "god", "training", "class", "development",
"training", "class", "course", "our", "comedy", ",fun",
- "musical", "group", "alliance", "session", "feeding",
"introduction", "school", "conversation", "learning",
- "nursery", "unity", "trivia", "chat", "conference", "tuition",
"technology", "teen", "communication",
- "reception", "management", "beginner", "beginning",
"collabora", "reuninon", "political", "course", "age",
- "ages", "through", "grade", "networking", "workshop",
"demonstration", "tuning", "program", "summit",
- "convention", "day", "night", "one", "two", "outfest", "three",
"online", "writing", "seminar", "coach",
- ",expo", "advanced", "beginner", "intermediate", "earn",
"free", "ii", "iii", "skills", "skill", "artist",
- "summer", "winter", "autumn", "spring", "camp", "vacation",
"miscrosoft", "kid", "child", "kids", "children",
- "every", "everyone", "dancer", "dancers", "senior", "seniors",
"basic", "elementary", "outfest", "2008",
- "2009", "2010", "2011", "2012", "monday", "tuesday",
"wednesday", "thirsday", "friday", "saturday", "sunday",
- "mondays", "tuesdays", "wednesdays", "thirsdays", "fridays",
"saturdays", "sundays", "men" // ?
- });
+ private static final List<String> COMMON_WORDS_IN_EVENT_TITLES =
Arrays.asList("community", "party", "film",
+ "music", "exhibition", "kareoke",
"guitar", "quartet", "reggae", "r&b", "band", "dj ", "piano", "pray",
+ "worship", "god", "training", "class",
"development", "training", "class", "course", "our", "comedy", ",fun",
+ "musical", "group", "alliance",
"session", "feeding", "introduction", "school", "conversation", "learning",
+ "nursery", "unity", "trivia", "chat",
"conference", "tuition", "technology", "teen", "communication",
+ "reception", "management", "beginner",
"beginning", "collabora", "reuninon", "political", "course", "age",
+ "ages", "through", "grade",
"networking", "workshop", "demonstration", "tuning", "program", "summit",
+ "convention", "day", "night", "one",
"two", "outfest", "three", "online", "writing", "seminar", "coach",
+ ",expo", "advanced", "beginner",
"intermediate", "earn", "free", "ii", "iii", "skills", "skill", "artist",
+ "summer", "winter", "autumn", "spring",
"camp", "vacation", "miscrosoft", "kid", "child", "kids", "children",
+ "every", "everyone", "dancer",
"dancers", "senior", "seniors", "basic", "elementary", "outfest", "2008",
+ "2009", "2010", "2011", "2012",
"monday", "tuesday", "wednesday", "thirsday", "friday", "saturday", "sunday",
+ "mondays", "tuesdays", "wednesdays",
"thirsdays", "fridays", "saturdays", "sundays", "men" // ?
+ );
private final BingQueryRunner webSearch = new BingQueryRunner();
@@ -257,7 +257,7 @@ public class SimilarityAccessorBase {
if (word.length() < 2) // '-', '|', ':'
break;
- if (word.equals(word.toLowerCase()) &&
(!Arrays.asList(englishPrepositions).contains(word))
+ if (word.equals(word.toLowerCase()) &&
(!Arrays.asList(ENGLISH_PREPOSITIONS).contains(word))
&& word.length() > 3 &&
StringUtils.isAlphanumeric(word))
continue; // was return false;
if (count > 3)
@@ -275,7 +275,7 @@ public class SimilarityAccessorBase {
// now iterate till next preposition towards the end of noun
phrase
for (String preposCand : ofList)
{
- if
(Arrays.asList(englishPrepositions).contains(preposCand))
+ if
(Arrays.asList(ENGLISH_PREPOSITIONS).contains(preposCand))
break;
results.add(preposCand);
}
@@ -403,8 +403,8 @@ public class SimilarityAccessorBase {
{ // all words should be the
// same
name1Tokens.removeAll(name2Tokens);
-
name1Tokens.removeAll(Arrays.asList(englishPrepositions));
-
name1Tokens.removeAll(Arrays.asList(commonWordsInEventTitles));
+
name1Tokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS));
+
name1Tokens.removeAll(Arrays.asList(COMMON_WORDS_IN_EVENT_TITLES));
if (name1Tokens.size() < 1)
return true;
@@ -430,7 +430,7 @@ public class SimilarityAccessorBase {
name1Tokens.removeAll(name2Tokens);
name2Tokens.removeAll(name1TokensClone);
name1Tokens.addAll(name2Tokens);
-
name1Tokens.removeAll(Arrays.asList(englishPrepositions));
+
name1Tokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS));
//
name1Tokens.removeAll(Arrays.asList(this.commonWordsInEventTitles));
if (name1Tokens.size() < 1)
return true;
@@ -539,8 +539,8 @@ public class SimilarityAccessorBase {
name1Tokens.retainAll(name2Tokens);
name1Tokens.removeAll(venueToks);
- name1Tokens.removeAll(commonWordsInEventTitles);
- name1Tokens.removeAll(Arrays.asList(englishPrepositions));
+ name1Tokens.removeAll(COMMON_WORDS_IN_EVENT_TITLES);
+ name1Tokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS));
name1Tokens = removeDollarWordAndNonAlphaFromList(name1Tokens);
// todo : to use full string measure
// boundary case: too many words => just do counts
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
index b277a23..daed37f 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
@@ -103,7 +103,7 @@ public class SentimentVocab {
public static final int SENTIMENT_UNKNOWN = 0;
public static final int SENTIMENT_NEGATIVE = -1;
- private static final SentimentVocab instance = new SentimentVocab();
+ private static final SentimentVocab INSTANCE = new SentimentVocab();
// complete sentiment word map, key = word, value = sentiment object
private final Map<String, Sentiment> sentimentMap = new HashMap<>();
@@ -122,7 +122,7 @@ public class SentimentVocab {
}
public static SentimentVocab getInstance() {
- return instance;
+ return INSTANCE;
}
public Sentiment getSentiment(String word) {
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
index b682086..c5388fa 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
@@ -41,9 +41,9 @@ import java.util.regex.Pattern;
* @author GaDo
*/
public class EmailSender {
- private static final Pattern pc =
Pattern.compile("[^\\s]+@[^\\s]+.[^\\s]+");
+ private static final Pattern EMAIL_PATTERN =
Pattern.compile("[^\\s]+@[^\\s]+.[^\\s]+");
- private static final String mailboxAddress =
"[email protected]";
+ private static final String MAILBOX_ADDRESS =
"[email protected]";
public boolean sendMail(String smtp, String user, String pass,
InternetAddress from, InternetAddress[] to,
InternetAddress[] cc, InternetAddress[] bcc,
@@ -101,7 +101,7 @@ public class EmailSender {
}
Transport tr = session.getTransport("smtp");
- tr.connect(smtp, mailboxAddress, pass);
+ tr.connect(smtp, MAILBOX_ADDRESS, pass);
message.saveChanges();
tr.sendMessage(message,
message.getAllRecipients());
tr.close();
@@ -124,7 +124,7 @@ public class EmailSender {
Matcher m;
if(correct){
- m = pc.matcher(from.getAddress());
+ m = EMAIL_PATTERN.matcher(from.getAddress());
correct = m.matches();
}
@@ -133,7 +133,7 @@ public class EmailSender {
while(correct && vault<to.length){
correct =
!to[vault].getAddress().equals("");
if(correct){
- m =
pc.matcher(to[vault].getAddress());
+ m =
EMAIL_PATTERN.matcher(to[vault].getAddress());
correct = m.matches();
}
vault++;
@@ -145,7 +145,7 @@ public class EmailSender {
while(correct && vault<cc.length){
correct =
!cc[vault].getAddress().equals("");
if(correct){
- m =
pc.matcher(cc[vault].getAddress());
+ m =
EMAIL_PATTERN.matcher(cc[vault].getAddress());
correct = m.matches();
}
vault++;
@@ -157,7 +157,7 @@ public class EmailSender {
while(correct && vault<bcc.length){
correct =
!bcc[vault].getAddress().equals("");
if(correct){
- m =
pc.matcher(bcc[vault].getAddress());
+ m =
EMAIL_PATTERN.matcher(bcc[vault].getAddress());
correct = m.matches();
}
vault++;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
index e8219fd..7fcd9ce 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java
@@ -36,20 +36,20 @@ import org.apache.tika.Tika;
public class ClassifierTrainingSetIndexer {
- public static final String resourceDir = new
File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources";
- public static final String INDEX_PATH = "/classif";
- public static final String CLASSIF_TRAINING_CORPUS_PATH = "/training_corpus";
+ private static final String[] DOMAINS = new String[] { "legal", "health",
"computing", "engineering", "business" };
+ private static final String RESOURCE_DIR = new
File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources";
+ static final String INDEX_PATH = "/classif";
+ static final String CLASSIF_TRAINING_CORPUS_PATH = "/training_corpus";
protected final ArrayList<File> queue = new ArrayList<>();
- final Tika tika = new Tika();
+ private final Tika tika = new Tika();
- IndexWriter indexWriter = null;
- protected static final String[] domains = new String[] { "legal", "health",
"computing", "engineering", "business" };
- private String absolutePathTrainingSet=null;
+ private IndexWriter indexWriter = null;
+ private String absolutePathTrainingSet = null;
public ClassifierTrainingSetIndexer() {
try {
- initIndexWriter(resourceDir);
+ initIndexWriter(RESOURCE_DIR);
} catch (Exception e) {
e.printStackTrace();
}
@@ -58,7 +58,7 @@ public class ClassifierTrainingSetIndexer {
public ClassifierTrainingSetIndexer(String absolutePathTrainingSet) {
this.absolutePathTrainingSet = absolutePathTrainingSet;
try {
- initIndexWriter(resourceDir);
+ initIndexWriter(RESOURCE_DIR);
} catch (Exception e) {
e.printStackTrace();
}
@@ -68,7 +68,7 @@ public class ClassifierTrainingSetIndexer {
try {
indexFileOrDirectory(Objects.requireNonNullElseGet(absolutePathTrainingSet,
- () -> resourceDir + CLASSIF_TRAINING_CORPUS_PATH));
+ () -> RESOURCE_DIR + CLASSIF_TRAINING_CORPUS_PATH));
indexWriter.commit();
} catch (IOException e) {
e.printStackTrace();
@@ -120,7 +120,7 @@ public class ClassifierTrainingSetIndexer {
String name = f.getPath();
String className = null;
- for (String d : domains) {
+ for (String d : DOMAINS) {
if (name.contains(d)) {
className = d;
break;
@@ -218,7 +218,7 @@ public class ClassifierTrainingSetIndexer {
public static String getCategoryFromFilePath(String path){
String className = null;
- for (String d : domains) {
+ for (String d : DOMAINS) {
if (path.contains("/" + d + "/")) {
className = d;
break;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
index 05abe3e..ccd9f63 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
@@ -47,9 +47,10 @@ import org.apache.lucene.store.FSDirectory;
import org.json.JSONObject;
public class DocClassifier {
+
+ private static final Log LOGGER =
LogFactory.getLog(DocClassifier.class);
public static final String DOC_CLASSIFIER_KEY = "doc_class";
- public static final String resourceDir = null;
- public static final Log logger = LogFactory.getLog(DocClassifier.class);
+ public static final String RESOURCE_DIR = null;
private Map<String, Float> scoredClasses;
@@ -57,7 +58,7 @@ public class DocClassifier {
protected static IndexReader indexReader = null;
protected static IndexSearcher indexSearcher = null;
// resource directory plus the index folder
- private static final String INDEX_PATH = resourceDir
+ private static final String INDEX_PATH = RESOURCE_DIR
+ ClassifierTrainingSetIndexer.INDEX_PATH;
// http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm
@@ -90,8 +91,8 @@ public class DocClassifier {
// for classification
// these are categories from the index
- public static final String[] categories = new String[] { "legal",
"health",
- "finance", "computing", "engineering", "business" };
+ public static final String[] CATEGORIES = new String[]
+ { "legal", "health", "finance",
"computing", "engineering", "business" };
static {
synchronized (DocClassifier.class) {
@@ -100,13 +101,13 @@ public class DocClassifier {
try {
indexDirectory = FSDirectory.open(new
File(INDEX_PATH).toPath());
} catch (IOException e2) {
- logger.error("problem opening index " + e2);
+ LOGGER.error("problem opening index " + e2);
}
try {
indexReader =
DirectoryReader.open(indexDirectory);
indexSearcher = new IndexSearcher(indexReader);
} catch (IOException e2) {
- logger.error("problem reading index \n" + e2);
+ LOGGER.error("problem reading index \n" + e2);
}
}
}
@@ -141,9 +142,9 @@ public class DocClassifier {
hits = indexSearcher
.search(query,
MAX_DOCS_TO_USE_FOR_CLASSIFY + 2);
} catch (IOException e1) {
- logger.error("problem searching index \n" + e1);
+ LOGGER.error("problem searching index \n" + e1);
}
- logger.debug("Found " + hits.totalHits + " hits for " +
queryStr);
+ LOGGER.debug("Found " + hits.totalHits + " hits for " +
queryStr);
int count = 0;
@@ -152,7 +153,7 @@ public class DocClassifier {
try {
doc = indexSearcher.doc(scoreDoc.doc);
} catch (IOException e) {
- logger.error("Problem searching training set
for classif \n"
+ LOGGER.error("Problem searching training set
for classif \n"
+ e);
continue;
}
@@ -164,7 +165,7 @@ public class DocClassifier {
else
scoredClasses.put(flag, scoreForClass +
scoreDoc.score);
- logger.debug(" <<categorized as>> " + flag + " | score="
+ LOGGER.debug(" <<categorized as>> " + flag + " | score="
+ scoreDoc.score + " \n text =" +
doc.get("text") + "\n");
if (count > MAX_DOCS_TO_USE_FOR_CLASSIFY) {
@@ -180,7 +181,7 @@ public class DocClassifier {
if (scoredClasses.get(key) >
MIN_TOTAL_SCORE_FOR_CATEGORY)
resultsAboveThresh.add(key);
else
- logger.debug("Too low score of " +
scoredClasses.get(key)
+ LOGGER.debug("Too low score of " +
scoredClasses.get(key)
+ " for category = " +
key);
}
@@ -192,7 +193,7 @@ public class DocClassifier {
else
results = resultsAboveThresh;
} catch (Exception e) {
- logger.error("Problem aggregating search results\n" +
e);
+ LOGGER.error("Problem aggregating search results\n" +
e);
}
if (results.size() < 2)
return results;
@@ -253,7 +254,7 @@ public class DocClassifier {
try {
indexReader.close();
} catch (IOException e) {
- logger.error("Problem closing index \n" + e);
+ LOGGER.error("Problem closing index \n" + e);
}
}
@@ -280,12 +281,12 @@ public class DocClassifier {
classifResults = classifySentence(query);
if (classifResults != null &&
classifResults.size() > 0) {
localCats.addAll(classifResults);
- logger.debug(sentence + " => " +
classifResults);
+ LOGGER.debug(sentence + " => " +
classifResults);
}
}
} catch (Exception e) {
- logger.error("Problem classifying sentence\n " + e);
+ LOGGER.error("Problem classifying sentence\n " + e);
}
List<String> aggrResults = new ArrayList<>();
@@ -293,9 +294,9 @@ public class DocClassifier {
aggrResults = localCats.getFrequentTags();
- logger.debug(localCats.getFrequentTags());
+ LOGGER.debug(localCats.getFrequentTags());
} catch (Exception e) {
- logger.error("Problem aggregating search results\n" +
e);
+ LOGGER.error("Problem aggregating search results\n" +
e);
}
return aggrResults;
}
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
index 00dc002..99d070f 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
@@ -45,11 +45,8 @@ public class DocClassifierTrainingSetMultilingualExtender {
final DocClassifier classifier;
private String sourceDir = null, destinationDir = null;
//interwiki-fr"><a
href="http://fr.wikipedia.org/wiki/Niveau_d%27%C3%A9nergie" title="Niveau
d'énergie – French" lang="fr"
- private static final String[][] multilingualTokens = new String[][]{
- {"interwiki-fr\"><a href=\"", "lang=\"fr\""},
- {"interwiki-es\"><a href=\"", "lang=\"es\""},
- {"interwiki-de\"><a href=\"", "lang=\"de\""},
- };
+ private static final String[][] MULTILINGUAL_TOKENS = new
String[][]{{"interwiki-fr\"><a href=\"", "lang=\"fr\""},
+ {"interwiki-es\"><a href=\"", "lang=\"es\""},
{"interwiki-de\"><a href=\"", "lang=\"de\""} };
private static final String[] LANGS = new String[]{ "fr", "es", "de"};
@@ -131,7 +128,7 @@ public class DocClassifierTrainingSetMultilingualExtender {
System.out.println("processing "+f.getName());
content = FileUtils.readFileToString(f,
"utf-8");
int langIndex =0;
- for(String[] begEnd: multilingualTokens){
+ for(String[] begEnd: MULTILINGUAL_TOKENS){
String urlDirty =
StringUtils.substringBetween(content, begEnd[0], begEnd[1]);
String url =
StringUtils.substringBefore(urlDirty, "\"");
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
index 928efd5..808788f 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java
@@ -43,7 +43,7 @@ public class EmailNormalizer {
}
}
- public static final String[] headers = new String[] {
+ static final String[] HEADERS = new String[] {
"Message-ID:",
"Date:",
"From:",
@@ -62,12 +62,10 @@ public class EmailNormalizer {
"----",
};
- public static final String[] prohibitedStrings = new String[] {
- "@", "<", ">"
- };
+ static final String[] PROHIBITED_STRINGS = new String[] {"@", "<", ">"};
public void normalizeAndWriteIntoANewFile(File f){
- String content="";
+ String content = "";
try {
content = FileUtils.readFileToString(f,
StandardCharsets.UTF_8);
} catch (IOException e) {
@@ -77,12 +75,12 @@ public class EmailNormalizer {
StringBuilder buf = new StringBuilder();
for(String l: lines){
boolean bAccept = true;
- for(String h: headers){
+ for(String h: HEADERS){
if (l.startsWith(h)){
bAccept = false;
}
}
- for(String h: prohibitedStrings){
+ for(String h: PROHIBITED_STRINGS){
if (l.indexOf(h)>0){
bAccept = false;
}
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
index 734414e..1a2f89e 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java
@@ -25,20 +25,19 @@ import java.util.List;
import org.apache.commons.io.FileUtils;
public class EmailTrainingSetFormer {
- static final String dataDir = "/Users/bgalitsky/Downloads/";
- static final String//enron_with_categories/",
- fileListFile = "cats4_11-17.txt";
- static final String destinationDir =
"/Users/bgalitsky/Documents/ENRON/data11_17/";
+ static final String DATA_DIR = "/Users/bgalitsky/Downloads/";
+ static final String FILE_LIST_FILE = "cats4_11-17.txt";
+ static final String DESTINATION_DIR =
"/Users/bgalitsky/Documents/ENRON/data11_17/";
//enron_with_categories/5/70665.cats:4,10,1
public static void createPosTrainingSet(){
try {
- List<String> lines = FileUtils.readLines(new
File(dataDir+fileListFile), StandardCharsets.UTF_8);
+ List<String> lines = FileUtils.readLines(new
File(DATA_DIR + FILE_LIST_FILE), StandardCharsets.UTF_8);
for(String l: lines){
int endOfFname = l.indexOf('.'), startOfFname =
l.lastIndexOf('/');
- String filenameOld =dataDir+ l.substring(0,
endOfFname)+".txt";
+ String filenameOld = DATA_DIR + l.substring(0,
endOfFname)+".txt";
String content = normalize(new
File(filenameOld));
- String filenameNew = destinationDir +
l.substring(startOfFname+1, endOfFname)+".txt";
+ String filenameNew = DESTINATION_DIR +
l.substring(startOfFname+1, endOfFname)+".txt";
//FileUtils.copyFile(new File(filenameOld), new
File(filenameNew));
FileUtils.writeStringToFile(new
File(filenameNew), content, StandardCharsets.UTF_8);
}
@@ -61,12 +60,12 @@ public class EmailTrainingSetFormer {
StringBuilder buf = new StringBuilder();
for(String l: lines){
boolean bAccept = true;
- for(String h: EmailNormalizer.headers){
+ for(String h: EmailNormalizer.HEADERS){
if (l.startsWith(h)){
bAccept = false;
}
}
- for(String h: EmailNormalizer.prohibitedStrings){
+ for(String h: EmailNormalizer.PROHIBITED_STRINGS){
if (l.indexOf(h)>0){
bAccept = false;
}
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
index 424f060..e4beac6 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
@@ -43,7 +43,7 @@ public class NL2Obj {
parser = ParserChunker2MatcherProcessor.getInstance();
}
- public static final String[] epistemicStatesList = new String[] {
+ static final String[] EPISTEMIC_STATES_LIST = new String[] {
"select", "verify", "find", "start", "stop", "go", "check"
};
@@ -156,7 +156,7 @@ public class NL2Obj {
}
private boolean isControlOp(String methodOrControlOp) {
- return Arrays.asList(epistemicStatesList).contains(methodOrControlOp);
+ return Arrays.asList(EPISTEMIC_STATES_LIST).contains(methodOrControlOp);
}
protected List<ParseTreeChunk> applyWhichRuleOnVP(List<ParseTreeChunk>
actionWithObject) {
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
index 421a124..0e5811e 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
@@ -26,19 +26,19 @@ import
opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcess
public class NL2ObjCreateAssign extends NL2Obj {
private boolean classBeingDefined = false;
- public static String[] declarationStatesList = new String[] {
+ static final String[] DECLARATION_STATES_LIST = new String[] {
"create", "assign", "set",
};
- public static final String[] dataTypesList = new String[] {
+ static final String[] DATA_TYPES_LIST = new String[] {
"text", "double", "array",
};
- public static final String[] arrayElementList = new String[] {
+ static final String[] ARRAY_ELEMENT_LIST = new String[] {
"first", "second", "third", "fourth"
};
- public static final String[] arrayElementListInsdex = new String[] {
+ static final String[] ARRAY_ELEMENT_LIST_INSDEX = new String[] {
"0", "1", "2", "3"
};
@@ -91,12 +91,12 @@ public class NL2ObjCreateAssign extends NL2Obj {
}
String dataType =
verbChunk.getLemmas().get(1).toLowerCase();
- if (classBeingDefined &&
Arrays.asList(dataTypesList).contains(dataType) &&
verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){
+ if (classBeingDefined &&
Arrays.asList(DATA_TYPES_LIST).contains(dataType) &&
verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){
op.setOperatorFor(dataType + "
"+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());
classBeingDefined = true;
break;
}
- if
(Arrays.asList(dataTypesList).contains(dataType) &&
verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){
+ if
(Arrays.asList(DATA_TYPES_LIST).contains(dataType) &&
verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){
op.setOperatorFor(dataType + "
"+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase());
classBeingDefined = true;
break;
@@ -126,9 +126,9 @@ public class NL2ObjCreateAssign extends NL2Obj {
numElements = lems.indexOf("object");
if (numElements<0)
numElements = lems.indexOf("member");
- if
(Arrays.asList(arrayElementList).contains(lems.get(numElements-1))){
- int arrIndex =
Arrays.asList(arrayElementList).indexOf(lems.get(numElements-1));
- String indexValue =
arrayElementListInsdex[arrIndex];
+ if
(Arrays.asList(ARRAY_ELEMENT_LIST).contains(lems.get(numElements-1))){
+ int arrIndex =
Arrays.asList(ARRAY_ELEMENT_LIST).indexOf(lems.get(numElements-1));
+ String indexValue =
ARRAY_ELEMENT_LIST_INSDEX[arrIndex];
String arrayName =
lems.get(lems.size()-1);
if (expression!=null)
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
index ca12830..1f1844a 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
@@ -34,7 +34,7 @@ public class WebPageExtractor {
protected ParserChunker2MatcherProcessor nlProc;
protected final MostFrequentWordsFromPageGetter
mostFrequentWordsFromPageGetter = new MostFrequentWordsFromPageGetter();
- protected static final int sentThresholdLength = 70;
+ protected static final int SENT_THRESHOLD_LENGTH = 70;
public List<String[]>
extractSentencesWithPotentialProductKeywords(String url)
{
@@ -111,7 +111,7 @@ public class WebPageExtractor {
continue;
if (s.indexOf('|')>-1)
continue;
- if (s == null || s.trim().length() <
sentThresholdLength || s.length() < sentThresholdLength + 10)
+ if (s == null || s.trim().length() <
SENT_THRESHOLD_LENGTH || s.length() < SENT_THRESHOLD_LENGTH + 10)
continue;
if
(GeneratedSentenceProcessor.acceptableMinedSentence(s)==null){
// TODO OPENNLP-1454 Candidate for
logger.debug(...) if required/helpful
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java
index d92345e..42590a2 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java
@@ -25,10 +25,10 @@ import opennlp.tools.stemmer.PStemmer;
import opennlp.tools.word2vec.W2VDistanceMeasurer;
public class LemmaGeneralizer implements IGeneralizer<String> {
- public static final String w2vPrefix = "w2v_";
- final PStemmer ps = new PStemmer();
- String pos = null;
- final W2VDistanceMeasurer w2v;
+ public static final String W2V_PREFIX = "w2v_";
+ private final PStemmer ps = new PStemmer();
+ private String pos = null;
+ private final W2VDistanceMeasurer w2v;
public LemmaGeneralizer() {
w2v = W2VDistanceMeasurer.getInstance();
}
@@ -43,52 +43,42 @@ public class LemmaGeneralizer implements
IGeneralizer<String> {
boolean bEqual = false;
String lemma1 = (String)o1, lemma2 = (String)o2;
-
-
- lemma1 = lemma1.toLowerCase();
- lemma2 = lemma2.toLowerCase();
+ lemma1 = lemma1.toLowerCase();
+ lemma2 = lemma2.toLowerCase();
- if (lemma1.equals(lemma2)) {
- bEqual = true;
- results.add(lemma1);
- return results;
- }
+ if (lemma1.equals(lemma2)) {
+ bEqual = true;
+ results.add(lemma1);
+ return results;
+ }
- if ((lemma1.equals(lemma2 + "s") ||
lemma2.equals(lemma1 + "s"))
- || lemma1.endsWith(lemma2) ||
lemma2.endsWith(lemma1)
- || lemma1.startsWith(lemma2) ||
lemma2.startsWith(lemma1)) {
- bEqual = true;
- results.add(lemma1);
- return results;
- }
+ if ((lemma1.equals(lemma2 + "s") || lemma2.equals(lemma1 + "s"))
+ || lemma1.endsWith(lemma2) ||
lemma2.endsWith(lemma1)
+ || lemma1.startsWith(lemma2) ||
lemma2.startsWith(lemma1)) {
+ bEqual = true;
+ results.add(lemma1);
+ return results;
+ }
- try {
- if (ps != null) {
- if (ps.stem(lemma1).toString()
-
.equalsIgnoreCase(ps.stem(lemma2).toString())) {
- bEqual = true;
- results.add(lemma1);
- return results;
- }
+ try {
+ if (ps != null) {
+ if (ps.stem(lemma1).toString()
+
.equalsIgnoreCase(ps.stem(lemma2).toString())) {
+ bEqual = true;
+ results.add(lemma1);
+ return results;
}
- } catch (Exception e) {
- System.err.println("Problem processing " +
lemma1 + " " + lemma2);
- return results;
- }
- // if different words, then compute word2vec distance
and write the value as a string
- if (w2v.vec!=null){
- double value = w2v.vec.similarity(lemma1,
lemma2);
- results.add(w2vPrefix + value);
}
+ } catch (Exception e) {
+ System.err.println("Problem processing " + lemma1 + " "
+ lemma2);
return results;
}
-
-
-
-
-
-
-
-
+ // if different words, then compute word2vec distance and write
the value as a string
+ if (w2v.vec!=null){
+ double value = w2v.vec.similarity(lemma1, lemma2);
+ results.add(W2V_PREFIX + value);
+ }
+ return results;
}
+}
\ No newline at end of file
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
index f3f49e1..b71d0b2 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
@@ -30,19 +30,18 @@ import
opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
import opennlp.tools.textsimilarity.SentencePairMatchResult;
import
opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
-/*
- * This class does content generation by using web mining and syntactic
generalization to get sentences from the web, convert and combine
- * them in the form
- * expected to be readable by humans and not distinguishable from genuine
content by search engines
- *
+/**
+ * This class does content generation by using web mining and syntactic
generalization to get sentences
+ * from the web, convert and combine them in the form expected to be readable
by humans and
+ * not distinguishable from genuine content by search engines.
*/
-
public class ContentGenerator /*extends RelatedSentenceFinder*/ {
- final PageFetcher pFetcher = new PageFetcher();
- final ParserChunker2MatcherProcessor sm =
ParserChunker2MatcherProcessor.getInstance();
- protected final ParseTreeChunkListScorer parseTreeChunkListScorer = new
ParseTreeChunkListScorer();
- protected final ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
- protected static final StringDistanceMeasurer stringDistanceMeasurer =
new StringDistanceMeasurer();
+
+ private final PageFetcher pFetcher = new PageFetcher();
+ private final ParserChunker2MatcherProcessor sm =
ParserChunker2MatcherProcessor.getInstance();
+ private final ParseTreeChunkListScorer parseTreeChunkListScorer = new
ParseTreeChunkListScorer();
+ private final ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
+ private static final StringDistanceMeasurer STRING_DISTANCE_MEASURER =
new StringDistanceMeasurer();
protected final BingQueryRunner yrunner = new BingQueryRunner();
protected final ContentGeneratorSupport support = new
ContentGeneratorSupport();
protected int MAX_STEPS = 1;
@@ -84,7 +83,7 @@ public class ContentGenerator /*extends
RelatedSentenceFinder*/ {
System.out.println(" \n=== Entity to write about = " +
sentence);
int stepCount=0;
- for (String verbAddition :
StoryDiscourseNavigator.frequentPerformingVerbs) {
+ for (String verbAddition :
StoryDiscourseNavigator.FREQUENT_PERFORMING_VERBS) {
List<HitBase> searchResult = yrunner.runSearch(sentence
+ " "
+ verbAddition, MAX_SEARCH_RESULTS);
//100);
if (MAX_SEARCH_RESULTS<searchResult.size())
@@ -323,7 +322,7 @@ public class ContentGenerator /*extends
RelatedSentenceFinder*/ {
}
}
- measScore =
stringDistanceMeasurer.measureStringDistance(originalSentence, pageSentence);
+ measScore =
STRING_DISTANCE_MEASURER.measureStringDistance(originalSentence, pageSentence);
if ((syntScore > RELEVANCE_THRESHOLD || measScore > 0.5)
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
index d9abf51..e5384c0 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
@@ -26,7 +26,7 @@ import opennlp.tools.similarity.apps.utils.Utils;
public class GeneratedSentenceProcessor {
- public static final String[] occurs = new String[]{ "click here",
"wikipedia", "retrieved", "isbn",
+ private static final String[] OCCURS = new String[]{ "click here",
"wikipedia", "retrieved", "isbn",
"http", "www.",
"copyright", "advertise", "(accessed", "[edit]", "[citation
needed]",
"site map", "email updates", "contact us", "rss feeds",
"cite this site",
@@ -67,7 +67,7 @@ public class GeneratedSentenceProcessor {
"menu.", "search.", "sign in", "home.",
"additional terms", "may apply"};
- public static final String[] occursStartsWith = new String[]{
+ private static final String[] OCCURS_STARTS_WITH = new String[]{
"fax", "write","email", "contact", "conditions", "chat live",
"we ", "the recipient", "day return", "days return",
"refund it", "your money",
@@ -75,6 +75,7 @@ public class GeneratedSentenceProcessor {
"exchange it ", "return it", "day return", "days return",
"subscribe","posted by", "below" , "corporate",
"this book"};
+
public static String acceptableMinedSentence(String sent) {
if (sent==null || sent.length()<40)
return null;
@@ -197,14 +198,14 @@ public class GeneratedSentenceProcessor {
}
public static boolean isProhibitiveWordsOccurOrStartWith(String
sentenceLowercase){
- for(String o: occurs){
+ for(String o: OCCURS){
if (sentenceLowercase.contains(o)){
//System.out.println("Found prohibited
occurrence "+ o +" \n in sentence = "+ sentenceLowercase);
return true;
}
}
- for(String o: occursStartsWith){
+ for(String o: OCCURS_STARTS_WITH){
if (sentenceLowercase.startsWith(o)){
//System.out.println("Found prohibited
occurrence Start With "+ o +" \n in sentence = "+ sentenceLowercase);
return true;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
index a6941ee..45bcbdb 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
@@ -52,7 +52,7 @@ public class RelatedSentenceFinder {
final ParserChunker2MatcherProcessor sm =
ParserChunker2MatcherProcessor.getInstance();
protected final ParseTreeChunkListScorer parseTreeChunkListScorer = new
ParseTreeChunkListScorer();
protected final ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
- protected static final StringDistanceMeasurer stringDistanceMeasurer =
new StringDistanceMeasurer();
+ protected static final StringDistanceMeasurer STRING_DISTANCE_MEASURER
= new StringDistanceMeasurer();
protected final BingQueryRunner yrunner = new BingQueryRunner();
protected int MAX_STEPS = 1;
protected int MAX_SEARCH_RESULTS = 1;
@@ -61,11 +61,11 @@ public class RelatedSentenceFinder {
// used to indicate that a sentence is an opinion, so more appropriate
static final List<String> MENTAL_VERBS = new ArrayList<>(
- Arrays.asList(new String[] { "want", "know", "believe",
"appeal", "ask",
- "accept", "agree", "allow", "appeal",
"ask", "assume", "believe",
- "check", "confirm", "convince", "deny",
"disagree", "explain",
- "ignore", "inform", "remind",
"request", "suggest", "suppose",
- "think", "threaten", "try",
"understand" }));
+ Arrays.asList("want", "know", "believe", "appeal",
"ask",
+ "accept", "agree",
"allow", "appeal", "ask", "assume", "believe",
+ "check", "confirm",
"convince", "deny", "disagree", "explain",
+ "ignore", "inform",
"remind", "request", "suggest", "suppose",
+ "think", "threaten",
"try", "understand"));
private static final int MAX_FRAGMENT_SENTS = 10;
@@ -137,7 +137,7 @@ public class RelatedSentenceFinder {
String[] extraKeywords = new
StoryDiscourseNavigator().obtainAdditionalKeywordsForAnEntity(sentence);
System.out.println("Found extraKeywords "+
Arrays.asList(extraKeywords));
if (extraKeywords==null || extraKeywords.length<1)
- extraKeywords =
StoryDiscourseNavigator.frequentPerformingVerbs;
+ extraKeywords =
StoryDiscourseNavigator.FREQUENT_PERFORMING_VERBS;
int stepCount=0;
for (String verbAddition : extraKeywords) {
@@ -492,7 +492,7 @@ public class RelatedSentenceFinder {
}
}
- measScore =
stringDistanceMeasurer.measureStringDistance(
+ measScore =
STRING_DISTANCE_MEASURER.measureStringDistance(
originalSentence,
pageSentence);
@@ -863,7 +863,7 @@ public class RelatedSentenceFinder {
e.printStackTrace();
}
- measScore =
stringDistanceMeasurer.measureStringDistance(
+ measScore =
STRING_DISTANCE_MEASURER.measureStringDistance(
originalSentence, pageSentence);
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
index a47c057..a075bc2 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
@@ -186,7 +186,7 @@ public class RelatedSentenceFinderML extends
RelatedSentenceFinder{
}
}
- measScore =
stringDistanceMeasurer.measureStringDistance(
+ measScore =
STRING_DISTANCE_MEASURER.measureStringDistance(
originalSentence,
pageSentence);
// now possibly increase score by
finding mental verbs
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
index b24dc45..77777d8 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
@@ -40,7 +40,7 @@ public class StoryDiscourseNavigator {
private final PStemmer ps = new PStemmer();
final PageFetcher pFetcher = new PageFetcher();
- public static final String[] frequentPerformingVerbs = {
+ public static final String[] FREQUENT_PERFORMING_VERBS = {
" born raised meet learn ", " graduated enter discover",
" facts inventions life ", "accomplishments childhood timeline",
" acquire befriend encounter", " achieve reache describe ",
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java
index 9fe7a70..b3274dc 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java
@@ -20,21 +20,21 @@ package opennlp.tools.similarity.apps;
import java.util.ArrayList;
import java.util.List;
-import opennlp.tools.jsmlearning.ProfileReaderWriter;
-import opennlp.tools.parse_thicket.Triple;
-
import net.billylieurance.azuresearch.AzureSearchResultSet;
import net.billylieurance.azuresearch.AzureSearchWebResult;
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.parse_thicket.Triple;
+
public class YahooAnswersMiner extends BingQueryRunner{
private int page = 0;
- private static final int hitsPerPage = 50;
+ private static final int HITS_PER_PAGE = 50;
public List<HitBase> runSearch(String query) {
aq.setAppid(BING_KEY);
aq.setQuery("site:answers.yahoo.com "+ query);
- aq.setPerPage(hitsPerPage);
+ aq.setPerPage(HITS_PER_PAGE);
aq.setPage(page);
aq.doQuery();
@@ -56,7 +56,7 @@ public class YahooAnswersMiner extends BingQueryRunner{
public List<HitBase> runSearch(String query, int totalPages) {
int count=0;
List<HitBase> results = new ArrayList<>();
- while(totalPages>page*hitsPerPage){
+ while(totalPages>page* HITS_PER_PAGE){
List<HitBase> res = runSearch(query);
results.addAll(res);
if (count>10)
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
index b719e70..c427366 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
@@ -36,7 +36,7 @@ import org.apache.solr.search.QueryParsing;
public class IterativeQueryComponent extends QueryComponent{
public static final String COMPONENT_NAME = "iterative_query";
- public static final String[] fieldSequence = new String[]{"cat",
"name", "content", "author"};
+ private static final String[] FIELD_SEQUENCE = new String[]{"cat",
"name", "content", "author"};
/**
* Run the query multiple times against various fields, trying to
recognize search intention
@@ -47,10 +47,10 @@ public class IterativeQueryComponent extends QueryComponent{
NamedList<Object> nameValuePairs = rb.rsp.getValues();
nameValuePairs.remove("response");
rb.rsp.setAllValues(nameValuePairs);
- rb = substituteField(rb, fieldSequence[0] );
+ rb = substituteField(rb, FIELD_SEQUENCE[0] );
super.process(rb);
- for(int iter = 1; iter<fieldSequence.length; iter++){
+ for(int iter = 1; iter< FIELD_SEQUENCE.length; iter++){
nameValuePairs = rb.rsp.getValues();
ResultContext c = (ResultContext)
nameValuePairs.get("response");
if (c!=null){
@@ -58,7 +58,7 @@ public class IterativeQueryComponent extends QueryComponent{
if (dList.size()<1){
nameValuePairs.remove("response");
rb.rsp.setAllValues(nameValuePairs);
- rb = substituteField(rb,
fieldSequence[iter] );
+ rb = substituteField(rb,
FIELD_SEQUENCE[iter] );
super.process(rb);
}
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java
index 0ed7350..7a1d40e 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java
@@ -35,11 +35,10 @@ public class NLProgram2CodeRequestHandler extends
SearchHandler {
private final static int MAX_SEARCH_RESULTS = 100;
private final ParseTreeChunkListScorer parseTreeChunkListScorer = new
ParseTreeChunkListScorer();
private final int MAX_QUERY_LENGTH_NOT_TO_RERANK = 3;
- private static final String resourceDir =
//"/home/solr/solr-4.4.0/example/src/test/resources";
- "C:/workspace/TestSolr/src/test/resources";
+ private static final String RESOURCES =
"C:/workspace/TestSolr/src/test/resources";
//"/data1/solr/example/src/test/resources";
- final NL2Obj compiler = new NL2ObjCreateAssign(resourceDir);
+ final NL2Obj compiler = new NL2ObjCreateAssign(RESOURCES);
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse
rsp){
// get query string
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java
index b059d27..608731e 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java
@@ -41,9 +41,8 @@ public class SearchResultsReRankerRequestHandler extends
SearchHandler {
private final static int MAX_SEARCH_RESULTS = 100;
private final ParseTreeChunkListScorer parseTreeChunkListScorer = new
ParseTreeChunkListScorer();
private ParserChunker2MatcherProcessor sm = null;
- private static final String resourceDir =
"/home/solr/solr-4.4.0/example/src/test/resources";
+ private static final String RESOURCE_DIR =
"/home/solr/solr-4.4.0/example/src/test/resources";
//"C:/workspace/TestSolr/src/test/resources";
-
//"/data1/solr/example/src/test/resources";
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse
rsp){
@@ -169,7 +168,7 @@ public class SearchResultsReRankerRequestHandler extends
SearchHandler {
private List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits,
String searchQuery) {
try {
- sm =
ParserChunker2MatcherProcessor.getInstance(resourceDir);
+ sm =
ParserChunker2MatcherProcessor.getInstance(RESOURCE_DIR);
} catch (Exception e){
LOG.severe(e.getMessage());
}
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
index 9f6a11f..f7cb34b 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java
@@ -21,7 +21,6 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
@@ -34,14 +33,7 @@ import opennlp.tools.textsimilarity.SentencePairMatchResult;
import opennlp.tools.textsimilarity.TextProcessor;
import
opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
-/**
-
- *
- */
-
public class DomainTaxonomyExtender {
- private static final Logger LOG = Logger
-
.getLogger("opennlp.tools.similarity.apps.taxo_builder.DomainTaxonomyExtender");
private final BingQueryRunner brunner = new BingQueryRunner();
private final ParserChunker2MatcherProcessor matcher =
ParserChunker2MatcherProcessor.getInstance();
@@ -161,7 +153,7 @@ public class DomainTaxonomyExtender {
String snapshot2 =
StringCleaner.processSnapshotForMatching(h2
.getTitle() + "
" + h2.getAbstractText());
SentencePairMatchResult
overlaps = matcher.assessRelevance(snapshot1, snapshot2);
-
genResult.addAll(overlaps.matchResult);
+
genResult.addAll(overlaps.getMatchResult());
}
}
}
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
index efb2687..1be923e 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java
@@ -22,13 +22,10 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.logging.Logger;
import opennlp.tools.textsimilarity.TextProcessor;
import
opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
-//import com.thoughtworks.xstream.XStream;
-
/**
* This class can be used to generate scores based on the overlapping between a
* text and a given taxonomy.
@@ -40,8 +37,6 @@ public class TaxoQuerySnapshotMatcher {
// XStream xStream= new XStream();
Map<String, List<List<String>>> lemma_ExtendedAssocWords;
final TaxonomySerializer taxo;
- private static final Logger LOG = Logger
-
.getLogger("opennlp.tools.similarity.apps.taxo_builder.TaxoQuerySnapshotMatcher");
public TaxoQuerySnapshotMatcher(String taxoFileName) {
sm = ParserChunker2MatcherProcessor.getInstance();
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
index 64a65de..2f53a7d 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
@@ -21,7 +21,6 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.logging.Logger;
import opennlp.tools.similarity.apps.BingQueryRunner;
import opennlp.tools.similarity.apps.HitBase;
@@ -42,8 +41,7 @@ import
opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcess
*/
public class TaxonomyExtenderViaMebMining extends BingQueryRunner {
- private static final Logger LOG = Logger
-
.getLogger("opennlp.tools.similarity.apps.taxo_builder.TaxonomyExtenderSearchResultFromYahoo");
+
private final ParseTreeChunkListScorer parseTreeChunkListScorer = new
ParseTreeChunkListScorer();
ParserChunker2MatcherProcessor sm;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
index 67c65ff..21bdafb 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java
@@ -23,7 +23,6 @@ import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
index 1355d8c..0d63e70 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
@@ -33,7 +33,7 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
public class PageFetcher {
- private static final Logger log =
Logger.getLogger("opennlp.tools.similarity.apps.utils.PageFetcher");
+ private static final Logger LOG =
Logger.getLogger("opennlp.tools.similarity.apps.utils.PageFetcher");
private final Tika tika = new Tika();
private static int DEFAULT_TIMEOUT = 1500;
@@ -63,7 +63,7 @@ public class PageFetcher {
pageContent = handler.toString();
} catch (Exception e) {
- log.severe(e.getMessage() + "\n" + e);
+ LOG.severe(e.getMessage() + "\n" + e);
}
return pageContent;
}
@@ -79,7 +79,7 @@ public class PageFetcher {
pageContent = tika.parseToString(connection.getInputStream())
.replace('\n', ' ').replace('\t', ' ');
} catch (IOException | TikaException e) {
- log.severe(e.getMessage() + "\n" + e);
+ LOG.severe(e.getMessage() + "\n" + e);
}
return pageContent;
}
@@ -97,7 +97,7 @@ public class PageFetcher {
}
public String fetchOrigHTML(String url) {
- log.info("fetch url " + url);
+ LOG.info("fetch url " + url);
StringBuilder buf = new StringBuilder();
try {
URLConnection connection = new URL(url).openConnection();
@@ -113,7 +113,7 @@ public class PageFetcher {
connection.getInputStream()));
} catch (Exception e) {
// we don't always need to log trial web pages if access fails
- log.severe(e.toString());
+ LOG.severe(e.toString());
}
while ((line = reader.readLine()) != null) {
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
index 23198fc..bae6357 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java
@@ -40,155 +40,155 @@ public class Utils {
private static final Logger LOG = Logger
.getLogger("opennlp.tools.similarity.apps.utils.Utils");
- protected static final ArrayList<String[]> characterMappings = new
ArrayList<>();
+ protected static final ArrayList<String[]> CHARACTER_MAPPINGS = new
ArrayList<>();
static {
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ÃÂÂ
áâãäå�?ăą�°]",
" " }); // was a
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[À�?ÂÃÄÅĀĂĄ�?]",
"A" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[çćĉċ�?]",
"c" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ÇĆĈĊČ]",
"C" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[�?đ]", "d" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[�?�?]",
"D" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[èéêëæęēĕ�ęě]",
" " }); // was e
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ÈÉÊËÃâ€Â
Ē�ĖĘĚ]",
"'" }); // was E
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[�?ğġģ]",
"g" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ĜĞĠĢƓ]",
"G" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[ĥħ]", "h" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[ĤĦ]", "H" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ìÃÂÂîïĩīÄÂÂĮįıijĵ]",
"i" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[Ì�?Î�?ĨĪĬİIJĴĵ]",
"I" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[ķĸ]", "k" });
- characterMappings.add(new String[] { "[Ķ]", "K" });
- characterMappings
+ CHARACTER_MAPPINGS.add(new String[] { "[Ķ]", "K" });
+ CHARACTER_MAPPINGS
.add(new String[] {
"[øőðòóôõö�?�?őœÃâ€
¡]",
"o" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ÒÓ�ÕÖØŌŎ�?Å’Ãâ€
 ]",
"O" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ñńņÅËâ€
ʼnŋ]",
"n" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ÑŃŅŇÅÅÂ
Å‹]",
"N" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ĺļľŀł]",
"l" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ĹĻĽĿ�?]",
"L" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ùúûüũūÅÂÂůűųÃâ€
°]",
"u" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ÙÚÛÜŨŪŬŮŰŲÃâ€
¯]",
"U" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[ýÿŷ]", "y" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[�?ŶŸ]",
"Y" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ŕ�ř]",
"r" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[�ŖŘ]",
"R" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[šś�?şšſ]",
"s" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ŠŚŜŞŠſ]",
"S" });
- characterMappings.add(new String[] { "ß", "ss" });
- characterMappings.add(new String[] { "Þ", "th" });
- characterMappings.add(new String[] { "þ", "Th" });
- characterMappings
+ CHARACTER_MAPPINGS.add(new String[] { "ß", "ss" });
+ CHARACTER_MAPPINGS.add(new String[] { "Þ", "th" });
+ CHARACTER_MAPPINGS.add(new String[] { "þ", "Th" });
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ţťŧ]",
"t" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ŢŤŦ]",
"T" });
- characterMappings.add(new String[] { "[ŵ]", "w" });
- characterMappings.add(new String[] { "[Å´]", "W" });
- characterMappings
+ CHARACTER_MAPPINGS.add(new String[] { "[ŵ]", "w" });
+ CHARACTER_MAPPINGS.add(new String[] { "[Å´]", "W" });
+ CHARACTER_MAPPINGS
.add(new String[] {
"[žźżžƶ]",
"z" });
- characterMappings
+ CHARACTER_MAPPINGS
.add(new String[] {
"[ŽŽŹŻŽƵ]",
"Z" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[’]", "'" });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"[–]", "'" });
- characterMappings.add(new String[] { "'", "'" });
- characterMappings.add(new String[] { "Âe", "«" });
- characterMappings.add(new String[] { "'AG", "“" });
- characterMappings.add(new String[] { "A�", " " });
- characterMappings.add(new String[] { "&quot;", "\"" });
- characterMappings.add(new String[] { "&amp;", "&" });
- characterMappings.add(new String[] { "&nbsp;", " " });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] { "'", "'" });
+ CHARACTER_MAPPINGS.add(new String[] { "Âe", "«" });
+ CHARACTER_MAPPINGS.add(new String[] { "'AG", "“" });
+ CHARACTER_MAPPINGS.add(new String[] { "A�", " " });
+ CHARACTER_MAPPINGS.add(new String[] { "&quot;", "\"" });
+ CHARACTER_MAPPINGS.add(new String[] { "&amp;", "&" });
+ CHARACTER_MAPPINGS.add(new String[] { "&nbsp;", " " });
+ CHARACTER_MAPPINGS.add(new String[] {
"", " " });
- characterMappings.add(new String[] { "â„¢",
+ CHARACTER_MAPPINGS.add(new String[] { "â„¢",
" " });
- characterMappings.add(new String[] {
+ CHARACTER_MAPPINGS.add(new String[] {
"�", "" });
- characterMappings.add(new String[] { "’", "'" });
+ CHARACTER_MAPPINGS.add(new String[] { "’", "'" });
}
public static String stripNonAsciiChars(String s) {
@@ -211,7 +211,7 @@ public class Utils {
s = s.replaceAll("’", "__apostrophe__");
String tmp = s;
if (tmp != null) {
- for (String[] mapping : characterMappings) {
+ for (String[] mapping : CHARACTER_MAPPINGS) {
tmp = tmp.replaceAll(mapping[0], mapping[1]);
}
}
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java
index 5cc4c0a..1ebc613 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java
@@ -76,9 +76,9 @@ public class ParseTreeChunkListScorer {
} else {
score += 0.1;
}
- } else if (l.startsWith(LemmaGeneralizer.w2vPrefix) ){
+ } else if (l.startsWith(LemmaGeneralizer.W2V_PREFIX) ){
try {
- float val =
Float.parseFloat(l.substring(LemmaGeneralizer.w2vPrefix.length()));
+ float val =
Float.parseFloat(l.substring(LemmaGeneralizer.W2V_PREFIX.length()));
score+= 1- val;
} catch (NumberFormatException e) {
e.printStackTrace();
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
index bcf87d7..2a74997 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java
@@ -18,14 +18,11 @@
package opennlp.tools.textsimilarity;
import java.util.List;
-import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
public class SentencePairMatchResult {
- public List<List<ParseTreeChunk>> matchResult;
- private static final Logger LOG = Logger
- .getLogger("opennlp.tools.textsimilarity.SentencePairMatchResult");
+ private List<List<ParseTreeChunk>> matchResult;
public List<List<ParseTreeChunk>> getMatchResult() {
return matchResult;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
index 5dfdf1a..75d707e 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
@@ -42,7 +42,7 @@ public class TextProcessor {
private static final Logger LOG = Logger
.getLogger("opennlp.tools.textsimilarity.TextProcessor");
- static final String[] abbrevs = { "mr.", "mrs.", "sen.", "rep.", "gov.",
+ static final String[] ABBREVS = { "mr.", "mrs.", "sen.", "rep.", "gov.",
"miss.", "dr.", "oct.", "nov.", "jan.", "feb.", "mar.", "apr.", "may",
"jun.", "jul.", "aug.", "sept." };
@@ -225,7 +225,7 @@ public class TextProcessor {
cand += " " + text.substring(idx, m.end() - 1).trim();
boolean hasAbbrev = false;
- for (String abbrev : abbrevs) {
+ for (String abbrev : ABBREVS) {
if (cand.toLowerCase().endsWith(abbrev)) {
hasAbbrev = true;
break;
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
index b1eda2a..f1b4ea2 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java
@@ -53,14 +53,14 @@ import au.com.bytecode.opencsv.CSVWriter;
public class ParserCacheSerializer {
private static final Logger LOG = Logger
.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserCacheSerializer");
- private static final boolean javaObjectSerialization = false;
+ private static final boolean JAVA_OBJECT_SERIALIZATION = false;
private static final String RESOURCE_DIR = "src/test/resources/";
- public static final String parseCacheFileName = "sentence_parseObject.dat";
- public static final String parseCacheFileNameCSV =
"sentence_parseObject.csv";
+ private static final String PARSE_CACHE_FILE_NAME =
"sentence_parseObject.dat";
+ private static final String PARSE_CACHE_FILE_NAME_CSV =
"sentence_parseObject.csv";
public static void writeObject(Object objectToSerialize) {
- if (javaObjectSerialization) {
- String filename = RESOURCE_DIR + parseCacheFileName;
+ if (JAVA_OBJECT_SERIALIZATION) {
+ String filename = RESOURCE_DIR + PARSE_CACHE_FILE_NAME;
try(FileOutputStream fos = new FileOutputStream(filename);
ObjectOutputStream out = new ObjectOutputStream(fos)) {
@@ -73,7 +73,7 @@ public class ParserCacheSerializer {
Map<String, String[][]> sentence_parseObject = (Map<String, String[][]>)
objectToSerialize;
List<String> keys = new ArrayList<>(sentence_parseObject.keySet());
try (CSVWriter writer = new CSVWriter(
- new FileWriter(RESOURCE_DIR + parseCacheFileNameCSV, false))) {
+ new FileWriter(RESOURCE_DIR + PARSE_CACHE_FILE_NAME_CSV,
false))) {
for (String k : keys) {
String[][] triplet = sentence_parseObject.get(k);
writer.writeNext(new String[] { k });
@@ -89,8 +89,8 @@ public class ParserCacheSerializer {
}
public static Object readObject() {
- if (javaObjectSerialization) {
- String filename = RESOURCE_DIR + parseCacheFileName;
+ if (JAVA_OBJECT_SERIALIZATION) {
+ String filename = RESOURCE_DIR + PARSE_CACHE_FILE_NAME;
Object data = null;
try (FileInputStream fis = new FileInputStream(filename);
ObjectInputStream in = new ObjectInputStream(fis)) {
@@ -106,10 +106,10 @@ public class ParserCacheSerializer {
List<String[]> lines;
try (CSVReader reader = new CSVReader(new FileReader(RESOURCE_DIR
- + parseCacheFileNameCSV), ',')) {
+ + PARSE_CACHE_FILE_NAME_CSV), ',')) {
lines = reader.readAll();
} catch (FileNotFoundException e) {
- if (javaObjectSerialization)
+ if (JAVA_OBJECT_SERIALIZATION)
System.err.println("Cannot find cache file");
return null;
} catch (IOException ioe) {
diff --git
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
index 3659ad1..e2bb275 100644
---
a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
+++
b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
@@ -54,7 +54,8 @@ import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
public class ParserChunker2MatcherProcessor {
- protected static final int MIN_SENTENCE_LENGTH = 10;
+
+ static final int MIN_SENTENCE_LENGTH = 10;
private static final String MODEL_DIR_KEY = "nlp.models.dir";
// TODO config
// this is where resources should live
@@ -67,7 +68,7 @@ public class ParserChunker2MatcherProcessor {
private POSTagger posTagger;
private Parser parser;
private ChunkerME chunker;
- private final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5;
+ private static final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5;
private static final Logger LOG =
Logger.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor");
private Map<String, String[][]> sentence_parseObject;
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
index c28b17d..9d202a2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
@@ -81,7 +81,7 @@ public class FeaturesExtractor {
for (int i = 0; i < wordToDisambiguate.getSentence().length; i++) {
if (wordToDisambiguate.getLemmas() != null) {
- if (!WSDHelper.stopWords.contains(wordToDisambiguate.getSentence()[i]
+ if (!WSDHelper.STOP_WORDS.contains(wordToDisambiguate.getSentence()[i]
.toLowerCase()) && (wordToDisambiguate.getWordIndex() != i)) {
String lemma = wordToDisambiguate.getLemmas()[i].toLowerCase()
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
index a54cf2e..7cc7015 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
@@ -50,7 +50,7 @@ public class IMSWSDContextGenerator implements WSDContextGenerator {
for (int i = 0; i < toks.length; i++) {
if (lemmas != null) {
- if (!WSDHelper.stopWords.contains(toks[i].toLowerCase()) && (index
+ if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index
!= i)) {
String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
index d37162c..8c52c9d 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
@@ -41,7 +41,7 @@ public class OSCCWSDContextGenerator implements WSDContextGenerator {
for (int i = 0; i < toks.length; i++) {
if (lemmas != null) {
- if (!WSDHelper.stopWords.contains(toks[i].toLowerCase()) && (index
+ if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index
!= i)) {
String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
index da1cd3e..2dbf7d7 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
@@ -35,6 +35,7 @@ import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.dictionary.Dictionary;
import net.sf.extjwnl.dictionary.MorphologicalProcessor;
+
import opennlp.tools.cmdline.postag.POSModelLoader;
import opennlp.tools.lemmatizer.DictionaryLemmatizer;
import opennlp.tools.postag.POSTaggerME;
@@ -62,22 +63,21 @@ public class WSDHelper {
private static Map<String, Object> nonRelevWordsDef;
// List of all the PoS tags
- public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
+ public static final String[] ALL_POS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
"JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
"PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD",
"VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
// List of the PoS tags of which the senses are to be extracted
- public static final String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB",
+ public static final String[] RELEVANT_POS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB",
"RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
// List of Negation Words
- public static List<String> negationWords = new ArrayList<>(
- Arrays.asList("not", "no", "never", "none", "nor", "non"));
+ public static final List<String> NEGATION_WORDS = Arrays.asList("not", "no", "never", "none", "nor", "non");
// List of Stop Words
- public static final List<String> stopWords = new ArrayList<>(
- Arrays.asList("a", "able", "about", "above", "according", "accordingly",
+ public static final List<String> STOP_WORDS = Arrays.asList(
+ "a", "able", "about", "above", "according", "accordingly",
"across", "actually", "after", "afterwards", "again", "against",
"ain't", "all", "allow", "allows", "almost", "alone", "along",
"already", "also", "although", "always", "am", "among", "amongst",
@@ -155,12 +155,12 @@ public class WSDHelper {
"who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
"willing", "wish", "with", "within", "without", "wonder", "won't",
"would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your",
- "you're", "yours", "yourself", "yourselves", "you've", "zero"));
+ "you're", "yours", "yourself", "yourselves", "you've", "zero");
public static Map<String, Object> getRelvCache() {
if (relvCache == null || relvCache.keySet().isEmpty()) {
relvCache = new HashMap<>();
- for (String t : relevantPOS) {
+ for (String t : RELEVANT_POS) {
relvCache.put(t, null);
}
}
@@ -170,7 +170,7 @@ public class WSDHelper {
public static Map<String, Object> getStopCache() {
if (stopCache == null || stopCache.keySet().isEmpty()) {
stopCache = new HashMap<>();
- for (String s : stopWords) {
+ for (String s : STOP_WORDS) {
stopCache.put(s, null);
}
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
index ad17dc7..d8667d2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
@@ -27,7 +27,7 @@ import opennlp.tools.util.ObjectStream;
public class WSDSampleStream extends FilterObjectStream<String, WSDSample> {
- private static final Logger logger = Logger.getLogger(WSDSampleStream.class.getName());
+ private static final Logger LOG = Logger.getLogger(WSDSampleStream.class.getName());
/**
* Initializes the current instance.
@@ -60,8 +60,8 @@ public class WSDSampleStream extends FilterObjectStream<String, WSDSample> {
sample = WSDSample.parse(sentence);
} catch (InvalidFormatException e) {
- if (logger.isLoggable(Level.WARNING)) {
- logger
+ if (LOG.isLoggable(Level.WARNING)) {
+ LOG
.warning("Error during parsing, ignoring sentence: " + sentence);
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
index 9bf8d7f..c8aa549 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
@@ -36,7 +36,7 @@ public class WSDisambiguatorME extends WSDisambiguator {
protected WSDModel model;
- protected static final WSDContextGenerator cg = new IMSWSDContextGenerator();
+ protected static final WSDContextGenerator CONTEXT_GENERATOR = new IMSWSDContextGenerator();
public WSDisambiguatorME(WSDParameters params) {
this.params = params;
@@ -78,7 +78,7 @@ public class WSDisambiguatorME extends WSDisambiguator {
wordTag = sample.getTargetWordTag();
do {
String sense = sample.getSenseIDs()[0];
- String[] context = cg
+ String[] context = CONTEXT_GENERATOR
.getContext(sample, ((WSDDefaultParameters) params).ngram,
((WSDDefaultParameters) params).windowSize, surroundingContext);
Event ev = new Event(sense + "", context);
@@ -136,7 +136,7 @@ public class WSDisambiguatorME extends WSDisambiguator {
String outcome;
- String[] context = cg
+ String[] context = CONTEXT_GENERATOR
.getContext(sample, ((WSDDefaultParameters) this.params).ngram,
((WSDDefaultParameters) this.params).windowSize,
this.model.getContextEntries());
@@ -162,7 +162,7 @@ public class WSDisambiguatorME extends WSDisambiguator {
} else {
String outcome;
- String[] context = cg
+ String[] context = CONTEXT_GENERATOR
.getContext(sample, ((WSDDefaultParameters) this.params).ngram,
((WSDDefaultParameters) this.params).windowSize,
this.model.getContextEntries());
diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java
index fca0d07..524b420 100644
--- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java
+++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java
@@ -46,7 +46,7 @@ import edu.mit.jwi.RAMDictionary;
public class WordRelationshipDetermination {
private final IDictionary dictionary;
- private static final String dictionaryFile = "/wordnet/dict";
+ private static final String DICTIONARY_FILE = "/wordnet/dict";
private static final int MAX_DIST_MED_REL = 1000;
private final Pointer[] rels = {Pointer.ANTONYM, Pointer.HYPERNYM,
Pointer.HYPONYM, Pointer.MERONYM_PART,
@@ -55,7 +55,7 @@ public class WordRelationshipDetermination {
private final Hashtable<ISynset, List<IWord>> synsetWordCache = new
Hashtable<>();
public WordRelationshipDetermination() throws Exception {
- dictionary = new RAMDictionary(WordRelationshipDetermination.class.getResource(dictionaryFile), ILoadPolicy.IMMEDIATE_LOAD);
+ dictionary = new RAMDictionary(WordRelationshipDetermination.class.getResource(DICTIONARY_FILE), ILoadPolicy.IMMEDIATE_LOAD);
((RAMDictionary)dictionary).load();
openDict();
}
diff --git a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
index 6b23dd9..e6eca05 100644
--- a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
+++ b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
@@ -42,11 +42,11 @@ public class MetaSummarizer {
private final DocProcessor dp;
private final TextRankSummarizer textRank;
private final LexicalChainingSummarizer lcs;
- private static final String sentFragModel = "/en-sent.bin";
+ private static final String SENT_FRAG_MODEL = "/en-sent.bin";
public MetaSummarizer(String posModelFile) throws Exception {
Logger.getAnonymousLogger().info("Initializing Meta Summarizer");
- dp = new DefaultDocProcessor(MetaSummarizer.class.getResourceAsStream(sentFragModel));
+ dp = new DefaultDocProcessor(MetaSummarizer.class.getResourceAsStream(SENT_FRAG_MODEL));
textRank = new TextRankSummarizer();
lcs = new LexicalChainingSummarizer(dp, new FileInputStream(posModelFile));
}
diff --git a/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java b/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java
index 7825e58..c54f76e 100755
--- a/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java
+++ b/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java
@@ -142,7 +142,6 @@ public class DefaultDocProcessor implements DocProcessor {
//List of sentences form a document
public List<Sentence> docToSentList(String fileName) {
List<Sentence> sentList = new ArrayList<>();
- StringBuilder docBuffer = new StringBuilder();
try (LineNumberReader lnr = new LineNumberReader(new
FileReader(fileName))) {
String nextLine;
@@ -168,7 +167,6 @@ public class DefaultDocProcessor implements DocProcessor {
}
}
- String doc = docBuffer.toString();
} catch (Exception ex) {
Logger.getLogger(DefaultDocProcessor.class.getName()).log(Level.SEVERE,
null, ex);
}
diff --git a/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java b/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
index b068637..b6072eb 100755
--- a/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
+++ b/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java
@@ -53,7 +53,7 @@ public class TextRank {
// private Hashtable<Integer, String[]> wordsInSent;
// DAMPING FACTOR..
- private static final double df = 0.15;
+ private static final double DF = 0.15;
private static final boolean HIGHER_TITLE_WEIGHT = true;
private static final double TITLE_WRD_WT = 2d;
@@ -150,7 +150,7 @@ public class TextRank {
sum += wij / sigmawjk * txtRnkj;
}
}
- ns.setScore((1d - df) + sum * df);// * rs.score
+ ns.setScore((1d - DF) + sum * DF);// * rs.score
totErr += ns.getScore() - getScoreFrom(rawScores, sentId);
newWtScores.add(ns);
}
diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java
index 72804cb..aa01361 100644
--- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java
+++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java
@@ -33,7 +33,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
class LexChainTest {
- private static final String article =
+ private static final String ARTICLE =
"US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"."
+ "But a White House statement cautioned that the US expected Syria to live up to its public commitments. "
+ "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. "
@@ -52,14 +52,14 @@ class LexChainTest {
@Test
void testBuildLexicalChains() {
- List<Sentence> sent = dp.getSentencesFromStr(article);
+ List<Sentence> sent = dp.getSentencesFromStr(ARTICLE);
assertNotNull(sent);
- List<LexicalChain> vh = lcs.buildLexicalChains(article, sent);
+ List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent);
assertNotNull(vh);
Collections.sort(vh);
assertTrue(vh.size() > 0);
- List<Sentence> s = dp.getSentencesFromStr(article);
+ List<Sentence> s = dp.getSentencesFromStr(ARTICLE);
Hashtable<String, Boolean> comp = new Hashtable<>();
for (int i = vh.size() - 1; i >= Math.max(vh.size() - 50, 0); i--) {
diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java
index 24cac18..1bb476a 100644
--- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java
+++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java
@@ -30,7 +30,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
class LexChainingKeywordExtractorTest {
- private static final String article =
+ private static final String ARTICLE =
"US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"."
+ "But a White House statement cautioned that the US expected Syria to live up to its public commitments. "
+ "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. "
@@ -49,8 +49,8 @@ class LexChainingKeywordExtractorTest {
@Test
void testGetKeywords() {
- List<Sentence> sent = dp.getSentencesFromStr(article);
- List<LexicalChain> vh = lcs.buildLexicalChains(article, sent);
+ List<Sentence> sent = dp.getSentencesFromStr(ARTICLE);
+ List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent);
LexChainingKeywordExtractor ke = new LexChainingKeywordExtractor();
List<String> keywords = ke.getKeywords(vh, 5);
assertNotNull(keywords);