This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch constant_fields_shall_be_named_according_to_jnc in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 1efb0db5ddc9cde40b5211f161f3c4877e2611c2 Author: Martin Wiesner <[email protected]> AuthorDate: Thu Feb 23 14:25:15 2023 +0100 Name constants according to JNC - adjusts non-compliant naming of constant fields so that capitalized name is used - adjusts visibility of some fields to respect information hiding principle - fixes incorrect indentation in some classes - removes unused imports along the path --- .../utils/cfg/ProbabilisticContextFreeGrammar.java | 30 +++--- .../tools/coref/mention/AbstractMentionFinder.java | 6 +- .../opennlp/tools/coref/mention/DefaultParse.java | 12 +-- .../tools/coref/mention/JWNLDictionary.java | 4 +- .../opennlp/tools/coref/mention/PTBHeadFinder.java | 18 ++-- .../tools/coref/resolver/MaxentResolver.java | 19 ++-- .../coref/resolver/PluralPronounResolver.java | 2 +- .../tools/coref/resolver/ResolverUtils.java | 52 +++++----- .../coref/resolver/SingularPronounResolver.java | 4 +- .../coref/resolver/SpeechPronounResolver.java | 2 +- .../java/opennlp/tools/coref/sim/GenderModel.java | 6 +- .../java/opennlp/tools/coref/sim/NumberModel.java | 4 +- .../formats/muc/MucMentionInserterStream.java | 4 +- .../apps/object_dedup/SimilarityAccessorBase.java | 44 ++++----- .../tools/apps/relevanceVocabs/SentimentVocab.java | 4 +- .../tools/apps/utils/email/EmailSender.java | 14 +-- .../ClassifierTrainingSetIndexer.java | 24 ++--- .../tools/doc_classifier/DocClassifier.java | 37 +++---- ...cClassifierTrainingSetMultilingualExtender.java | 9 +- .../enron_email_recognizer/EmailNormalizer.java | 12 +-- .../EmailTrainingSetFormer.java | 17 ++-- .../main/java/opennlp/tools/nl2code/NL2Obj.java | 4 +- .../opennlp/tools/nl2code/NL2ObjCreateAssign.java | 18 ++-- .../tools/parse_thicket/apps/WebPageExtractor.java | 4 +- .../parse_thicket/matching/LemmaGeneralizer.java | 78 +++++++-------- .../tools/similarity/apps/ContentGenerator.java | 25 +++-- .../apps/GeneratedSentenceProcessor.java | 9 +- .../similarity/apps/RelatedSentenceFinder.java | 18 ++-- .../similarity/apps/RelatedSentenceFinderML.java | 2 +- .../similarity/apps/StoryDiscourseNavigator.java | 2 +- .../tools/similarity/apps/YahooAnswersMiner.java | 12 +-- .../apps/solr/IterativeQueryComponent.java | 8 +- .../apps/solr/NLProgram2CodeRequestHandler.java | 5 +- .../solr/SearchResultsReRankerRequestHandler.java | 5 +- .../apps/taxo_builder/DomainTaxonomyExtender.java | 10 +- .../taxo_builder/TaxoQuerySnapshotMatcher.java | 5 - .../taxo_builder/TaxonomyExtenderViaMebMining.java | 4 +- .../tools/similarity/apps/utils/FileHandler.java | 1 - .../tools/similarity/apps/utils/PageFetcher.java | 10 +- .../opennlp/tools/similarity/apps/utils/Utils.java | 108 ++++++++++----------- .../textsimilarity/ParseTreeChunkListScorer.java | 4 +- .../textsimilarity/SentencePairMatchResult.java | 5 +- .../tools/textsimilarity/TextProcessor.java | 4 +- .../chunker2matcher/ParserCacheSerializer.java | 20 ++-- .../ParserChunker2MatcherProcessor.java | 5 +- .../tools/disambiguator/FeaturesExtractor.java | 2 +- .../disambiguator/IMSWSDContextGenerator.java | 2 +- .../disambiguator/OSCCWSDContextGenerator.java | 2 +- .../opennlp/tools/disambiguator/WSDHelper.java | 18 ++-- .../tools/disambiguator/WSDSampleStream.java | 6 +- .../tools/disambiguator/WSDisambiguatorME.java | 8 +- .../WordRelationshipDetermination.java | 4 +- .../opennlp/summarization/meta/MetaSummarizer.java | 4 +- .../preprocess/DefaultDocProcessor.java | 2 - .../opennlp/summarization/textrank/TextRank.java | 4 +- .../lexicalchaining/LexChainTest.java | 8 +- .../LexChainingKeywordExtractorTest.java | 6 +- 57 files changed, 359 insertions(+), 397 deletions(-) diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java index 3a7d1d8..2a9d9e0 100644 --- a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java +++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java @@ -41,15 +41,15 @@ public class ProbabilisticContextFreeGrammar { private final String startSymbol; private final boolean randomExpansion; - private static final Rule emptyRule = new Rule("EMPTY~", ""); + private static final Rule EMPTY_RULE = new Rule("EMPTY~", ""); - private static final String nonTerminalMatcher = "[\\w\\~\\*\\-\\.\\,\\'\\:\\_\\\"]"; - private static final String terminalMatcher = "[\\*òàùìèé\\|\\w\\'\\.\\,\\:\\_Ù\\?È\\%\\;À\\-\\\"]"; + private static final String NON_TERMINAL_MATCHER = "[\\w\\~\\*\\-\\.\\,\\'\\:\\_\\\"]"; + private static final String TERMINAL_MATCHER = "[\\*òàùìèé\\|\\w\\'\\.\\,\\:\\_Ù\\?È\\%\\;À\\-\\\"]"; - private static final Pattern terminalPattern = Pattern.compile("\\(("+nonTerminalMatcher+"+)\\s("+terminalMatcher+"+)\\)"); - private static final Pattern nonTerminalPattern = Pattern.compile( - "\\(("+nonTerminalMatcher+"+)" + // source NT - "\\s("+nonTerminalMatcher+"+)((\\s"+nonTerminalMatcher+"+)*)\\)" // expansion NTs + private static final Pattern TERMINAL_PATTERN = Pattern.compile("\\(("+ NON_TERMINAL_MATCHER +"+)\\s("+ TERMINAL_MATCHER +"+)\\)"); + private static final Pattern NON_TERMINAL_PATTERN = Pattern.compile( + "\\(("+ NON_TERMINAL_MATCHER +"+)" + // source NT + "\\s("+ NON_TERMINAL_MATCHER +"+)((\\s"+ NON_TERMINAL_MATCHER +"+)*)\\)" // expansion NTs ); public ProbabilisticContextFreeGrammar(Collection<String> nonTerminalSymbols, Collection<String> terminalSymbols, @@ -255,7 +255,7 @@ public class ProbabilisticContextFreeGrammar { @Override public String toString() { - if (getRule() != emptyRule) { + if (getRule() != EMPTY_RULE) { return "(" + (rule != null ? rule.getEntry() : null) + " " + (leftTree != null && rightTree != null ? @@ -295,10 +295,10 @@ public class ProbabilisticContextFreeGrammar { Collection<String> nonTerminals = new HashSet<>(); Collection<String> terminals = new HashSet<>(); - rules.put(emptyRule, 1d); - rulesMap.put(emptyRule, 1d); - nonTerminals.add(emptyRule.getEntry()); - terminals.add(emptyRule.getExpansion()[0]); + rules.put(EMPTY_RULE, 1d); + rulesMap.put(EMPTY_RULE, 1d); + nonTerminals.add(EMPTY_RULE.getEntry()); + terminals.add(EMPTY_RULE.getExpansion()[0]); for (String parseTreeString : parseStrings) { @@ -308,7 +308,7 @@ public class ProbabilisticContextFreeGrammar { String toConsume = String.valueOf(parseTreeString); - Matcher m = terminalPattern.matcher(parseTreeString); + Matcher m = TERMINAL_PATTERN.matcher(parseTreeString); while (m.find()) { String nt = m.group(1); String t = m.group(2); @@ -321,7 +321,7 @@ public class ProbabilisticContextFreeGrammar { } while (toConsume.contains(" ") && !toConsume.trim().equals("( " + startSymbol + " )")) { - Matcher m2 = nonTerminalPattern.matcher(toConsume); + Matcher m2 = NON_TERMINAL_PATTERN.matcher(toConsume); while (m2.find()) { String nt = m2.group(1); String t1 = m2.group(2); @@ -370,7 +370,7 @@ public class ProbabilisticContextFreeGrammar { if (!terminals.contains(firstExpansion)) { if (nonTerminals.contains(firstExpansion)) { // nt1 -> nt2 should be expanded in nt1 -> nt2,E - Rule newRule = new Rule(rule.getEntry(), firstExpansion, emptyRule.getEntry()); + Rule newRule = new Rule(rule.getEntry(), firstExpansion, EMPTY_RULE.getEntry()); addRule(newRule, rulesMap); } else { throw new RuntimeException("rule "+rule+" expands to neither a terminal or non terminal"); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java index a574e50..43f1ce6 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java @@ -187,9 +187,9 @@ public abstract class AbstractMentionFinder implements MentionFinder { } private boolean handledPronoun(String tok) { - return ResolverUtils.singularThirdPersonPronounPattern.matcher(tok).find() || - ResolverUtils.pluralThirdPersonPronounPattern.matcher(tok).find() || - ResolverUtils.speechPronounPattern.matcher(tok).find(); + return ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(tok).find() || + ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(tok).find() || + ResolverUtils.SPEECH_PRONOUN_PATTERN.matcher(tok).find(); } private void collectPossesivePronouns(Parse np, List<Mention> entities) { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java index 114b417..725a213 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/DefaultParse.java @@ -41,7 +41,7 @@ public class DefaultParse extends AbstractParse { private final Parse parse; private final int sentenceNumber; - private static final Set<String> entitySet = new HashSet<>(Arrays.asList(NAME_TYPES)); + private static final Set<String> ENTITY_SET = new HashSet<>(Arrays.asList(NAME_TYPES)); /** * Initializes the current instance. @@ -65,7 +65,7 @@ public class DefaultParse extends AbstractParse { List<Parse> kids = new LinkedList<>(Arrays.asList(parse.getChildren())); while (kids.size() > 0) { Parse p = kids.remove(0); - if (entitySet.contains(p.getType())) { + if (ENTITY_SET.contains(p.getType())) { names.add(p); } else { @@ -83,7 +83,7 @@ public class DefaultParse extends AbstractParse { List<Parse> kids = new ArrayList<>(Arrays.asList(parse.getChildren())); for (int ci = 0; ci < kids.size(); ci++) { Parse kid = kids.get(ci); - if (entitySet.contains(kid.getType())) { + if (ENTITY_SET.contains(kid.getType())) { kids.remove(ci); kids.addAll(ci, Arrays.asList(kid.getChildren())); ci--; @@ -108,7 +108,7 @@ public class DefaultParse extends AbstractParse { } public String getSyntacticType() { - if (entitySet.contains(parse.getType())) { + if (ENTITY_SET.contains(parse.getType())) { return null; } else if (parse.getType().contains("#")) { @@ -130,7 +130,7 @@ public class DefaultParse extends AbstractParse { } public String getEntityType() { - if (entitySet.contains(parse.getType())) { + if (ENTITY_SET.contains(parse.getType())) { return parse.getType(); } else { @@ -165,7 +165,7 @@ public class DefaultParse extends AbstractParse { // the type can be extracted from. Then it just depends // on the training data and not the values inside NAME_TYPES. - if (entitySet.contains(parse.getType())) { + if (ENTITY_SET.contains(parse.getType())) { return true; } else { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java index ae5b869..b6cdb79 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/JWNLDictionary.java @@ -53,7 +53,7 @@ public class JWNLDictionary implements Dictionary { private final net.didion.jwnl.dictionary.Dictionary dict; private MorphologicalProcessor morphy; - private static final String[] empty = new String[0]; + private static final String[] EMPTY = new String[0]; public JWNLDictionary(String searchDirectory) throws IOException, JWNLException { PointerType.initialize(); @@ -162,7 +162,7 @@ public class JWNLDictionary implements Dictionary { return parents.toArray(new String[parents.size()]); } else { - return empty; + return EMPTY; } } catch (JWNLException e) { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java index 4ce1982..dbbd25d 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java @@ -28,15 +28,15 @@ import java.util.Set; public final class PTBHeadFinder implements HeadFinder { private static PTBHeadFinder instance; - private static final Set<String> skipSet = new HashSet<>(); + private static final Set<String> SKIP_SET = new HashSet<>(); static { - skipSet.add("POS"); - skipSet.add(","); - skipSet.add(":"); - skipSet.add("."); - skipSet.add("''"); - skipSet.add("-RRB-"); - skipSet.add("-RCB-"); + SKIP_SET.add("POS"); + SKIP_SET.add(","); + SKIP_SET.add(":"); + SKIP_SET.add("."); + SKIP_SET.add("''"); + SKIP_SET.add("-RRB-"); + SKIP_SET.add("-RCB-"); } private PTBHeadFinder() {} @@ -133,7 +133,7 @@ public final class PTBHeadFinder implements HeadFinder { } for (int ti = toks.size() - tokenCount - 1; ti >= 0; ti--) { Parse tok = toks.get(ti); - if (!skipSet.contains(tok.getSyntacticType())) { + if (!SKIP_SET.contains(tok.getSyntacticType())) { return ti; } } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java index 8672cb2..f0ead35 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java @@ -52,8 +52,7 @@ public abstract class MaxentResolver extends AbstractResolver { /** Default feature value. */ public static final String DEFAULT = "default"; - - private static final boolean debugOn = false; + private static final boolean DEBUG = false; private String modelName; private MaxentModel model; @@ -84,7 +83,7 @@ public abstract class MaxentResolver extends AbstractResolver { /** The model for computing non-referential probabilities. */ protected NonReferentialResolver nonReferentialResolver; - private static final String modelExtension = ".bin.gz"; + private static final String MODEL_EXTENSION = ".bin.gz"; /** * Creates a maximum-entropy-based resolver which will look the specified number of @@ -123,7 +122,7 @@ public abstract class MaxentResolver extends AbstractResolver { this.modelName = modelDirectory + "/" + name; if (ResolverMode.TEST == this.mode) { try (DataInputStream dis = new DataInputStream( - new BufferedInputStream(new FileInputStream(modelName + modelExtension)))) { + new BufferedInputStream(new FileInputStream(modelName + MODEL_EXTENSION)))) { model = new BinaryGISModelReader(dis).getModel(); } sameIndex = model.getIndex(SAME); @@ -181,7 +180,7 @@ public abstract class MaxentResolver extends AbstractResolver { DiscourseEntity de; int ei = 0; double nonReferentialProbability = nonReferentialResolver.getNonReferentialProbability(ec); - if (debugOn) { + if (DEBUG) { System.err.println(this + ".resolve: " + ec.toText() + " -> " + "null " + nonReferentialProbability); } for (; ei < getNumEntities(dm); ei++) { @@ -191,7 +190,7 @@ public abstract class MaxentResolver extends AbstractResolver { } if (excluded(ec, de)) { candProbs[ei] = 0; - if (debugOn) { + if (DEBUG) { System.err.println("excluded " + this + ".resolve: " + ec.toText() + " -> " + de + " " + candProbs[ei]); } @@ -206,7 +205,7 @@ public abstract class MaxentResolver extends AbstractResolver { catch (ArrayIndexOutOfBoundsException e) { candProbs[ei] = 0; } - if (debugOn) { + if (DEBUG) { System.err.println(this + ".resolve: " + ec.toText() + " -> " + de + " (" + ec.getGender() + "," + de.getGender() + ") " + candProbs[ei] + " " + lfeatures); } @@ -284,7 +283,7 @@ public abstract class MaxentResolver extends AbstractResolver { List<String> features = getFeatures(mention, cde); //add Event to Model - if (debugOn) { + if (DEBUG) { System.err.println(this + ".retain: " + mention.getId() + " " + mention.toText() + " -> " + entityMention.getId() + " " + cde); } @@ -339,7 +338,7 @@ public abstract class MaxentResolver extends AbstractResolver { @Override public void train() throws IOException { if (ResolverMode.TRAIN == mode) { - if (debugOn) { + if (DEBUG) { System.err.println(this + " referential"); FileWriter writer = new FileWriter(modelName + ".events"); for (Event e : events) { @@ -353,7 +352,7 @@ public abstract class MaxentResolver extends AbstractResolver { GISTrainer trainer = new GISTrainer(); trainer.init(params, null); GISModel trainedModel = trainer.trainModel(ObjectStreamUtils.createObjectStream(events)); - new BinaryGISModelWriter(trainedModel, new File(modelName + modelExtension)).persist(); + new BinaryGISModelWriter(trainedModel, new File(modelName + MODEL_EXTENSION)).persist(); nonReferentialResolver.train(); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java index 2592316..f6534f3 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java @@ -90,6 +90,6 @@ public class PluralPronounResolver extends MaxentResolver { public boolean canResolve(MentionContext mention) { String tag = mention.getHeadTokenTag(); return (tag != null && tag.startsWith("PRP") - && ResolverUtils.pluralThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches()); + && ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches()); } } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java index b63627d..2cd657a 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java @@ -39,49 +39,49 @@ import opennlp.tools.coref.sim.TestSimilarityModel; public class ResolverUtils { private static final Pattern ENDS_WITH_PERIOD = Pattern.compile("\\.$"); - private static final Pattern initialCaps = Pattern.compile("^[A-Z]"); + private static final Pattern INITIAL_CAPS = Pattern.compile("^[A-Z]"); /** Regular expression for English singular third-person pronouns. */ - public static final Pattern singularThirdPersonPronounPattern = + public static final Pattern SINGULAR_THIRD_PERSON_PRONOUN_PATTERN = Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English plural third-person pronouns. */ - public static final Pattern pluralThirdPersonPronounPattern = + public static final Pattern PLURAL_THIRD_PERSON_PRONOUN_PATTERN = Pattern.compile("^(they|their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English speech pronouns. */ - public static final Pattern speechPronounPattern = + public static final Pattern SPEECH_PRONOUN_PATTERN = Pattern.compile("^(I|me|my|you|your|you|we|us|our|ours)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English female pronouns. */ - public static final Pattern femalePronounPattern = + public static final Pattern FEMALE_PRONOUN_PATTERN = Pattern.compile("^(she|her|hers|herself)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English neuter pronouns. */ - public static final Pattern neuterPronounPattern = + public static final Pattern NEUTER_PRONOUN_PATTERN = Pattern.compile("^(it|its|itself)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English first-person pronouns. */ - public static final Pattern firstPersonPronounPattern = + public static final Pattern FIRST_PERSON_PRONOUN_PATTERN = Pattern.compile("^(I|me|my|we|our|us|ours)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English singular second-person pronouns. */ - public static final Pattern secondPersonPronounPattern = + public static final Pattern SECOND_PERSON_PRONOUN_PATTERN = Pattern.compile("^(you|your|yours)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English third-person pronouns. */ - public static final Pattern thirdPersonPronounPattern = + public static final Pattern THIRD_PERSON_PRONOUN_PATTERN = Pattern.compile("^(he|she|it|him|her|his|hers|its|himself|herself|itself|they|" + "their|theirs|them|themselves)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English singular pronouns. */ - public static final Pattern singularPronounPattern = + public static final Pattern SINGULAR_PRONOUN_PATTERN = Pattern.compile("^(I|me|my|he|she|it|him|her|his|hers|its|himself|herself|itself)$", Pattern.CASE_INSENSITIVE); /** Regular expression for English plural pronouns. */ - public static final Pattern pluralPronounPattern = + public static final Pattern PLURAL_PRONOUN_PATTERN = Pattern.compile("^(we|us|our|ours|they|their|theirs|them|themselves)$", Pattern.CASE_INSENSITIVE); /** Regular expression for English male pronouns. */ - public static final Pattern malePronounPattern = + public static final Pattern MALE_PRONOUN_PATTERN = Pattern.compile("^(he|him|his|himself)$",Pattern.CASE_INSENSITIVE); /** Regular expression for English honorifics. */ - public static final Pattern honorificsPattern = + public static final Pattern HONORIFICS_PATTERN = Pattern.compile("[A-Z][a-z]+\\.$|^[A-Z][b-df-hj-np-tv-xz]+$"); /** Regular expression for English corporate designators. */ - public static final Pattern designatorsPattern = + public static final Pattern DESIGNATORS_PATTERN = Pattern.compile("[a-z]\\.$|^[A-Z][b-df-hj-np-tv-xz]+$|^Co(rp)?$"); @@ -185,7 +185,7 @@ public class ResolverUtils { Object[] mtokens = ec.getTokens(); for (Object mtoken : mtokens) { String token = mtoken.toString(); - if (!honorificsPattern.matcher(token).matches()) { + if (!HONORIFICS_PATTERN.matcher(token).matches()) { if (!first) { sb.append(" "); } @@ -387,7 +387,7 @@ public class ResolverUtils { } if (start + 1 != end) { // don't do this on head words, to keep "U.S." //strip off honorifics in begining - if (honorificsPattern.matcher(mtokens[start].toString()).find()) { + if (HONORIFICS_PATTERN.matcher(mtokens[start].toString()).find()) { start++; } if (start == end) { @@ -395,7 +395,7 @@ public class ResolverUtils { return null; } //strip off and honerifics on the end - if (designatorsPattern.matcher(mtokens[mtokens.length - 1].toString()).find()) { + if (DESIGNATORS_PATTERN.matcher(mtokens[mtokens.length - 1].toString()).find()) { end--; } } @@ -415,7 +415,7 @@ public class ResolverUtils { for (Iterator<MentionContext> ei = de.getMentions(); ei.hasNext();) { MentionContext xec = ei.next(); String xecHeadTag = xec.getHeadTokenTag(); - if (xecHeadTag.startsWith("NNP") || initialCaps.matcher(xec.getHeadTokenText()).find()) { + if (xecHeadTag.startsWith("NNP") || INITIAL_CAPS.matcher(xec.getHeadTokenText()).find()) { return xec; } } @@ -424,19 +424,19 @@ public class ResolverUtils { private static Map<String, String> getPronounFeatureMap(String pronoun) { Map<String, String> pronounMap = new HashMap<>(); - if (malePronounPattern.matcher(pronoun).matches()) { + if (MALE_PRONOUN_PATTERN.matcher(pronoun).matches()) { pronounMap.put("gender","male"); } - else if (femalePronounPattern.matcher(pronoun).matches()) { + else if (FEMALE_PRONOUN_PATTERN.matcher(pronoun).matches()) { pronounMap.put("gender","female"); } - else if (neuterPronounPattern.matcher(pronoun).matches()) { + else if (NEUTER_PRONOUN_PATTERN.matcher(pronoun).matches()) { pronounMap.put("gender","neuter"); } - if (singularPronounPattern.matcher(pronoun).matches()) { + if (SINGULAR_PRONOUN_PATTERN.matcher(pronoun).matches()) { pronounMap.put("number","singular"); } - else if (pluralPronounPattern.matcher(pronoun).matches()) { + else if (PLURAL_PRONOUN_PATTERN.matcher(pronoun).matches()) { pronounMap.put("number","plural"); } /* @@ -651,13 +651,13 @@ public class ResolverUtils { * @return the gender of the specified pronoun. */ public static String getPronounGender(String pronoun) { - if (malePronounPattern.matcher(pronoun).matches()) { + if (MALE_PRONOUN_PATTERN.matcher(pronoun).matches()) { return "m"; } - else if (femalePronounPattern.matcher(pronoun).matches()) { + else if (FEMALE_PRONOUN_PATTERN.matcher(pronoun).matches()) { return "f"; } - else if (neuterPronounPattern.matcher(pronoun).matches()) { + else if (NEUTER_PRONOUN_PATTERN.matcher(pronoun).matches()) { return "n"; } else { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java index 459bf10..7cfd887 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java @@ -50,7 +50,7 @@ public class SingularPronounResolver extends MaxentResolver { //System.err.println("MaxentSingularPronounResolver.canResolve: ec= ("+mention.id+") "+ mention.toText()); String tag = mention.getHeadTokenTag(); return tag != null && tag.startsWith("PRP") - && ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches(); + && ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches(); } @Override @@ -111,7 +111,7 @@ public class SingularPronounResolver extends MaxentResolver { MentionContext entityMention = ei.next(); String tag = entityMention.getHeadTokenTag(); if (tag != null && tag.startsWith("PRP") - && ResolverUtils.singularThirdPersonPronounPattern.matcher(mention.getHeadTokenText()).matches()) { + && ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches()) { if (mentionGender == null) { //lazy initialization mentionGender = ResolverUtils.getPronounGender(mention.getHeadTokenText()); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java index be4e5dc..b853b34 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java @@ -80,7 +80,7 @@ public class SpeechPronounResolver extends MaxentResolver { public boolean canResolve(MentionContext mention) { String tag = mention.getHeadTokenTag(); boolean fpp = tag != null && tag.startsWith("PRP") - && ResolverUtils.speechPronounPattern.matcher(mention.getHeadTokenText()).matches(); + && ResolverUtils.SPEECH_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches(); boolean pn = tag != null && tag.startsWith("NNP"); return (fpp || pn); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java index 2c06836..c1ffb6e 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java @@ -138,13 +138,13 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel { * @return The heuristically determined gender or unknown. */ private GenderEnum getGender(Context mention) { - if (ResolverUtils.malePronounPattern.matcher(mention.getHeadTokenText()).matches()) { + if (ResolverUtils.MALE_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches()) { return GenderEnum.MALE; } - else if (ResolverUtils.femalePronounPattern.matcher(mention.getHeadTokenText()).matches()) { + else if (ResolverUtils.FEMALE_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches()) { return GenderEnum.FEMALE; } - else if (ResolverUtils.neuterPronounPattern.matcher(mention.getHeadTokenText()).matches()) { + else if (ResolverUtils.NEUTER_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches()) { return GenderEnum.NEUTER; } Object[] mtokens = mention.getTokens(); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java index 2ee8481..d305e8e 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java @@ -91,10 +91,10 @@ public class NumberModel implements TestNumberModel, TrainSimilarityModel { } public NumberEnum getNumber(Context ec) { - if (ResolverUtils.singularPronounPattern.matcher(ec.getHeadTokenText()).matches()) { + if (ResolverUtils.SINGULAR_PRONOUN_PATTERN.matcher(ec.getHeadTokenText()).matches()) { return NumberEnum.SINGULAR; } - else if (ResolverUtils.pluralPronounPattern.matcher(ec.getHeadTokenText()).matches()) { + else if (ResolverUtils.PLURAL_PRONOUN_PATTERN.matcher(ec.getHeadTokenText()).matches()) { return NumberEnum.PLURAL; } else { diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java index 398a6f5..487816f 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java +++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java @@ -42,7 +42,7 @@ import opennlp.tools.util.Span; */ public class MucMentionInserterStream extends FilterObjectStream<RawCorefSample, CorefSample> { - private static final Set<String> entitySet = new HashSet<>(Arrays.asList(DefaultParse.NAME_TYPES)); + private static final Set<String> ENTITY_SET = new HashSet<>(Arrays.asList(DefaultParse.NAME_TYPES)); private final MentionFinder mentionFinder; @@ -95,7 +95,7 @@ public class MucMentionInserterStream extends FilterObjectStream<RawCorefSample, if (commonParent != null) { // Span mentionSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd()); - if (entitySet.contains(commonParent.getType())) { + if (ENTITY_SET.contains(commonParent.getType())) { commonParent.getParent().setType("NP#" + id); } else if (commonParent.getType().equals("NML")) { diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java index 7a79f18..16d9637 100755 --- a/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/object_dedup/SimilarityAccessorBase.java @@ -43,7 +43,7 @@ public class SimilarityAccessorBase { private List<String> namesBothSides; - protected static final String[] englishPrepositions = new String[] { "a", "aboard", "about", "above", "absent", + private static final String[] ENGLISH_PREPOSITIONS = new String[] { "a", "aboard", "about", "above", "absent", "across", "after", "against", "along", "alongside", "among", "around", "as", "at", "before", "behind", "below", "beneath", "between", "beyond", "but", "by", "despite", "down", "during", "except", "excluding", "failing", "following", "for", "from", "in", "including", "inside", "into", "like", "near", "next", "of", "off", "on", @@ -51,20 +51,20 @@ public class SimilarityAccessorBase { "thru", "till", "to", "toward", "under", "up", "upon", "versus", "with", "within", "you", "must", "know", "when" }; - protected final List<String> commonWordsInEventTitles = Arrays.asList(new String[] { "community", "party", "film", - "music", "exhibition", "kareoke", "guitar", "quartet", "reggae", "r&b", "band", "dj ", "piano", "pray", - "worship", "god", "training", "class", "development", "training", "class", "course", "our", "comedy", ",fun", - "musical", "group", "alliance", "session", "feeding", "introduction", "school", "conversation", "learning", - "nursery", "unity", "trivia", "chat", "conference", "tuition", "technology", "teen", "communication", - "reception", "management", "beginner", "beginning", "collabora", "reuninon", "political", "course", "age", - "ages", "through", "grade", "networking", "workshop", "demonstration", "tuning", "program", "summit", - "convention", "day", "night", "one", "two", "outfest", "three", "online", "writing", "seminar", "coach", - ",expo", "advanced", "beginner", "intermediate", "earn", "free", "ii", "iii", "skills", "skill", "artist", - "summer", "winter", "autumn", "spring", "camp", "vacation", "miscrosoft", "kid", "child", "kids", "children", - "every", "everyone", "dancer", "dancers", "senior", "seniors", "basic", "elementary", "outfest", "2008", - "2009", "2010", "2011", "2012", "monday", "tuesday", "wednesday", "thirsday", "friday", "saturday", "sunday", - "mondays", "tuesdays", "wednesdays", "thirsdays", "fridays", "saturdays", "sundays", "men" // ? - }); + private static final List<String> COMMON_WORDS_IN_EVENT_TITLES = Arrays.asList("community", "party", "film", + "music", "exhibition", "kareoke", "guitar", "quartet", "reggae", "r&b", "band", "dj ", "piano", "pray", + "worship", "god", "training", "class", "development", "training", "class", "course", "our", "comedy", ",fun", + "musical", "group", "alliance", "session", "feeding", "introduction", "school", "conversation", "learning", + "nursery", "unity", "trivia", "chat", "conference", "tuition", "technology", "teen", "communication", + "reception", "management", "beginner", "beginning", "collabora", "reuninon", "political", "course", "age", + "ages", "through", "grade", "networking", "workshop", "demonstration", "tuning", "program", "summit", + "convention", "day", "night", "one", "two", "outfest", "three", "online", "writing", "seminar", "coach", + ",expo", "advanced", "beginner", "intermediate", "earn", "free", "ii", "iii", "skills", "skill", "artist", + "summer", "winter", "autumn", "spring", "camp", "vacation", "miscrosoft", "kid", "child", "kids", "children", + "every", "everyone", "dancer", "dancers", "senior", "seniors", "basic", "elementary", "outfest", "2008", + "2009", "2010", "2011", "2012", "monday", "tuesday", "wednesday", "thirsday", "friday", "saturday", "sunday", + "mondays", "tuesdays", "wednesdays", "thirsdays", "fridays", "saturdays", "sundays", "men" // ? + ); private final BingQueryRunner webSearch = new BingQueryRunner(); @@ -257,7 +257,7 @@ public class SimilarityAccessorBase { if (word.length() < 2) // '-', '|', ':' break; - if (word.equals(word.toLowerCase()) && (!Arrays.asList(englishPrepositions).contains(word)) + if (word.equals(word.toLowerCase()) && (!Arrays.asList(ENGLISH_PREPOSITIONS).contains(word)) && word.length() > 3 && StringUtils.isAlphanumeric(word)) continue; // was return false; if (count > 3) @@ -275,7 +275,7 @@ public class SimilarityAccessorBase { // now iterate till next preposition towards the end of noun phrase for (String preposCand : ofList) { - if (Arrays.asList(englishPrepositions).contains(preposCand)) + if (Arrays.asList(ENGLISH_PREPOSITIONS).contains(preposCand)) break; results.add(preposCand); } @@ -403,8 +403,8 @@ public class SimilarityAccessorBase { { // all words should be the // same name1Tokens.removeAll(name2Tokens); - name1Tokens.removeAll(Arrays.asList(englishPrepositions)); - name1Tokens.removeAll(Arrays.asList(commonWordsInEventTitles)); + name1Tokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS)); + name1Tokens.removeAll(Arrays.asList(COMMON_WORDS_IN_EVENT_TITLES)); if (name1Tokens.size() < 1) return true; @@ -430,7 +430,7 @@ public class SimilarityAccessorBase { name1Tokens.removeAll(name2Tokens); name2Tokens.removeAll(name1TokensClone); name1Tokens.addAll(name2Tokens); - name1Tokens.removeAll(Arrays.asList(englishPrepositions)); + name1Tokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS)); // name1Tokens.removeAll(Arrays.asList(this.commonWordsInEventTitles)); if (name1Tokens.size() < 1) return true; @@ -539,8 +539,8 @@ public class SimilarityAccessorBase { name1Tokens.retainAll(name2Tokens); name1Tokens.removeAll(venueToks); - name1Tokens.removeAll(commonWordsInEventTitles); - name1Tokens.removeAll(Arrays.asList(englishPrepositions)); + name1Tokens.removeAll(COMMON_WORDS_IN_EVENT_TITLES); + name1Tokens.removeAll(Arrays.asList(ENGLISH_PREPOSITIONS)); name1Tokens = removeDollarWordAndNonAlphaFromList(name1Tokens); // todo : to use full string measure // boundary case: too many words => just do counts diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java index b277a23..daed37f 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java @@ -103,7 +103,7 @@ public class SentimentVocab { public static final int SENTIMENT_UNKNOWN = 0; public static final int SENTIMENT_NEGATIVE = -1; - private static final SentimentVocab instance = new SentimentVocab(); + private static final SentimentVocab INSTANCE = new SentimentVocab(); // complete sentiment word map, key = word, value = sentiment object private final Map<String, Sentiment> sentimentMap = new HashMap<>(); @@ -122,7 +122,7 @@ public class SentimentVocab { } public static SentimentVocab getInstance() { - return instance; + return INSTANCE; } public Sentiment getSentiment(String word) { diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java index b682086..c5388fa 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java @@ -41,9 +41,9 @@ import java.util.regex.Pattern; * @author GaDo */ public class EmailSender { - private static final Pattern pc = Pattern.compile("[^\\s]+@[^\\s]+.[^\\s]+"); + private static final Pattern EMAIL_PATTERN = Pattern.compile("[^\\s]+@[^\\s]+.[^\\s]+"); - private static final String mailboxAddress = "[email protected]"; + private static final String MAILBOX_ADDRESS = "[email protected]"; public boolean sendMail(String smtp, String user, String pass, InternetAddress from, InternetAddress[] to, InternetAddress[] cc, InternetAddress[] bcc, @@ -101,7 +101,7 @@ public class EmailSender { } Transport tr = session.getTransport("smtp"); - tr.connect(smtp, mailboxAddress, pass); + tr.connect(smtp, MAILBOX_ADDRESS, pass); message.saveChanges(); tr.sendMessage(message, message.getAllRecipients()); tr.close(); @@ -124,7 +124,7 @@ public class EmailSender { Matcher m; if(correct){ - m = pc.matcher(from.getAddress()); + m = EMAIL_PATTERN.matcher(from.getAddress()); correct = m.matches(); } @@ -133,7 +133,7 @@ public class EmailSender { while(correct && vault<to.length){ correct = !to[vault].getAddress().equals(""); if(correct){ - m = pc.matcher(to[vault].getAddress()); + m = EMAIL_PATTERN.matcher(to[vault].getAddress()); correct = m.matches(); } vault++; @@ -145,7 +145,7 @@ public class EmailSender { while(correct && vault<cc.length){ correct = !cc[vault].getAddress().equals(""); if(correct){ - m = pc.matcher(cc[vault].getAddress()); + m = EMAIL_PATTERN.matcher(cc[vault].getAddress()); correct = m.matches(); } vault++; @@ -157,7 +157,7 @@ public class EmailSender { while(correct && vault<bcc.length){ correct = !bcc[vault].getAddress().equals(""); if(correct){ - m = pc.matcher(bcc[vault].getAddress()); + m = EMAIL_PATTERN.matcher(bcc[vault].getAddress()); correct = m.matches(); } vault++; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java index e8219fd..7fcd9ce 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/ClassifierTrainingSetIndexer.java @@ -36,20 +36,20 @@ import org.apache.tika.Tika; public class ClassifierTrainingSetIndexer { - public static final String resourceDir = new File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources"; - public static final String INDEX_PATH = "/classif"; - public static final String CLASSIF_TRAINING_CORPUS_PATH = "/training_corpus"; + private static final String[] DOMAINS = new String[] { "legal", "health", "computing", "engineering", "business" }; + private static final String RESOURCE_DIR = new File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources"; + static final String INDEX_PATH = "/classif"; + static final String CLASSIF_TRAINING_CORPUS_PATH = "/training_corpus"; protected final ArrayList<File> queue = new ArrayList<>(); - final Tika tika = new Tika(); + private final Tika tika = new Tika(); - IndexWriter indexWriter = null; - protected static final String[] domains = new String[] { "legal", "health", "computing", "engineering", "business" }; - private String absolutePathTrainingSet=null; + private IndexWriter indexWriter = null; + private String absolutePathTrainingSet = null; public ClassifierTrainingSetIndexer() { try { - initIndexWriter(resourceDir); + initIndexWriter(RESOURCE_DIR); } catch (Exception e) { e.printStackTrace(); } @@ -58,7 +58,7 @@ public class ClassifierTrainingSetIndexer { public ClassifierTrainingSetIndexer(String absolutePathTrainingSet) { this.absolutePathTrainingSet = absolutePathTrainingSet; try { - initIndexWriter(resourceDir); + initIndexWriter(RESOURCE_DIR); } catch (Exception e) { e.printStackTrace(); } @@ -68,7 +68,7 @@ public class ClassifierTrainingSetIndexer { try { indexFileOrDirectory(Objects.requireNonNullElseGet(absolutePathTrainingSet, - () -> resourceDir + CLASSIF_TRAINING_CORPUS_PATH)); + () -> RESOURCE_DIR + CLASSIF_TRAINING_CORPUS_PATH)); indexWriter.commit(); } catch (IOException e) { e.printStackTrace(); @@ -120,7 +120,7 @@ public class ClassifierTrainingSetIndexer { String name = f.getPath(); String className = null; - for (String d : domains) { + for (String d : DOMAINS) { if (name.contains(d)) { className = d; break; @@ -218,7 +218,7 @@ public class ClassifierTrainingSetIndexer { public static String getCategoryFromFilePath(String path){ String className = null; - for (String d : domains) { + for (String d : DOMAINS) { if (path.contains("/" + d + "/")) { className = d; break; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java index 05abe3e..ccd9f63 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java @@ -47,9 +47,10 @@ import org.apache.lucene.store.FSDirectory; import org.json.JSONObject; public class DocClassifier { + + private static final Log LOGGER = LogFactory.getLog(DocClassifier.class); public static final String DOC_CLASSIFIER_KEY = "doc_class"; - public static final String resourceDir = null; - public static final Log logger = LogFactory.getLog(DocClassifier.class); + public static final String RESOURCE_DIR = null; private Map<String, Float> scoredClasses; @@ -57,7 +58,7 @@ public class DocClassifier { protected static IndexReader indexReader = null; protected static IndexSearcher indexSearcher = null; // resource directory plus the index folder - private static final String INDEX_PATH = resourceDir + private static final String INDEX_PATH = RESOURCE_DIR + ClassifierTrainingSetIndexer.INDEX_PATH; // http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm @@ -90,8 +91,8 @@ public class DocClassifier { // for classification // these are categories from the index - public static final String[] categories = new String[] { "legal", "health", - "finance", "computing", "engineering", "business" }; + public static final String[] CATEGORIES = new String[] + { "legal", "health", "finance", "computing", "engineering", "business" }; static { synchronized (DocClassifier.class) { @@ -100,13 +101,13 @@ public class DocClassifier { try { indexDirectory = FSDirectory.open(new File(INDEX_PATH).toPath()); } catch (IOException e2) { - logger.error("problem opening index " + e2); + LOGGER.error("problem opening index " + e2); } try { indexReader = DirectoryReader.open(indexDirectory); indexSearcher = new IndexSearcher(indexReader); } catch (IOException e2) { - logger.error("problem reading index \n" + e2); + LOGGER.error("problem reading index \n" + e2); } } } @@ -141,9 +142,9 @@ public class DocClassifier { hits = indexSearcher .search(query, MAX_DOCS_TO_USE_FOR_CLASSIFY + 2); } catch (IOException e1) { - logger.error("problem searching index \n" + e1); + LOGGER.error("problem searching index \n" + e1); } - logger.debug("Found " + hits.totalHits + " hits for " + queryStr); + LOGGER.debug("Found " + hits.totalHits + " hits for " + queryStr); int count = 0; @@ -152,7 +153,7 @@ public class DocClassifier { try { doc = indexSearcher.doc(scoreDoc.doc); } catch (IOException e) { - logger.error("Problem searching training set for classif \n" + LOGGER.error("Problem searching training set for classif \n" + e); continue; } @@ -164,7 +165,7 @@ public class DocClassifier { else scoredClasses.put(flag, scoreForClass + scoreDoc.score); - logger.debug(" <<categorized as>> " + flag + " | score=" + LOGGER.debug(" <<categorized as>> " + flag + " | score=" + scoreDoc.score + " \n text =" + doc.get("text") + "\n"); if (count > MAX_DOCS_TO_USE_FOR_CLASSIFY) { @@ -180,7 +181,7 @@ public class DocClassifier { if (scoredClasses.get(key) > MIN_TOTAL_SCORE_FOR_CATEGORY) resultsAboveThresh.add(key); else - logger.debug("Too low score of " + scoredClasses.get(key) + LOGGER.debug("Too low score of " + scoredClasses.get(key) + " for category = " + key); } @@ -192,7 +193,7 @@ public class DocClassifier { else results = resultsAboveThresh; } catch (Exception e) { - logger.error("Problem aggregating search results\n" + e); + LOGGER.error("Problem aggregating search results\n" + e); } if (results.size() < 2) return results; @@ -253,7 +254,7 @@ public class DocClassifier { try { indexReader.close(); } catch (IOException e) { - logger.error("Problem closing index \n" + e); + LOGGER.error("Problem closing index \n" + e); } } @@ -280,12 +281,12 @@ public class DocClassifier { classifResults = classifySentence(query); if (classifResults != null && classifResults.size() > 0) { localCats.addAll(classifResults); - logger.debug(sentence + " => " + classifResults); + LOGGER.debug(sentence + " => " + classifResults); } } } catch (Exception e) { - logger.error("Problem classifying sentence\n " + e); + LOGGER.error("Problem classifying sentence\n " + e); } List<String> aggrResults = new ArrayList<>(); @@ -293,9 +294,9 @@ public class DocClassifier { aggrResults = localCats.getFrequentTags(); - logger.debug(localCats.getFrequentTags()); + LOGGER.debug(localCats.getFrequentTags()); } catch (Exception e) { - logger.error("Problem aggregating search results\n" + e); + LOGGER.error("Problem aggregating search results\n" + e); } return aggrResults; } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java index 00dc002..99d070f 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java @@ -45,11 +45,8 @@ public class DocClassifierTrainingSetMultilingualExtender { final DocClassifier classifier; private String sourceDir = null, destinationDir = null; //interwiki-fr"><a href="http://fr.wikipedia.org/wiki/Niveau_d%27%C3%A9nergie" title="Niveau d'énergie – French" lang="fr" - private static final String[][] multilingualTokens = new String[][]{ - {"interwiki-fr\"><a href=\"", "lang=\"fr\""}, - {"interwiki-es\"><a href=\"", "lang=\"es\""}, - {"interwiki-de\"><a href=\"", "lang=\"de\""}, - }; + private static final String[][] MULTILINGUAL_TOKENS = new String[][]{{"interwiki-fr\"><a href=\"", "lang=\"fr\""}, + {"interwiki-es\"><a href=\"", "lang=\"es\""}, {"interwiki-de\"><a href=\"", "lang=\"de\""} }; private static final String[] LANGS = new String[]{ "fr", "es", "de"}; @@ -131,7 +128,7 @@ public class DocClassifierTrainingSetMultilingualExtender { System.out.println("processing "+f.getName()); content = FileUtils.readFileToString(f, "utf-8"); int langIndex =0; - for(String[] begEnd: multilingualTokens){ + for(String[] begEnd: MULTILINGUAL_TOKENS){ String urlDirty = StringUtils.substringBetween(content, begEnd[0], begEnd[1]); String url = StringUtils.substringBefore(urlDirty, "\""); diff --git a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java index 928efd5..808788f 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailNormalizer.java @@ -43,7 +43,7 @@ public class EmailNormalizer { } } - public static final String[] headers = new String[] { + static final String[] HEADERS = new String[] { "Message-ID:", "Date:", "From:", @@ -62,12 +62,10 @@ public class EmailNormalizer { "----", }; - public static final String[] prohibitedStrings = new String[] { - "@", "<", ">" - }; + static final String[] PROHIBITED_STRINGS = new String[] {"@", "<", ">"}; public void normalizeAndWriteIntoANewFile(File f){ - String content=""; + String content = ""; try { content = FileUtils.readFileToString(f, StandardCharsets.UTF_8); } catch (IOException e) { @@ -77,12 +75,12 @@ public class EmailNormalizer { StringBuilder buf = new StringBuilder(); for(String l: lines){ boolean bAccept = true; - for(String h: headers){ + for(String h: HEADERS){ if (l.startsWith(h)){ bAccept = false; } } - for(String h: prohibitedStrings){ + for(String h: PROHIBITED_STRINGS){ if (l.indexOf(h)>0){ bAccept = false; } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java index 734414e..1a2f89e 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/enron_email_recognizer/EmailTrainingSetFormer.java @@ -25,20 +25,19 @@ import java.util.List; import org.apache.commons.io.FileUtils; public class EmailTrainingSetFormer { - static final String dataDir = "/Users/bgalitsky/Downloads/"; - static final String//enron_with_categories/", - fileListFile = "cats4_11-17.txt"; - static final String destinationDir = "/Users/bgalitsky/Documents/ENRON/data11_17/"; + static final String DATA_DIR = "/Users/bgalitsky/Downloads/"; + static final String FILE_LIST_FILE = "cats4_11-17.txt"; + static final String DESTINATION_DIR = "/Users/bgalitsky/Documents/ENRON/data11_17/"; //enron_with_categories/5/70665.cats:4,10,1 public static void createPosTrainingSet(){ try { - List<String> lines = FileUtils.readLines(new File(dataDir+fileListFile), StandardCharsets.UTF_8); + List<String> lines = FileUtils.readLines(new File(DATA_DIR + FILE_LIST_FILE), StandardCharsets.UTF_8); for(String l: lines){ int endOfFname = l.indexOf('.'), startOfFname = l.lastIndexOf('/'); - String filenameOld =dataDir+ l.substring(0, endOfFname)+".txt"; + String filenameOld = DATA_DIR + l.substring(0, endOfFname)+".txt"; String content = normalize(new File(filenameOld)); - String filenameNew = destinationDir + l.substring(startOfFname+1, endOfFname)+".txt"; + String filenameNew = DESTINATION_DIR + l.substring(startOfFname+1, endOfFname)+".txt"; //FileUtils.copyFile(new File(filenameOld), new File(filenameNew)); FileUtils.writeStringToFile(new File(filenameNew), content, StandardCharsets.UTF_8); } @@ -61,12 +60,12 @@ public class EmailTrainingSetFormer { StringBuilder buf = new StringBuilder(); for(String l: lines){ boolean bAccept = true; - for(String h: EmailNormalizer.headers){ + for(String h: EmailNormalizer.HEADERS){ if (l.startsWith(h)){ bAccept = false; } } - for(String h: EmailNormalizer.prohibitedStrings){ + for(String h: EmailNormalizer.PROHIBITED_STRINGS){ if (l.indexOf(h)>0){ bAccept = false; } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java index 424f060..e4beac6 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java @@ -43,7 +43,7 @@ public class NL2Obj { parser = ParserChunker2MatcherProcessor.getInstance(); } - public static final String[] epistemicStatesList = new String[] { + static final String[] EPISTEMIC_STATES_LIST = new String[] { "select", "verify", "find", "start", "stop", "go", "check" }; @@ -156,7 +156,7 @@ public class NL2Obj { } private boolean isControlOp(String methodOrControlOp) { - return Arrays.asList(epistemicStatesList).contains(methodOrControlOp); + return Arrays.asList(EPISTEMIC_STATES_LIST).contains(methodOrControlOp); } protected List<ParseTreeChunk> applyWhichRuleOnVP(List<ParseTreeChunk> actionWithObject) { diff --git a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java index 421a124..0e5811e 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java @@ -26,19 +26,19 @@ import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcess public class NL2ObjCreateAssign extends NL2Obj { private boolean classBeingDefined = false; - public static String[] declarationStatesList = new String[] { + static final String[] DECLARATION_STATES_LIST = new String[] { "create", "assign", "set", }; - public static final String[] dataTypesList = new String[] { + static final String[] DATA_TYPES_LIST = new String[] { "text", "double", "array", }; - public static final String[] arrayElementList = new String[] { + static final String[] ARRAY_ELEMENT_LIST = new String[] { "first", "second", "third", "fourth" }; - public static final String[] arrayElementListInsdex = new String[] { + static final String[] ARRAY_ELEMENT_LIST_INSDEX = new String[] { "0", "1", "2", "3" }; @@ -91,12 +91,12 @@ public class NL2ObjCreateAssign extends NL2Obj { } String dataType = verbChunk.getLemmas().get(1).toLowerCase(); - if (classBeingDefined && Arrays.asList(dataTypesList).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){ + if (classBeingDefined && Arrays.asList(DATA_TYPES_LIST).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){ op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase()); classBeingDefined = true; break; } - if (Arrays.asList(dataTypesList).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){ + if (Arrays.asList(DATA_TYPES_LIST).contains(dataType) && verbChunk.getLemmas().get(2).toLowerCase().equals("attribute")){ op.setOperatorFor(dataType + " "+verbChunk.getLemmas().get(verbChunk.getLemmas().size()-1).toLowerCase()); classBeingDefined = true; break; @@ -126,9 +126,9 @@ public class NL2ObjCreateAssign extends NL2Obj { numElements = lems.indexOf("object"); if (numElements<0) numElements = lems.indexOf("member"); - if (Arrays.asList(arrayElementList).contains(lems.get(numElements-1))){ - int arrIndex = Arrays.asList(arrayElementList).indexOf(lems.get(numElements-1)); - String indexValue = arrayElementListInsdex[arrIndex]; + if (Arrays.asList(ARRAY_ELEMENT_LIST).contains(lems.get(numElements-1))){ + int arrIndex = Arrays.asList(ARRAY_ELEMENT_LIST).indexOf(lems.get(numElements-1)); + String indexValue = ARRAY_ELEMENT_LIST_INSDEX[arrIndex]; String arrayName = lems.get(lems.size()-1); if (expression!=null) diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java index ca12830..1f1844a 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java @@ -34,7 +34,7 @@ public class WebPageExtractor { protected ParserChunker2MatcherProcessor nlProc; protected final MostFrequentWordsFromPageGetter mostFrequentWordsFromPageGetter = new MostFrequentWordsFromPageGetter(); - protected static final int sentThresholdLength = 70; + protected static final int SENT_THRESHOLD_LENGTH = 70; public List<String[]> extractSentencesWithPotentialProductKeywords(String url) { @@ -111,7 +111,7 @@ public class WebPageExtractor { continue; if (s.indexOf('|')>-1) continue; - if (s == null || s.trim().length() < sentThresholdLength || s.length() < sentThresholdLength + 10) + if (s == null || s.trim().length() < SENT_THRESHOLD_LENGTH || s.length() < SENT_THRESHOLD_LENGTH + 10) continue; if (GeneratedSentenceProcessor.acceptableMinedSentence(s)==null){ // TODO OPENNLP-1454 Candidate for logger.debug(...) if required/helpful diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java index d92345e..42590a2 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaGeneralizer.java @@ -25,10 +25,10 @@ import opennlp.tools.stemmer.PStemmer; import opennlp.tools.word2vec.W2VDistanceMeasurer; public class LemmaGeneralizer implements IGeneralizer<String> { - public static final String w2vPrefix = "w2v_"; - final PStemmer ps = new PStemmer(); - String pos = null; - final W2VDistanceMeasurer w2v; + public static final String W2V_PREFIX = "w2v_"; + private final PStemmer ps = new PStemmer(); + private String pos = null; + private final W2VDistanceMeasurer w2v; public LemmaGeneralizer() { w2v = W2VDistanceMeasurer.getInstance(); } @@ -43,52 +43,42 @@ public class LemmaGeneralizer implements IGeneralizer<String> { boolean bEqual = false; String lemma1 = (String)o1, lemma2 = (String)o2; - - - lemma1 = lemma1.toLowerCase(); - lemma2 = lemma2.toLowerCase(); + lemma1 = lemma1.toLowerCase(); + lemma2 = lemma2.toLowerCase(); - if (lemma1.equals(lemma2)) { - bEqual = true; - results.add(lemma1); - return results; - } + if (lemma1.equals(lemma2)) { + bEqual = true; + results.add(lemma1); + return results; + } - if ((lemma1.equals(lemma2 + "s") || lemma2.equals(lemma1 + "s")) - || lemma1.endsWith(lemma2) || lemma2.endsWith(lemma1) - || lemma1.startsWith(lemma2) || lemma2.startsWith(lemma1)) { - bEqual = true; - results.add(lemma1); - return results; - } + if ((lemma1.equals(lemma2 + "s") || lemma2.equals(lemma1 + "s")) + || lemma1.endsWith(lemma2) || lemma2.endsWith(lemma1) + || lemma1.startsWith(lemma2) || lemma2.startsWith(lemma1)) { + bEqual = true; + results.add(lemma1); + return results; + } - try { - if (ps != null) { - if (ps.stem(lemma1).toString() - .equalsIgnoreCase(ps.stem(lemma2).toString())) { - bEqual = true; - results.add(lemma1); - return results; - } + try { + if (ps != null) { + if (ps.stem(lemma1).toString() + .equalsIgnoreCase(ps.stem(lemma2).toString())) { + bEqual = true; + results.add(lemma1); + return results; } - } catch (Exception e) { - System.err.println("Problem processing " + lemma1 + " " + lemma2); - return results; - } - // if different words, then compute word2vec distance and write the value as a string - if (w2v.vec!=null){ - double value = w2v.vec.similarity(lemma1, lemma2); - results.add(w2vPrefix + value); } + } catch (Exception e) { + System.err.println("Problem processing " + lemma1 + " " + lemma2); return results; } - - - - - - - - + // if different words, then compute word2vec distance and write the value as a string + if (w2v.vec!=null){ + double value = w2v.vec.similarity(lemma1, lemma2); + results.add(W2V_PREFIX + value); + } + return results; } +} \ No newline at end of file diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java index f3f49e1..b71d0b2 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java @@ -30,19 +30,18 @@ import opennlp.tools.textsimilarity.ParseTreeChunkListScorer; import opennlp.tools.textsimilarity.SentencePairMatchResult; import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; -/* - * This class does content generation by using web mining and syntactic generalization to get sentences from the web, convert and combine - * them in the form - * expected to be readable by humans and not distinguishable from genuine content by search engines - * +/** + * This class does content generation by using web mining and syntactic generalization to get sentences + * from the web, convert and combine them in the form expected to be readable by humans and + * not distinguishable from genuine content by search engines. */ - public class ContentGenerator /*extends RelatedSentenceFinder*/ { - final PageFetcher pFetcher = new PageFetcher(); - final ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance(); - protected final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); - protected final ParseTreeChunk parseTreeChunk = new ParseTreeChunk(); - protected static final StringDistanceMeasurer stringDistanceMeasurer = new StringDistanceMeasurer(); + + private final PageFetcher pFetcher = new PageFetcher(); + private final ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance(); + private final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); + private final ParseTreeChunk parseTreeChunk = new ParseTreeChunk(); + private static final StringDistanceMeasurer STRING_DISTANCE_MEASURER = new StringDistanceMeasurer(); protected final BingQueryRunner yrunner = new BingQueryRunner(); protected final ContentGeneratorSupport support = new ContentGeneratorSupport(); protected int MAX_STEPS = 1; @@ -84,7 +83,7 @@ public class ContentGenerator /*extends RelatedSentenceFinder*/ { System.out.println(" \n=== Entity to write about = " + sentence); int stepCount=0; - for (String verbAddition : StoryDiscourseNavigator.frequentPerformingVerbs) { + for (String verbAddition : StoryDiscourseNavigator.FREQUENT_PERFORMING_VERBS) { List<HitBase> searchResult = yrunner.runSearch(sentence + " " + verbAddition, MAX_SEARCH_RESULTS); //100); if (MAX_SEARCH_RESULTS<searchResult.size()) @@ -323,7 +322,7 @@ public class ContentGenerator /*extends RelatedSentenceFinder*/ { } } - measScore = stringDistanceMeasurer.measureStringDistance(originalSentence, pageSentence); + measScore = STRING_DISTANCE_MEASURER.measureStringDistance(originalSentence, pageSentence); if ((syntScore > RELEVANCE_THRESHOLD || measScore > 0.5) diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java index d9abf51..e5384c0 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java @@ -26,7 +26,7 @@ import opennlp.tools.similarity.apps.utils.Utils; public class GeneratedSentenceProcessor { - public static final String[] occurs = new String[]{ "click here", "wikipedia", "retrieved", "isbn", + private static final String[] OCCURS = new String[]{ "click here", "wikipedia", "retrieved", "isbn", "http", "www.", "copyright", "advertise", "(accessed", "[edit]", "[citation needed]", "site map", "email updates", "contact us", "rss feeds", "cite this site", @@ -67,7 +67,7 @@ public class GeneratedSentenceProcessor { "menu.", "search.", "sign in", "home.", "additional terms", "may apply"}; - public static final String[] occursStartsWith = new String[]{ + private static final String[] OCCURS_STARTS_WITH = new String[]{ "fax", "write","email", "contact", "conditions", "chat live", "we ", "the recipient", "day return", "days return", "refund it", "your money", @@ -75,6 +75,7 @@ public class GeneratedSentenceProcessor { "exchange it ", "return it", "day return", "days return", "subscribe","posted by", "below" , "corporate", "this book"}; + public static String acceptableMinedSentence(String sent) { if (sent==null || sent.length()<40) return null; @@ -197,14 +198,14 @@ public class GeneratedSentenceProcessor { } public static boolean isProhibitiveWordsOccurOrStartWith(String sentenceLowercase){ - for(String o: occurs){ + for(String o: OCCURS){ if (sentenceLowercase.contains(o)){ //System.out.println("Found prohibited occurrence "+ o +" \n in sentence = "+ sentenceLowercase); return true; } } - for(String o: occursStartsWith){ + for(String o: OCCURS_STARTS_WITH){ if (sentenceLowercase.startsWith(o)){ //System.out.println("Found prohibited occurrence Start With "+ o +" \n in sentence = "+ sentenceLowercase); return true; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java index a6941ee..45bcbdb 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java @@ -52,7 +52,7 @@ public class RelatedSentenceFinder { final ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance(); protected final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); protected final ParseTreeChunk parseTreeChunk = new ParseTreeChunk(); - protected static final StringDistanceMeasurer stringDistanceMeasurer = new StringDistanceMeasurer(); + protected static final StringDistanceMeasurer STRING_DISTANCE_MEASURER = new StringDistanceMeasurer(); protected final BingQueryRunner yrunner = new BingQueryRunner(); protected int MAX_STEPS = 1; protected int MAX_SEARCH_RESULTS = 1; @@ -61,11 +61,11 @@ public class RelatedSentenceFinder { // used to indicate that a sentence is an opinion, so more appropriate static final List<String> MENTAL_VERBS = new ArrayList<>( - Arrays.asList(new String[] { "want", "know", "believe", "appeal", "ask", - "accept", "agree", "allow", "appeal", "ask", "assume", "believe", - "check", "confirm", "convince", "deny", "disagree", "explain", - "ignore", "inform", "remind", "request", "suggest", "suppose", - "think", "threaten", "try", "understand" })); + Arrays.asList("want", "know", "believe", "appeal", "ask", + "accept", "agree", "allow", "appeal", "ask", "assume", "believe", + "check", "confirm", "convince", "deny", "disagree", "explain", + "ignore", "inform", "remind", "request", "suggest", "suppose", + "think", "threaten", "try", "understand")); private static final int MAX_FRAGMENT_SENTS = 10; @@ -137,7 +137,7 @@ public class RelatedSentenceFinder { String[] extraKeywords = new StoryDiscourseNavigator().obtainAdditionalKeywordsForAnEntity(sentence); System.out.println("Found extraKeywords "+ Arrays.asList(extraKeywords)); if (extraKeywords==null || extraKeywords.length<1) - extraKeywords = StoryDiscourseNavigator.frequentPerformingVerbs; + extraKeywords = StoryDiscourseNavigator.FREQUENT_PERFORMING_VERBS; int stepCount=0; for (String verbAddition : extraKeywords) { @@ -492,7 +492,7 @@ public class RelatedSentenceFinder { } } - measScore = stringDistanceMeasurer.measureStringDistance( + measScore = STRING_DISTANCE_MEASURER.measureStringDistance( originalSentence, pageSentence); @@ -863,7 +863,7 @@ public class RelatedSentenceFinder { e.printStackTrace(); } - measScore = stringDistanceMeasurer.measureStringDistance( + measScore = STRING_DISTANCE_MEASURER.measureStringDistance( originalSentence, pageSentence); diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java index a47c057..a075bc2 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java @@ -186,7 +186,7 @@ public class RelatedSentenceFinderML extends RelatedSentenceFinder{ } } - measScore = stringDistanceMeasurer.measureStringDistance( + measScore = STRING_DISTANCE_MEASURER.measureStringDistance( originalSentence, pageSentence); // now possibly increase score by finding mental verbs diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java index b24dc45..77777d8 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java @@ -40,7 +40,7 @@ public class StoryDiscourseNavigator { private final PStemmer ps = new PStemmer(); final PageFetcher pFetcher = new PageFetcher(); - public static final String[] frequentPerformingVerbs = { + public static final String[] FREQUENT_PERFORMING_VERBS = { " born raised meet learn ", " graduated enter discover", " facts inventions life ", "accomplishments childhood timeline", " acquire befriend encounter", " achieve reache describe ", diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java index 9fe7a70..b3274dc 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java @@ -20,21 +20,21 @@ package opennlp.tools.similarity.apps; import java.util.ArrayList; import java.util.List; -import opennlp.tools.jsmlearning.ProfileReaderWriter; -import opennlp.tools.parse_thicket.Triple; - import net.billylieurance.azuresearch.AzureSearchResultSet; import net.billylieurance.azuresearch.AzureSearchWebResult; +import opennlp.tools.jsmlearning.ProfileReaderWriter; +import opennlp.tools.parse_thicket.Triple; + public class YahooAnswersMiner extends BingQueryRunner{ private int page = 0; - private static final int hitsPerPage = 50; + private static final int HITS_PER_PAGE = 50; public List<HitBase> runSearch(String query) { aq.setAppid(BING_KEY); aq.setQuery("site:answers.yahoo.com "+ query); - aq.setPerPage(hitsPerPage); + aq.setPerPage(HITS_PER_PAGE); aq.setPage(page); aq.doQuery(); @@ -56,7 +56,7 @@ public class YahooAnswersMiner extends BingQueryRunner{ public List<HitBase> runSearch(String query, int totalPages) { int count=0; List<HitBase> results = new ArrayList<>(); - while(totalPages>page*hitsPerPage){ + while(totalPages>page* HITS_PER_PAGE){ List<HitBase> res = runSearch(query); results.addAll(res); if (count>10) diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java index b719e70..c427366 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java @@ -36,7 +36,7 @@ import org.apache.solr.search.QueryParsing; public class IterativeQueryComponent extends QueryComponent{ public static final String COMPONENT_NAME = "iterative_query"; - public static final String[] fieldSequence = new String[]{"cat", "name", "content", "author"}; + private static final String[] FIELD_SEQUENCE = new String[]{"cat", "name", "content", "author"}; /** * Run the query multiple times against various fields, trying to recognize search intention @@ -47,10 +47,10 @@ public class IterativeQueryComponent extends QueryComponent{ NamedList<Object> nameValuePairs = rb.rsp.getValues(); nameValuePairs.remove("response"); rb.rsp.setAllValues(nameValuePairs); - rb = substituteField(rb, fieldSequence[0] ); + rb = substituteField(rb, FIELD_SEQUENCE[0] ); super.process(rb); - for(int iter = 1; iter<fieldSequence.length; iter++){ + for(int iter = 1; iter< FIELD_SEQUENCE.length; iter++){ nameValuePairs = rb.rsp.getValues(); ResultContext c = (ResultContext) nameValuePairs.get("response"); if (c!=null){ @@ -58,7 +58,7 @@ public class IterativeQueryComponent extends QueryComponent{ if (dList.size()<1){ nameValuePairs.remove("response"); rb.rsp.setAllValues(nameValuePairs); - rb = substituteField(rb, fieldSequence[iter] ); + rb = substituteField(rb, FIELD_SEQUENCE[iter] ); super.process(rb); } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java index 0ed7350..7a1d40e 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java @@ -35,11 +35,10 @@ public class NLProgram2CodeRequestHandler extends SearchHandler { private final static int MAX_SEARCH_RESULTS = 100; private final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); private final int MAX_QUERY_LENGTH_NOT_TO_RERANK = 3; - private static final String resourceDir = //"/home/solr/solr-4.4.0/example/src/test/resources"; - "C:/workspace/TestSolr/src/test/resources"; + private static final String RESOURCES = "C:/workspace/TestSolr/src/test/resources"; //"/data1/solr/example/src/test/resources"; - final NL2Obj compiler = new NL2ObjCreateAssign(resourceDir); + final NL2Obj compiler = new NL2ObjCreateAssign(RESOURCES); public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp){ // get query string diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java index b059d27..608731e 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java @@ -41,9 +41,8 @@ public class SearchResultsReRankerRequestHandler extends SearchHandler { private final static int MAX_SEARCH_RESULTS = 100; private final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); private ParserChunker2MatcherProcessor sm = null; - private static final String resourceDir = "/home/solr/solr-4.4.0/example/src/test/resources"; + private static final String RESOURCE_DIR = "/home/solr/solr-4.4.0/example/src/test/resources"; //"C:/workspace/TestSolr/src/test/resources"; - //"/data1/solr/example/src/test/resources"; public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp){ @@ -169,7 +168,7 @@ public class SearchResultsReRankerRequestHandler extends SearchHandler { private List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits, String searchQuery) { try { - sm = ParserChunker2MatcherProcessor.getInstance(resourceDir); + sm = ParserChunker2MatcherProcessor.getInstance(RESOURCE_DIR); } catch (Exception e){ LOG.severe(e.getMessage()); } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java index 9f6a11f..f7cb34b 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/DomainTaxonomyExtender.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.logging.Logger; import org.apache.commons.lang.StringUtils; @@ -34,14 +33,7 @@ import opennlp.tools.textsimilarity.SentencePairMatchResult; import opennlp.tools.textsimilarity.TextProcessor; import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; -/** - - * - */ - public class DomainTaxonomyExtender { - private static final Logger LOG = Logger - .getLogger("opennlp.tools.similarity.apps.taxo_builder.DomainTaxonomyExtender"); private final BingQueryRunner brunner = new BingQueryRunner(); private final ParserChunker2MatcherProcessor matcher = ParserChunker2MatcherProcessor.getInstance(); @@ -161,7 +153,7 @@ public class DomainTaxonomyExtender { String snapshot2 = StringCleaner.processSnapshotForMatching(h2 .getTitle() + " " + h2.getAbstractText()); SentencePairMatchResult overlaps = matcher.assessRelevance(snapshot1, snapshot2); - genResult.addAll(overlaps.matchResult); + genResult.addAll(overlaps.getMatchResult()); } } } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java index efb2687..1be923e 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxoQuerySnapshotMatcher.java @@ -22,13 +22,10 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.logging.Logger; import opennlp.tools.textsimilarity.TextProcessor; import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; -//import com.thoughtworks.xstream.XStream; - /** * This class can be used to generate scores based on the overlapping between a * text and a given taxonomy. @@ -40,8 +37,6 @@ public class TaxoQuerySnapshotMatcher { // XStream xStream= new XStream(); Map<String, List<List<String>>> lemma_ExtendedAssocWords; final TaxonomySerializer taxo; - private static final Logger LOG = Logger - .getLogger("opennlp.tools.similarity.apps.taxo_builder.TaxoQuerySnapshotMatcher"); public TaxoQuerySnapshotMatcher(String taxoFileName) { sm = ParserChunker2MatcherProcessor.getInstance(); diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java index 64a65de..2f53a7d 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java @@ -21,7 +21,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.logging.Logger; import opennlp.tools.similarity.apps.BingQueryRunner; import opennlp.tools.similarity.apps.HitBase; @@ -42,8 +41,7 @@ import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcess */ public class TaxonomyExtenderViaMebMining extends BingQueryRunner { - private static final Logger LOG = Logger - .getLogger("opennlp.tools.similarity.apps.taxo_builder.TaxonomyExtenderSearchResultFromYahoo"); + private final ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); ParserChunker2MatcherProcessor sm; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java index 67c65ff..21bdafb 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/FileHandler.java @@ -23,7 +23,6 @@ import java.io.ByteArrayOutputStream; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java index 1355d8c..0d63e70 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java @@ -33,7 +33,7 @@ import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; public class PageFetcher { - private static final Logger log = Logger.getLogger("opennlp.tools.similarity.apps.utils.PageFetcher"); + private static final Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.utils.PageFetcher"); private final Tika tika = new Tika(); private static int DEFAULT_TIMEOUT = 1500; @@ -63,7 +63,7 @@ public class PageFetcher { pageContent = handler.toString(); } catch (Exception e) { - log.severe(e.getMessage() + "\n" + e); + LOG.severe(e.getMessage() + "\n" + e); } return pageContent; } @@ -79,7 +79,7 @@ public class PageFetcher { pageContent = tika.parseToString(connection.getInputStream()) .replace('\n', ' ').replace('\t', ' '); } catch (IOException | TikaException e) { - log.severe(e.getMessage() + "\n" + e); + LOG.severe(e.getMessage() + "\n" + e); } return pageContent; } @@ -97,7 +97,7 @@ public class PageFetcher { } public String fetchOrigHTML(String url) { - log.info("fetch url " + url); + LOG.info("fetch url " + url); StringBuilder buf = new StringBuilder(); try { URLConnection connection = new URL(url).openConnection(); @@ -113,7 +113,7 @@ public class PageFetcher { connection.getInputStream())); } catch (Exception e) { // we don't always need to log trial web pages if access fails - log.severe(e.toString()); + LOG.severe(e.toString()); } while ((line = reader.readLine()) != null) { diff --git a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java index 23198fc..bae6357 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/Utils.java @@ -40,155 +40,155 @@ public class Utils { private static final Logger LOG = Logger .getLogger("opennlp.tools.similarity.apps.utils.Utils"); - protected static final ArrayList<String[]> characterMappings = new ArrayList<>(); + protected static final ArrayList<String[]> CHARACTER_MAPPINGS = new ArrayList<>(); static { - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[àáâãäå�?ăą�°]", " " }); // was a - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[À�?ÂÃÄÅĀĂĄ�?]", "A" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[çćĉċ�?]", "c" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ÇĆĈĊČ]", "C" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[�?Ä‘]", "d" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[�?�?]", "D" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[èéêëæęēĕÄâ€â€�ęě]", " " }); // was e - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ÈÉÊËÆĒ�ĖĘĚ]", "'" }); // was E - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[�?ğġģ]", "g" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ĜĞĠĢƓ]", "G" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[ĥħ]", "h" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[ĤĦ]", "H" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ìÃÂÂîïĩīÄÂÂĮįıijĵ]", "i" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[�?�?ĨĪĬİIJĴĵ]", "I" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[ķĸ]", "k" }); - characterMappings.add(new String[] { "[Ķ]", "K" }); - characterMappings + CHARACTER_MAPPINGS.add(new String[] { "[Ķ]", "K" }); + CHARACTER_MAPPINGS .add(new String[] { "[øőðòóôõö�?�?őœơ]", "o" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ÒÓ�ÕÖØŌŎ�?Å’Æ ]", "O" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ñńņňʼnŋ]", "n" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ÑŃŅŇŊŋ]", "N" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ĺļľŀł]", "l" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ĹĻĽĿ�?]", "L" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ùúûüũūÅÂÂůűųư]", "u" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ÙÚÛÜŨŪŬŮŰŲƯ]", "U" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[ýÿŷ]", "y" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[�?ŶŸ]", "Y" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[Å•Åâ€â€�Ã…â„¢]", "r" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[�ŖŘ]", "R" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[šś�?şšſ]", "s" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ŠŚŜŞŠſ]", "S" }); - characterMappings.add(new String[] { "ß", "ss" }); - characterMappings.add(new String[] { "Þ", "th" }); - characterMappings.add(new String[] { "þ", "Th" }); - characterMappings + CHARACTER_MAPPINGS.add(new String[] { "ß", "ss" }); + CHARACTER_MAPPINGS.add(new String[] { "Þ", "th" }); + CHARACTER_MAPPINGS.add(new String[] { "þ", "Th" }); + CHARACTER_MAPPINGS .add(new String[] { "[ţťŧ]", "t" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ŢŤŦ]", "T" }); - characterMappings.add(new String[] { "[ŵ]", "w" }); - characterMappings.add(new String[] { "[Å´]", "W" }); - characterMappings + CHARACTER_MAPPINGS.add(new String[] { "[ŵ]", "w" }); + CHARACTER_MAPPINGS.add(new String[] { "[Å´]", "W" }); + CHARACTER_MAPPINGS .add(new String[] { "[žźżžƶ]", "z" }); - characterMappings + CHARACTER_MAPPINGS .add(new String[] { "[ŽŽŹŻŽƵ]", "Z" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[’]", "'" }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "[–]", "'" }); - characterMappings.add(new String[] { "'", "'" }); - characterMappings.add(new String[] { "Âe", "«" }); - characterMappings.add(new String[] { "'AG", "“" }); - characterMappings.add(new String[] { "A�", " " }); - characterMappings.add(new String[] { """, "\"" }); - characterMappings.add(new String[] { "&", "&" }); - characterMappings.add(new String[] { " ", " " }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "'", "'" }); + CHARACTER_MAPPINGS.add(new String[] { "Âe", "«" }); + CHARACTER_MAPPINGS.add(new String[] { "'AG", "“" }); + CHARACTER_MAPPINGS.add(new String[] { "A�", " " }); + CHARACTER_MAPPINGS.add(new String[] { """, "\"" }); + CHARACTER_MAPPINGS.add(new String[] { "&", "&" }); + CHARACTER_MAPPINGS.add(new String[] { " ", " " }); + CHARACTER_MAPPINGS.add(new String[] { "", " " }); - characterMappings.add(new String[] { "â„¢", + CHARACTER_MAPPINGS.add(new String[] { "â„¢", " " }); - characterMappings.add(new String[] { + CHARACTER_MAPPINGS.add(new String[] { "�", "" }); - characterMappings.add(new String[] { "’", "'" }); + CHARACTER_MAPPINGS.add(new String[] { "’", "'" }); } public static String stripNonAsciiChars(String s) { @@ -211,7 +211,7 @@ public class Utils { s = s.replaceAll("’", "__apostrophe__"); String tmp = s; if (tmp != null) { - for (String[] mapping : characterMappings) { + for (String[] mapping : CHARACTER_MAPPINGS) { tmp = tmp.replaceAll(mapping[0], mapping[1]); } } diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java index 5cc4c0a..1ebc613 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorer.java @@ -76,9 +76,9 @@ public class ParseTreeChunkListScorer { } else { score += 0.1; } - } else if (l.startsWith(LemmaGeneralizer.w2vPrefix) ){ + } else if (l.startsWith(LemmaGeneralizer.W2V_PREFIX) ){ try { - float val = Float.parseFloat(l.substring(LemmaGeneralizer.w2vPrefix.length())); + float val = Float.parseFloat(l.substring(LemmaGeneralizer.W2V_PREFIX.length())); score+= 1- val; } catch (NumberFormatException e) { e.printStackTrace(); diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java index bcf87d7..2a74997 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/SentencePairMatchResult.java @@ -18,14 +18,11 @@ package opennlp.tools.textsimilarity; import java.util.List; -import java.util.logging.Logger; import org.apache.commons.lang.StringUtils; public class SentencePairMatchResult { - public List<List<ParseTreeChunk>> matchResult; - private static final Logger LOG = Logger - .getLogger("opennlp.tools.textsimilarity.SentencePairMatchResult"); + private List<List<ParseTreeChunk>> matchResult; public List<List<ParseTreeChunk>> getMatchResult() { return matchResult; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java index 5dfdf1a..75d707e 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java @@ -42,7 +42,7 @@ public class TextProcessor { private static final Logger LOG = Logger .getLogger("opennlp.tools.textsimilarity.TextProcessor"); - static final String[] abbrevs = { "mr.", "mrs.", "sen.", "rep.", "gov.", + static final String[] ABBREVS = { "mr.", "mrs.", "sen.", "rep.", "gov.", "miss.", "dr.", "oct.", "nov.", "jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sept." }; @@ -225,7 +225,7 @@ public class TextProcessor { cand += " " + text.substring(idx, m.end() - 1).trim(); boolean hasAbbrev = false; - for (String abbrev : abbrevs) { + for (String abbrev : ABBREVS) { if (cand.toLowerCase().endsWith(abbrev)) { hasAbbrev = true; break; diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java index b1eda2a..f1b4ea2 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserCacheSerializer.java @@ -53,14 +53,14 @@ import au.com.bytecode.opencsv.CSVWriter; public class ParserCacheSerializer { private static final Logger LOG = Logger .getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserCacheSerializer"); - private static final boolean javaObjectSerialization = false; + private static final boolean JAVA_OBJECT_SERIALIZATION = false; private static final String RESOURCE_DIR = "src/test/resources/"; - public static final String parseCacheFileName = "sentence_parseObject.dat"; - public static final String parseCacheFileNameCSV = "sentence_parseObject.csv"; + private static final String PARSE_CACHE_FILE_NAME = "sentence_parseObject.dat"; + private static final String PARSE_CACHE_FILE_NAME_CSV = "sentence_parseObject.csv"; public static void writeObject(Object objectToSerialize) { - if (javaObjectSerialization) { - String filename = RESOURCE_DIR + parseCacheFileName; + if (JAVA_OBJECT_SERIALIZATION) { + String filename = RESOURCE_DIR + PARSE_CACHE_FILE_NAME; try(FileOutputStream fos = new FileOutputStream(filename); ObjectOutputStream out = new ObjectOutputStream(fos)) { @@ -73,7 +73,7 @@ public class ParserCacheSerializer { Map<String, String[][]> sentence_parseObject = (Map<String, String[][]>) objectToSerialize; List<String> keys = new ArrayList<>(sentence_parseObject.keySet()); try (CSVWriter writer = new CSVWriter( - new FileWriter(RESOURCE_DIR + parseCacheFileNameCSV, false))) { + new FileWriter(RESOURCE_DIR + PARSE_CACHE_FILE_NAME_CSV, false))) { for (String k : keys) { String[][] triplet = sentence_parseObject.get(k); writer.writeNext(new String[] { k }); @@ -89,8 +89,8 @@ public class ParserCacheSerializer { } public static Object readObject() { - if (javaObjectSerialization) { - String filename = RESOURCE_DIR + parseCacheFileName; + if (JAVA_OBJECT_SERIALIZATION) { + String filename = RESOURCE_DIR + PARSE_CACHE_FILE_NAME; Object data = null; try (FileInputStream fis = new FileInputStream(filename); ObjectInputStream in = new ObjectInputStream(fis)) { @@ -106,10 +106,10 @@ public class ParserCacheSerializer { List<String[]> lines; try (CSVReader reader = new CSVReader(new FileReader(RESOURCE_DIR - + parseCacheFileNameCSV), ',')) { + + PARSE_CACHE_FILE_NAME_CSV), ',')) { lines = reader.readAll(); } catch (FileNotFoundException e) { - if (javaObjectSerialization) + if (JAVA_OBJECT_SERIALIZATION) System.err.println("Cannot find cache file"); return null; } catch (IOException ioe) { diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java index 3659ad1..e2bb275 100644 --- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java +++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java @@ -54,7 +54,8 @@ import opennlp.tools.tokenize.TokenizerModel; import opennlp.tools.util.Span; public class ParserChunker2MatcherProcessor { - protected static final int MIN_SENTENCE_LENGTH = 10; + + static final int MIN_SENTENCE_LENGTH = 10; private static final String MODEL_DIR_KEY = "nlp.models.dir"; // TODO config // this is where resources should live @@ -67,7 +68,7 @@ public class ParserChunker2MatcherProcessor { private POSTagger posTagger; private Parser parser; private ChunkerME chunker; - private final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5; + private static final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5; private static final Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor"); private Map<String, String[][]> sentence_parseObject; diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java index c28b17d..9d202a2 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java @@ -81,7 +81,7 @@ public class FeaturesExtractor { for (int i = 0; i < wordToDisambiguate.getSentence().length; i++) { if (wordToDisambiguate.getLemmas() != null) { - if (!WSDHelper.stopWords.contains(wordToDisambiguate.getSentence()[i] + if (!WSDHelper.STOP_WORDS.contains(wordToDisambiguate.getSentence()[i] .toLowerCase()) && (wordToDisambiguate.getWordIndex() != i)) { String lemma = wordToDisambiguate.getLemmas()[i].toLowerCase() diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java index a54cf2e..7cc7015 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java @@ -50,7 +50,7 @@ public class IMSWSDContextGenerator implements WSDContextGenerator { for (int i = 0; i < toks.length; i++) { if (lemmas != null) { - if (!WSDHelper.stopWords.contains(toks[i].toLowerCase()) && (index + if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index != i)) { String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "") diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java index d37162c..8c52c9d 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java @@ -41,7 +41,7 @@ public class OSCCWSDContextGenerator implements WSDContextGenerator { for (int i = 0; i < toks.length; i++) { if (lemmas != null) { - if (!WSDHelper.stopWords.contains(toks[i].toLowerCase()) && (index + if (!WSDHelper.STOP_WORDS.contains(toks[i].toLowerCase()) && (index != i)) { String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "") diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java index da1cd3e..2dbf7d7 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java @@ -35,6 +35,7 @@ import net.sf.extjwnl.JWNLException; import net.sf.extjwnl.data.POS; import net.sf.extjwnl.dictionary.Dictionary; import net.sf.extjwnl.dictionary.MorphologicalProcessor; + import opennlp.tools.cmdline.postag.POSModelLoader; import opennlp.tools.lemmatizer.DictionaryLemmatizer; import opennlp.tools.postag.POSTaggerME; @@ -62,22 +63,21 @@ public class WSDHelper { private static Map<String, Object> nonRelevWordsDef; // List of all the PoS tags - public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ", + public static final String[] ALL_POS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" }; // List of the PoS tags of which the senses are to be extracted - public static final String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB", + public static final String[] RELEVANT_POS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB", "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" }; // List of Negation Words - public static List<String> negationWords = new ArrayList<>( - Arrays.asList("not", "no", "never", "none", "nor", "non")); + public static final List<String> NEGATION_WORDS = Arrays.asList("not", "no", "never", "none", "nor", "non"); // List of Stop Words - public static final List<String> stopWords = new ArrayList<>( - Arrays.asList("a", "able", "about", "above", "according", "accordingly", + public static final List<String> STOP_WORDS = Arrays.asList( + "a", "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", @@ -155,12 +155,12 @@ public class WSDHelper { "who", "whoever", "whole", "whom", "who's", "whose", "why", "will", "willing", "wish", "with", "within", "without", "wonder", "won't", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your", - "you're", "yours", "yourself", "yourselves", "you've", "zero")); + "you're", "yours", "yourself", "yourselves", "you've", "zero"); public static Map<String, Object> getRelvCache() { if (relvCache == null || relvCache.keySet().isEmpty()) { relvCache = new HashMap<>(); - for (String t : relevantPOS) { + for (String t : RELEVANT_POS) { relvCache.put(t, null); } } @@ -170,7 +170,7 @@ public class WSDHelper { public static Map<String, Object> getStopCache() { if (stopCache == null || stopCache.keySet().isEmpty()) { stopCache = new HashMap<>(); - for (String s : stopWords) { + for (String s : STOP_WORDS) { stopCache.put(s, null); } } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java index ad17dc7..d8667d2 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java @@ -27,7 +27,7 @@ import opennlp.tools.util.ObjectStream; public class WSDSampleStream extends FilterObjectStream<String, WSDSample> { - private static final Logger logger = Logger.getLogger(WSDSampleStream.class.getName()); + private static final Logger LOG = Logger.getLogger(WSDSampleStream.class.getName()); /** * Initializes the current instance. @@ -60,8 +60,8 @@ public class WSDSampleStream extends FilterObjectStream<String, WSDSample> { sample = WSDSample.parse(sentence); } catch (InvalidFormatException e) { - if (logger.isLoggable(Level.WARNING)) { - logger + if (LOG.isLoggable(Level.WARNING)) { + LOG .warning("Error during parsing, ignoring sentence: " + sentence); } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java index 9bf8d7f..c8aa549 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java @@ -36,7 +36,7 @@ public class WSDisambiguatorME extends WSDisambiguator { protected WSDModel model; - protected static final WSDContextGenerator cg = new IMSWSDContextGenerator(); + protected static final WSDContextGenerator CONTEXT_GENERATOR = new IMSWSDContextGenerator(); public WSDisambiguatorME(WSDParameters params) { this.params = params; @@ -78,7 +78,7 @@ public class WSDisambiguatorME extends WSDisambiguator { wordTag = sample.getTargetWordTag(); do { String sense = sample.getSenseIDs()[0]; - String[] context = cg + String[] context = CONTEXT_GENERATOR .getContext(sample, ((WSDDefaultParameters) params).ngram, ((WSDDefaultParameters) params).windowSize, surroundingContext); Event ev = new Event(sense + "", context); @@ -136,7 +136,7 @@ public class WSDisambiguatorME extends WSDisambiguator { String outcome; - String[] context = cg + String[] context = CONTEXT_GENERATOR .getContext(sample, ((WSDDefaultParameters) this.params).ngram, ((WSDDefaultParameters) this.params).windowSize, this.model.getContextEntries()); @@ -162,7 +162,7 @@ public class WSDisambiguatorME extends WSDisambiguator { } else { String outcome; - String[] context = cg + String[] context = CONTEXT_GENERATOR .getContext(sample, ((WSDDefaultParameters) this.params).ngram, ((WSDDefaultParameters) this.params).windowSize, this.model.getContextEntries()); diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java index fca0d07..524b420 100644 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java @@ -46,7 +46,7 @@ import edu.mit.jwi.RAMDictionary; public class WordRelationshipDetermination { private final IDictionary dictionary; - private static final String dictionaryFile = "/wordnet/dict"; + private static final String DICTIONARY_FILE = "/wordnet/dict"; private static final int MAX_DIST_MED_REL = 1000; private final Pointer[] rels = {Pointer.ANTONYM, Pointer.HYPERNYM, Pointer.HYPONYM, Pointer.MERONYM_PART, @@ -55,7 +55,7 @@ public class WordRelationshipDetermination { private final Hashtable<ISynset, List<IWord>> synsetWordCache = new Hashtable<>(); public WordRelationshipDetermination() throws Exception { - dictionary = new RAMDictionary(WordRelationshipDetermination.class.getResource(dictionaryFile), ILoadPolicy.IMMEDIATE_LOAD); + dictionary = new RAMDictionary(WordRelationshipDetermination.class.getResource(DICTIONARY_FILE), ILoadPolicy.IMMEDIATE_LOAD); ((RAMDictionary)dictionary).load(); openDict(); } diff --git a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java index 6b23dd9..e6eca05 100644 --- a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java @@ -42,11 +42,11 @@ public class MetaSummarizer { private final DocProcessor dp; private final TextRankSummarizer textRank; private final LexicalChainingSummarizer lcs; - private static final String sentFragModel = "/en-sent.bin"; + private static final String SENT_FRAG_MODEL = "/en-sent.bin"; public MetaSummarizer(String posModelFile) throws Exception { Logger.getAnonymousLogger().info("Initializing Meta Summarizer"); - dp = new DefaultDocProcessor(MetaSummarizer.class.getResourceAsStream(sentFragModel)); + dp = new DefaultDocProcessor(MetaSummarizer.class.getResourceAsStream(SENT_FRAG_MODEL)); textRank = new TextRankSummarizer(); lcs = new LexicalChainingSummarizer(dp, new FileInputStream(posModelFile)); } diff --git a/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java b/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java index 7825e58..c54f76e 100755 --- a/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java +++ b/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java @@ -142,7 +142,6 @@ public class DefaultDocProcessor implements DocProcessor { //List of sentences form a document public List<Sentence> docToSentList(String fileName) { List<Sentence> sentList = new ArrayList<>(); - StringBuilder docBuffer = new StringBuilder(); try (LineNumberReader lnr = new LineNumberReader(new FileReader(fileName))) { String nextLine; @@ -168,7 +167,6 @@ public class DefaultDocProcessor implements DocProcessor { } } - String doc = docBuffer.toString(); } catch (Exception ex) { Logger.getLogger(DefaultDocProcessor.class.getName()).log(Level.SEVERE, null, ex); } diff --git a/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java b/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java index b068637..b6072eb 100755 --- a/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java +++ b/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java @@ -53,7 +53,7 @@ public class TextRank { // private Hashtable<Integer, String[]> wordsInSent; // DAMPING FACTOR.. - private static final double df = 0.15; + private static final double DF = 0.15; private static final boolean HIGHER_TITLE_WEIGHT = true; private static final double TITLE_WRD_WT = 2d; @@ -150,7 +150,7 @@ public class TextRank { sum += wij / sigmawjk * txtRnkj; } } - ns.setScore((1d - df) + sum * df);// * rs.score + ns.setScore((1d - DF) + sum * DF);// * rs.score totErr += ns.getScore() - getScoreFrom(rawScores, sentId); newWtScores.add(ns); } diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java index 72804cb..aa01361 100644 --- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java @@ -33,7 +33,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; class LexChainTest { - private static final String article = + private static final String ARTICLE = "US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"." + "But a White House statement cautioned that the US expected Syria to live up to its public commitments. " + "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. " @@ -52,14 +52,14 @@ class LexChainTest { @Test void testBuildLexicalChains() { - List<Sentence> sent = dp.getSentencesFromStr(article); + List<Sentence> sent = dp.getSentencesFromStr(ARTICLE); assertNotNull(sent); - List<LexicalChain> vh = lcs.buildLexicalChains(article, sent); + List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent); assertNotNull(vh); Collections.sort(vh); assertTrue(vh.size() > 0); - List<Sentence> s = dp.getSentencesFromStr(article); + List<Sentence> s = dp.getSentencesFromStr(ARTICLE); Hashtable<String, Boolean> comp = new Hashtable<>(); for (int i = vh.size() - 1; i >= Math.max(vh.size() - 50, 0); i--) { diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java index 24cac18..1bb476a 100644 --- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java @@ -30,7 +30,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; class LexChainingKeywordExtractorTest { - private static final String article = + private static final String ARTICLE = "US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"." + "But a White House statement cautioned that the US expected Syria to live up to its public commitments. " + "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. " @@ -49,8 +49,8 @@ class LexChainingKeywordExtractorTest { @Test void testGetKeywords() { - List<Sentence> sent = dp.getSentencesFromStr(article); - List<LexicalChain> vh = lcs.buildLexicalChains(article, sent); + List<Sentence> sent = dp.getSentencesFromStr(ARTICLE); + List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent); LexChainingKeywordExtractor ke = new LexChainingKeywordExtractor(); List<String> keywords = ke.getKeywords(vh, 5); assertNotNull(keywords);
