This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-addons.git
The following commit(s) were added to refs/heads/master by this push:
new 0bd12e8 corrects minor typos in geoentitylinker classes; fixes grammar
issues throughout all modules; corrects some C-style array declarations; makes
fields final where applicable; simplifies if-else statements; removes redundant
'throws' clauses; simplifies some JUnit assertions
0bd12e8 is described below
commit 0bd12e8c57a94c2f4d9ebac26eddbd83c9922302
Author: Martin Wiesner <[email protected]>
AuthorDate: Sat Jan 25 21:52:21 2025 +0100
corrects minor typos in geoentitylinker classes
fixes grammar issues throughout all modules
corrects some C-style array declarations
makes fields final where applicable
simplifies if-else statements
removes redundant 'throws' clauses
simplifies some JUnit assertions
---
.../AdminBoundaryContextGenerator.java | 9 +++++----
.../addons/geoentitylinker/GazetteerEntry.java | 7 ++-----
.../geoentitylinker/indexing/GazetteerIndexer.java | 2 +-
.../indexing/GeonamesFileDownloader.java | 4 ++--
.../indexing/GeonamesProcessor.java | 4 +---
.../geoentitylinker/indexing/RegionProcessor.java | 6 +++---
.../geoentitylinker/indexing/USGSProcessor.java | 4 ++--
.../scoring/CountryProximityScorer.java | 2 +-
.../scoring/FuzzyStringMatchScorer.java | 3 +--
.../geoentitylinker/scoring/ModelBasedScorer.java | 4 ++--
.../geoentitylinker/scoring/PointClustering.java | 4 ++--
.../scoring/ProvinceProximityScorer.java | 22 +++++++++-------------
.../AuxiliaryInfoNameContextGenerator.java | 8 ++++----
.../AuxiliaryInfoTokenNameFinderFactory.java | 2 +-
.../opennlp/tools/namefind/AuxiliaryInfoUtil.java | 2 +-
...AuxiliaryInfoAwareDelegateFeatureGenerator.java | 2 +-
.../lang/jpn/BrownTokenClassFeatureGenerator.java | 2 +-
.../featuregen/lang/jpn/FeatureGeneratorUtil.java | 2 +-
.../lang/jpn/TokenClassFeatureGenerator.java | 2 +-
.../lang/jpn/TokenPatternFeatureGenerator.java | 4 ++--
.../tools/namefind/AuxiliaryInfoUtilTest.java | 12 ++++++------
...liaryInfoAwareDelegateFeatureGeneratorTest.java | 6 +++---
.../lang/jpn/BigramNameFeatureGeneratorTest.java | 10 +++++-----
.../lang/jpn/TokenClassFeatureGeneratorTest.java | 8 ++++----
.../lang/jpn/TokenPatternFeatureGeneratorTest.java | 2 +-
.../lang/jpn/TrigramNameFeatureGeneratorTest.java | 12 ++++++------
.../jwnl/lemmatizer/JWNLLemmatizerTest.java | 5 ++---
.../src/main/java/LiblinearModelSerializer.java | 2 ++
.../src/main/java/LiblinearTrainer.java | 2 +-
.../addons/modelbuilder/KnownEntityProvider.java | 2 +-
.../modelbuilder/ModelGenerationValidator.java | 5 +----
.../impls/FileKnownEntityProvider.java | 14 ++++++--------
.../modelbuilder/impls/FileModelValidatorImpl.java | 8 ++------
.../modelbuilder/impls/GenericModelGenerator.java | 6 +++---
morfologik-addon/src/main/assembly/bin.xml | 6 +++---
morfologik-addon/src/main/assembly/src.xml | 2 +-
...ilder.java => MorfologikDictionaryBuilder.java} | 6 ++----
.../main/java/opennlp/morfologik/cmdline/CLI.java | 2 +-
.../builder/MorfologikDictionaryBuilderTool.java | 6 +++---
.../builder/XMLDictionaryToTableParams.java | 2 +-
.../cmdline/builder/XMLDictionaryToTableTool.java | 2 +-
.../lemmatizer/MorfologikLemmatizer.java | 2 +-
.../tagdict/MorfologikTagDictionary.java | 6 +++---
...lderTest.java => POSDictionaryBuilderTest.java} | 10 +++++-----
.../lemmatizer/MorfologikLemmatizerTest.java | 4 ++--
.../tagdict/MorfologikTagDictionaryTest.java | 16 ++++++++--------
.../morfologik/tagdict/POSTaggerFactoryTest.java | 9 +++++----
47 files changed, 122 insertions(+), 140 deletions(-)
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java
index 71e00a5..bcf6076 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java
@@ -350,19 +350,20 @@ public class AdminBoundaryContextGenerator {
provs = new HashMap<>();
}
//if (!provs.containsKey(adm.getProvCode())) {
- provs.put(adm.countryCode() + "." + adm.getProvCode(),
adm.provinceName());
+ String combined = adm.countryCode() + "." + adm.getProvCode();
+ provs.put(combined, adm.provinceName());
provMap.put(adm.countryCode(), provs);
// }
if (!adm.countyCode().equalsIgnoreCase("no_data_found") &&
!adm.countyName().equalsIgnoreCase("no_data_found")) {
- Map<String, String> counties = countyMap.get(adm.countryCode() +
"." + adm.getProvCode());
+ Map<String, String> counties = countyMap.get(combined);
if (counties == null) {
counties = new HashMap<>();
}
// if (!counties.containsKey(adm.getCountyCode())) {
- String countyid = adm.countryCode() + "." + adm.getProvCode() +
"." + adm.countyCode();
+ String countyid = combined + "." + adm.countyCode();
counties.put(countyid, adm.countyName());
- countyMap.put(adm.countryCode() + "." + adm.getProvCode(),
counties);
+ countyMap.put(combined, counties);
// }
}
}
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
index 86fc0ea..6497894 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
@@ -127,7 +127,7 @@ public class GazetteerEntry extends BaseLink {
@Override
public String toString() {
- return super.toString() + "\n\t\tGazateerEntry\n" + "\t\tlatitude=" +
+ return super.toString() + "\n\t\tGazetteerEntry\n" + "\t\tlatitude=" +
latitude + ", \n\t\tlongitude=" + longitude + ", \n\t\tsource=" +
source + ", \n\t\tindexID=" + indexID + ",\n\t\tindexData=" +
indexData + "\n";
}
@@ -152,9 +152,6 @@ public class GazetteerEntry extends BaseLink {
if (!Objects.equals(this.source, other.source)) {
return false;
}
- if (!Objects.equals(this.indexID, other.indexID)) {
- return false;
- }
- return true;
+ return Objects.equals(this.indexID, other.indexID);
}
}
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java
index d178a44..a640c5d 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java
@@ -93,7 +93,7 @@ public class GazetteerIndexer {
* <a href="https://geonames.usgs.gov/domestic/download_data.htm">here</a>
click on the
* national_file####.zip link to get all the most recent features
*
- * @param usgsGovUnitsFile go to
+ * @param usgsGovUnitsFile go
* <a href="https://geonames.usgs.gov/domestic/download_data.htm">here</a>
in the section titled
* "Topical Gazetteers -- File Format" click on the dropdown list and select
* "Government Units". The downloaded file is what you need for this param.
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java
index df010ed..b255553 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java
@@ -28,7 +28,7 @@ import java.util.zip.ZipFile;
public class GeonamesFileDownloader {
final static int size = 1024;
- private static final String ALL_COUNTRIES =
"http://download.geonames.org/export/dump/ZM.zip";
+ private static final String ALL_COUNTRIES =
"https://download.geonames.org/export/dump/ZM.zip";
private static final String COUNTRY_INFO = "";
private static final String ADM1_LOOKUP = "";
@@ -81,7 +81,7 @@ public class GeonamesFileDownloader {
byteWritten += byteRead;
}
System.out.println("Downloaded Successfully.");
- System.out.println("File name:\"" + localFileName + "\"\nNo ofbytes :" +
byteWritten);
+ System.out.println("File name:\"" + localFileName + "\"\nNo of bytes :"
+ byteWritten);
} catch (Exception e) {
e.printStackTrace();
}
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java
index 0553e3c..b1e8bee 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java
@@ -127,7 +127,7 @@ public class GeonamesProcessor {
}
String[] values = line.split(TAB);
- String ccode = values[0].toLowerCase();//this is the 2 digit ISO code
+ String ccode = values[0].toLowerCase(); //this is the 2-digit ISO code
String cname = values[4].toLowerCase();
if (!ccode.isEmpty()) {
ccs.put(ccode, cname);
@@ -245,8 +245,6 @@ public class GeonamesProcessor {
doc.add(new TextField("hierarchy", concatIndexEntry, Field.Store.YES));
doc.add(new TextField("placename", placeName, Field.Store.YES));
// doc.add(new TextField("countryname", countryname, Field.Store.YES));
- //System.out.println(placeName);
-
doc.add(new TextField("latitude", lat, Field.Store.YES));
doc.add(new TextField("longitude", lon, Field.Store.YES));
doc.add(new StringField("loctype", dsg, Field.Store.YES));
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java
index 5335c2b..d97427f 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java
@@ -55,14 +55,14 @@ public class RegionProcessor {
}
}
- public static void readFile(File gazateerInputData, File
outputCountryContextfile,
- IndexWriter w) throws IOException {
+ public static void readFile(File gazetteerInput, File
outputCountryContextfile, IndexWriter w)
+ throws IOException {
List<String> ccfileentries = new ArrayList<>();
List<String> fields = new ArrayList<>();
int counter = 0;
System.out.println("reading gazetteer data from Regions file...........");
String line;
- try (BufferedReader reader = new BufferedReader(new
FileReader(gazateerInputData))) {
+ try (BufferedReader reader = new BufferedReader(new
FileReader(gazetteerInput))) {
while ((line = reader.readLine()) != null) {
String[] values = line.split("\t");
if (counter == 0) {
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java
index ac1e91f..09105f8 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java
@@ -56,11 +56,11 @@ public class USGSProcessor {
writeCountryContextFile(outputCountryContextfile, provData);
}
- public static void readFile(File gazateerInputData, IndexWriter w,
GazetteerIndexer.GazType type,
+ public static void readFile(File gazetteerInput, IndexWriter w,
GazetteerIndexer.GazType type,
Map<String, AdminBoundary> lookupMap) throws
IOException {
Map<String, StateCentroid> states = new HashMap<>();
- try (BufferedReader reader = new BufferedReader(new
FileReader(gazateerInputData))) {
+ try (BufferedReader reader = new BufferedReader(new
FileReader(gazetteerInput))) {
List<String> fields = new ArrayList<>();
int counter = 0;
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java
index 726c809..08d6055 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java
@@ -204,7 +204,7 @@ public class CountryProximityScorer implements
LinkedEntityScorer<BaseLink, Adm
for (Integer i : distanceMap.get(key)) {
Double norm = normalize(i, min, max);
//reverse the normed distance so low numbers (closer) are better
- //this could be improved with a "decaying " function using an
imcreaseing negative exponent
+ //this could be improved with a "decaying " function using an
increasing negative exponent
Double reverse = Math.abs(norm - 1);
normalizedDistances.add(reverse);
}
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java
index a9a7e3e..873b6ea 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java
@@ -28,8 +28,7 @@ import opennlp.tools.entitylinker.LinkedSpan;
import opennlp.tools.util.Span;
/**
- *
- * Generates scores based on string comparisons levenstein and dice
+ * Generates scores based on string comparisons Levenshtein and Dice.
*/
public class FuzzyStringMatchScorer implements
LinkedEntityScorer<GazetteerEntry, AdminBoundaryContext> {
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java
index d227b8d..4b305e4 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java
@@ -120,10 +120,10 @@ public class ModelBasedScorer implements
LinkedEntityScorer<BaseLink, AdminBound
public String getTextChunk(int mentionIdx, String docText, int radius) {
int docSize = docText.length();
- int left = 0, right = 0;
+ int left, right;
left = (mentionIdx - radius < 0) ? 0 : mentionIdx - radius;
right = (mentionIdx + radius > docSize) ? docSize : mentionIdx + radius;
- String chunk = "";
+ String chunk;
if (right <= left) {
chunk = "";
} else {
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java
index a49c1aa..5f218d9 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java
@@ -31,7 +31,7 @@ import opennlp.addons.geoentitylinker.GazetteerEntry;
public class PointClustering {
/**
- * Clusters a set of points from the gazateers. The idea is that locations
+ * Clusters a set of points from the gazetteers. The idea is that locations
* that matched a name that are closer to each other, the more likely the
* toponym is to be accurate
*
@@ -63,7 +63,7 @@ public class PointClustering {
for (String key : clusters.keySet()) {
int size = clusters.get(key).size();
if (size > max) {
- max = (double) size;
+ max = size;
}
}
for (String key : clusters.keySet()) {
diff --git
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java
index 6badb60..13ce217 100644
---
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java
+++
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java
@@ -62,7 +62,7 @@ public class ProvinceProximityScorer implements
LinkedEntityScorer<BaseLink, Adm
/**
* Assigns a score to each BaseLink in each linkedSpan's set of N best
- * matches. Currently the scoring indicates the probability that the toponym
+ * matches. Currently, the scoring indicates the probability that the toponym
* is correct based on the country context in the document
*
* @param linkedData the linked spans, holds the Namefinder results, and the
@@ -118,7 +118,7 @@ public class ProvinceProximityScorer implements
LinkedEntityScorer<BaseLink, Adm
LinkedSpan<BaseLink>
span, Integer maxAllowedDistance) {
Double score = 0.0;
/*
- * get the index of the actual span, begining of sentence //should generate
+ * get the index of the actual span, beginning of sentence //should
generate
* tokens from sentence and create a char offset... //could have large
* sentences due to poor sentence detection or wonky doc text
*/
@@ -131,22 +131,18 @@ public class ProvinceProximityScorer implements
LinkedEntityScorer<BaseLink, Adm
Map<String, Set<Integer>> distancesFromCodeMap = new HashMap<>();
//map = Map<countrycode, Set <of distances this span is from all the
mentions of the code>>
for (String cCode : countryHits.keySet()) {
- //iterate over all the regex start values and calculate an offset
+ // iterate over all the regex start values and calculate an offset
for (Integer cHit : countryHits.get(cCode)) {
- Integer absDist = Math.abs(sentIndexInDoc - cHit);
+ int absDist = Math.abs(sentIndexInDoc - cHit);
//only include near mentions based on a heuristic
//TODO make this a property
// if (absDist < maxAllowedDistance) {
if (distancesFromCodeMap.containsKey(cCode)) {
distancesFromCodeMap.get(cCode).add(absDist);
} else {
- HashSet<Integer> newset = new HashSet<>();
- newset.add(absDist);
- distancesFromCodeMap.put(cCode, newset);
+ distancesFromCodeMap.put(cCode, new HashSet<>(absDist));
}
}
-
- //}
}
//we now know how far this named entity is from every country mention in
the document
@@ -172,7 +168,7 @@ public class ProvinceProximityScorer implements
LinkedEntityScorer<BaseLink, Adm
if (nameCodesMap.containsKey(link.getItemName().toLowerCase()) ||
regexMatch(link.getItemName(), link.getItemParentID())) {
//if so, is it the correct country code for that name?
if
(nameCodesMap.get(entry.getItemName().toLowerCase()).contains(entry.getProvinceCode()))
{
- //boost the score becuase it is likely that this is the location
in the text, so add 50% to the score or set to 1
+ //boost the score because it is likely that this is the location
in the text, so add 50% to the score or set to 1
//TODO: make this smarter
score = (score + .75) > 1.0 ? 1d : (score + .75);
@@ -219,15 +215,15 @@ public class ProvinceProximityScorer implements
LinkedEntityScorer<BaseLink, Adm
for (String key : distanceMap.keySet()) {
all.addAll(distanceMap.get(key));
}
- //get min max for normalization, this could be more efficient
-
+
+ // get min max for normalization, this could be more efficient
int min = all.first();
int max = all.last();
if (min == max) {
min = 0;
}
- for (String key : distanceMap.keySet()) {
+ for (String key : distanceMap.keySet()) {
TreeSet<Double> normalizedDistances = new TreeSet<>();
for (Integer i : distanceMap.get(key)) {
Double norm = normalize(i, min, max);
diff --git
a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java
b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java
index bda691b..333379f 100644
---
a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java
+++
b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java
@@ -24,7 +24,7 @@ import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
import opennlp.tools.util.featuregen.FeatureGeneratorUtil;
/**
- * If a token contains an auxiliary information, e.g. POS tag, this class can
be used
+ * If a token contains auxiliary information, e.g. POS tag, this class can be
used
* to extract word part in {@link #getContext(int, String[], String[],
Object[])} method.
*
* <strong>EXPERIMENTAL</strong>.
@@ -40,11 +40,11 @@ public class AuxiliaryInfoNameContextGenerator extends
DefaultNameContextGenerat
* Return the context for finding names at the specified index.
* @param index The index of the token in the specified toks array for which
the
* context should be constructed.
- * @param tokens The tokens of the sentence. The <code>toString</code>
methods
+ * @param tokens The tokens of the sentence. The <code>toString</code>
methods
* of these objects should return the token text.
* @param preds The previous decisions made in the tagging of this sequence.
- * Only indices less than i will be examined.
- * @param additionalContext Addition features which may be based on a
context outside of the sentence.
+ * Only indices less than {@code i} will be examined.
+ * @param additionalContext Addition features which may be based on a
context outside the sentence.
*
* @return the context for finding names at the specified index.
*/
diff --git
a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java
b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java
index ea091e6..9977433 100644
---
a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java
+++
b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java
@@ -28,7 +28,7 @@ import opennlp.tools.util.featuregen.TokenFeatureGenerator;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;
/**
- * If a token contains an auxiliary information, e.g. POS tag, in the training
data,
+ * If a token contains auxiliary information, e.g. POS tag, in the training
data,
* you can use this class via -factory command line option.
*
* <strong>EXPERIMENTAL</strong>.
diff --git
a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java
b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java
index 7bb336c..a9eb7f8 100644
--- a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java
+++ b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java
@@ -18,7 +18,7 @@
package opennlp.tools.namefind;
/**
- * If a token contains an auxiliary information, e.g. POS tag, this class can
be used
+ * If a token contains auxiliary information, e.g. POS tag, this class can be
used
* to extract word part or auxiliary information part.<br>
*
* ex) token := word '/' POStag
diff --git
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java
index ee82c0c..aa9178d 100644
---
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java
+++
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java
@@ -23,7 +23,7 @@ import java.util.List;
import opennlp.tools.namefind.AuxiliaryInfoUtil;
/**
- * If a token contains an auxiliary information, e.g. POS tag, in the training
data,
+ * If a token contains auxiliary information, e.g. POS tag, in the training
data,
* you can use this feature generator in order to let the feature generator
choose
* word part or auxiliary information part.<br>
*
diff --git
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java
index 2d12171..edae61f 100644
---
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java
+++
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java
@@ -28,7 +28,7 @@ import opennlp.tools.util.featuregen.BrownTokenClasses;
*/
public class BrownTokenClassFeatureGenerator implements
AdaptiveFeatureGenerator {
- private BrownCluster brownLexicon;
+ private final BrownCluster brownLexicon;
public BrownTokenClassFeatureGenerator(BrownCluster dict) {
this.brownLexicon = dict;
diff --git
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java
index a6c603a..03b5659 100644
---
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java
+++
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java
@@ -42,7 +42,7 @@ public class FeatureGeneratorUtil {
Objects.requireNonNull(token, "token must be not null!");
- if (token.length() == 0) return "other";
+ if (token.isEmpty()) return "other";
// scan token only once
char c = token.charAt(0);
diff --git
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java
index 14cff33..be4cadf 100644
---
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java
+++
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java
@@ -31,7 +31,7 @@ public class TokenClassFeatureGenerator implements
AdaptiveFeatureGenerator {
private static final String TOKEN_CLASS_PREFIX = "wc";
private static final String TOKEN_AND_CLASS_PREFIX = "w&c";
- private boolean generateWordAndClassFeature;
+ private final boolean generateWordAndClassFeature;
public TokenClassFeatureGenerator() {
this(false);
diff --git
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java
index 7771ed7..cf1fe7a 100644
---
a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java
+++
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java
@@ -32,8 +32,8 @@ import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
*/
public class TokenPatternFeatureGenerator implements AdaptiveFeatureGenerator {
- private Pattern noLetters = Pattern.compile("[^a-zA-Z]");
- private Tokenizer tokenizer;
+ private final Pattern noLetters = Pattern.compile("[^a-zA-Z]");
+ private final Tokenizer tokenizer;
/**
* Initializes a new instance.
diff --git
a/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java
b/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java
index 4c0bcc1..db9bad2 100644
---
a/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java
+++
b/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java
@@ -25,20 +25,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
public class AuxiliaryInfoUtilTest {
@Test
- public void testGetSeparatorIndex() throws Exception {
+ public void testGetSeparatorIndex() {
assertEquals(0, AuxiliaryInfoUtil.getSeparatorIndex("/POStag"));
assertEquals(1, AuxiliaryInfoUtil.getSeparatorIndex("1/POStag"));
assertEquals(10, AuxiliaryInfoUtil.getSeparatorIndex("word/stuff/POStag"));
}
@Test
- public void testGetSeparatorIndexNoPos() throws Exception {
+ public void testGetSeparatorIndexNoPos() {
Assertions.assertThrows(RuntimeException.class, () ->
AuxiliaryInfoUtil.getSeparatorIndex("NOPOStags"));
}
@Test
- public void testGetWordPart() throws Exception {
+ public void testGetWordPart() {
assertEquals(" ", AuxiliaryInfoUtil.getWordPart("/POStag"));
assertEquals("1", AuxiliaryInfoUtil.getWordPart("1/POStag"));
assertEquals("word", AuxiliaryInfoUtil.getWordPart("word/POStag"));
@@ -46,7 +46,7 @@ public class AuxiliaryInfoUtilTest {
}
@Test
- public void testGetWordParts() throws Exception {
+ public void testGetWordParts() {
String[] results = AuxiliaryInfoUtil.getWordParts(new String[]{"1/A",
"234/B", "3456/C", "/D"});
assertEquals(4, results.length);
assertEquals("1", results[0]);
@@ -56,7 +56,7 @@ public class AuxiliaryInfoUtilTest {
}
@Test
- public void testGetAuxPart() throws Exception {
+ public void testGetAuxPart() {
assertEquals("POStag", AuxiliaryInfoUtil.getAuxPart("/POStag"));
assertEquals("POStag", AuxiliaryInfoUtil.getAuxPart("1/POStag"));
assertEquals("POStag", AuxiliaryInfoUtil.getAuxPart("word/POStag"));
@@ -64,7 +64,7 @@ public class AuxiliaryInfoUtilTest {
}
@Test
- public void testGetAuxParts() throws Exception {
+ public void testGetAuxParts() {
String[] results = AuxiliaryInfoUtil.getAuxParts(new String[] {"1/ABC",
"234/B", "3456/CD", "/DEFGH"});
assertEquals(4, results.length);
assertEquals("ABC", results[0]);
diff --git
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java
index 8c8f44d..40f34e6 100644
---
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java
+++
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java
@@ -32,12 +32,12 @@ public class AuxiliaryInfoAwareDelegateFeatureGeneratorTest
{
private List<String> features;
@BeforeEach
- public void setUp() throws Exception {
+ public void setUp() {
features = new ArrayList<>();
}
@Test
- public void testWord() throws Exception {
+ public void testWord() {
AdaptiveFeatureGenerator featureGenerator = new
AuxiliaryInfoAwareDelegateFeatureGenerator(
new IdentityFeatureGenerator(), false);
@@ -47,7 +47,7 @@ public class AuxiliaryInfoAwareDelegateFeatureGeneratorTest {
}
@Test
- public void testAuxInfo() throws Exception {
+ public void testAuxInfo() {
AdaptiveFeatureGenerator featureGenerator = new
AuxiliaryInfoAwareDelegateFeatureGenerator(
new IdentityFeatureGenerator(), true);
diff --git
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java
index 46d952e..12407a5 100644
---
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java
+++
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java
@@ -30,10 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
public class BigramNameFeatureGeneratorTest {
private List<String> features;
- static String[] testSentence = new String[] {"This", "is", "an", "example",
"sentence"};
+ private static final String[] TEST_SENTENCE = new String[] {"This", "is",
"an", "example", "sentence"};
@BeforeEach
- public void setUp() throws Exception {
+ public void setUp() {
features = new ArrayList<>();
}
@@ -44,7 +44,7 @@ public class BigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(2, features.size());
assertEquals("w,nw=This,is", features.get(0));
@@ -58,7 +58,7 @@ public class BigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(4, features.size());
assertEquals("pw,w=is,an", features.get(0));
@@ -74,7 +74,7 @@ public class BigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(2, features.size());
assertEquals("pw,w=example,sentence", features.get(0));
diff --git
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java
index dc6962d..96cd746 100644
---
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java
+++
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java
@@ -30,10 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
public class TokenClassFeatureGeneratorTest {
private List<String> features;
- static String[] testSentence = new String[] {"This", "is", "an", "Example",
"sentence"};
+ private static final String[] TEST_SENTENCE = new String[] {"This", "is",
"an", "Example", "sentence"};
@BeforeEach
- public void setUp() throws Exception {
+ public void setUp() {
features = new ArrayList<>();
}
@@ -44,7 +44,7 @@ public class TokenClassFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(true);
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(2, features.size());
assertEquals("wc=alpha", features.get(0));
@@ -58,7 +58,7 @@ public class TokenClassFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(false);
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(1, features.size());
assertEquals("wc=alpha", features.get(0));
diff --git
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java
index 24509ef..3b84faf 100644
---
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java
+++
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java
@@ -32,7 +32,7 @@ public class TokenPatternFeatureGeneratorTest {
private List<String> features;
@BeforeEach
- public void setUp() throws Exception {
+ public void setUp() {
features = new ArrayList<>();
}
diff --git
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java
index 789c508..02bafb1 100644
---
a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java
+++
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java
@@ -30,10 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
public class TrigramNameFeatureGeneratorTest {
private List<String> features;
- static String[] testSentence = new String[] {"This", "is", "an", "example",
"sentence"};
+ private static final String[] TEST_SENTENCE = new String[] {"This", "is",
"an", "example", "sentence"};
@BeforeEach
- public void setUp() throws Exception {
+ public void setUp() {
features = new ArrayList<>();
}
@@ -44,7 +44,7 @@ public class TrigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(2, features.size());
assertEquals("w,nw,nnw=This,is,an", features.get(0));
@@ -58,7 +58,7 @@ public class TrigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(2, features.size());
assertEquals("w,nw,nnw=is,an,example", features.get(0));
@@ -72,7 +72,7 @@ public class TrigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(4, features.size());
assertEquals("ppw,pw,w=This,is,an", features.get(0));
@@ -88,7 +88,7 @@ public class TrigramNameFeatureGeneratorTest {
AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator();
- generator.createFeatures(features, testSentence, testTokenIndex, null);
+ generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null);
assertEquals(2, features.size());
assertEquals("ppw,pw,w=an,example,sentence", features.get(0));
diff --git
a/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java
b/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java
index e417830..88d8b1b 100644
--- a/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java
+++ b/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java
@@ -60,9 +60,8 @@ public class JWNLLemmatizerTest {
@Test
public void testLemmatizeList() {
- assertThrows(UnsupportedOperationException.class, () -> {
- lemmatizer.lemmatize(List.of("mouse"), List.of("NN"));
- });
+ assertThrows(UnsupportedOperationException.class, () ->
+ lemmatizer.lemmatize(List.of("mouse"), List.of("NN")));
}
private static Stream<Arguments> provideData() {
diff --git a/liblinear-addon/src/main/java/LiblinearModelSerializer.java
b/liblinear-addon/src/main/java/LiblinearModelSerializer.java
index 19da2bb..370e225 100644
--- a/liblinear-addon/src/main/java/LiblinearModelSerializer.java
+++ b/liblinear-addon/src/main/java/LiblinearModelSerializer.java
@@ -27,11 +27,13 @@ import opennlp.tools.util.model.ArtifactSerializer;
public class LiblinearModelSerializer implements
ArtifactSerializer<LiblinearModel> {
+ @Override
public LiblinearModel create(InputStream in) throws IOException,
InvalidFormatException {
return new LiblinearModel(in);
}
+ @Override
public void serialize(LiblinearModel model, OutputStream out)
throws IOException {
model.serialize(out);
diff --git a/liblinear-addon/src/main/java/LiblinearTrainer.java
b/liblinear-addon/src/main/java/LiblinearTrainer.java
index abfac72..a494a09 100644
--- a/liblinear-addon/src/main/java/LiblinearTrainer.java
+++ b/liblinear-addon/src/main/java/LiblinearTrainer.java
@@ -103,7 +103,7 @@ public class LiblinearTrainer extends AbstractEventTrainer {
List<Feature[]> vx = new ArrayList<>();
// outcomes
- int outcomes[] = indexer.getOutcomeList();
+ int[] outcomes = indexer.getOutcomeList();
int max_index = 0;
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
index 9354c50..60104a1 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
@@ -28,7 +28,7 @@ Supplies a list of known entities (a list of names or
locations)
public interface KnownEntityProvider extends
ModelParameter<BaseModelBuilderParams> {
/**
- * returns a list of known non ambiguous entities.
+ * returns a list of known non-ambiguous entities.
* @return a set of entities
*/
Set<String> getKnownEntities();
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
index a66f36f..16c11e0 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
@@ -20,16 +20,13 @@ import
opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
import java.util.Collection;
/**
- *
-Validates results from the iterative namefinding
+ * Validates results from the iterative name finding.
*/
public interface ModelGenerationValidator extends
ModelParameter<BaseModelBuilderParams> {
Boolean validSentence(String sentence);
Boolean validNamedEntity(String namedEntity);
-
-
Collection<String> getBlackList();
}
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
index 7aa7d0c..1cb94b4 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
@@ -17,24 +17,22 @@ package opennlp.addons.modelbuilder.impls;
import java.io.BufferedReader;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import opennlp.addons.modelbuilder.KnownEntityProvider;
-/**
- *
- */
public class FileKnownEntityProvider implements KnownEntityProvider {
- Set<String> knownEntities = new HashSet<>();
- BaseModelBuilderParams params;
+ private final Set<String> knownEntities = new HashSet<>();
+ private BaseModelBuilderParams params;
+
@Override
public Set<String> getKnownEntities() {
if (knownEntities.isEmpty()) {
@@ -44,7 +42,7 @@ public class FileKnownEntityProvider implements
KnownEntityProvider {
String line;
fis = new FileInputStream(params.getKnownEntitiesFile());
- br = new BufferedReader(new InputStreamReader(fis,
Charset.forName("UTF-8")));
+ br = new BufferedReader(new InputStreamReader(fis,
StandardCharsets.UTF_8));
while ((line = br.readLine()) != null) {
knownEntities.add(line);
}
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
index a953c76..8b31d56 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
@@ -34,7 +34,7 @@ import opennlp.addons.modelbuilder.ModelGenerationValidator;
public class FileModelValidatorImpl implements ModelGenerationValidator {
private final Set<String> badentities = new HashSet<>();
- BaseModelBuilderParams params;
+ private BaseModelBuilderParams params;
@Override
public void setParameters(BaseModelBuilderParams params) {
@@ -58,11 +58,7 @@ public class FileModelValidatorImpl implements
ModelGenerationValidator {
// if (p.matcher(namedEntity).find()) {
// return false;
// }
- boolean b = true;
- if (badentities.contains(namedEntity.toLowerCase())) {
- b = false;
- }
- return b;
+ return !badentities.contains(namedEntity.toLowerCase());
}
@Override
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
index 126157f..358cef1 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
@@ -43,8 +43,8 @@ public class GenericModelGenerator implements
SemiSupervisedModelGenerator {
ModelGenerationValidator validator, Modelable modelable, int
iterations) {
for (int iteration = 0; iteration < iterations; iteration++) {
System.out.println("ITERATION: " + iteration);
- System.out.println("\tPerfoming Known Entity Annotation");
- System.out.println("\t\tknowns: " +
knownEntityProvider.getKnownEntities().size());
+ System.out.println("\tPerforming Known Entity Annotation");
+ System.out.println("\t\tknown size: " +
knownEntityProvider.getKnownEntities().size());
System.out.println("\t\treading data....: ");
for (String sentence : sentenceProvider.getSentences()) {
for (String knownEntity : knownEntityProvider.getKnownEntities()) {
@@ -95,7 +95,7 @@ public class GenericModelGenerator implements
SemiSupervisedModelGenerator {
}
}
System.out.println("\t\tannotated sentences: " +
modelable.getAnnotatedSentences().size());
- System.out.println("\t\tknowns: " +
knownEntityProvider.getKnownEntities().size());
+ System.out.println("\t\tknown size: " +
knownEntityProvider.getKnownEntities().size());
}
modelable.writeAnnotatedSentences();
modelable.buildModel(knownEntityProvider.getKnownEntitiesType());
diff --git a/morfologik-addon/src/main/assembly/bin.xml
b/morfologik-addon/src/main/assembly/bin.xml
index ab4f6da..856685a 100644
--- a/morfologik-addon/src/main/assembly/bin.xml
+++ b/morfologik-addon/src/main/assembly/bin.xml
@@ -45,14 +45,14 @@
<fileSets>
<fileSet>
<directory>src/main/readme</directory>
- <outputDirectory></outputDirectory>
+ <outputDirectory/>
<fileMode>644</fileMode>
<directoryMode>755</directoryMode>
</fileSet>
<fileSet>
<directory>.</directory>
- <outputDirectory></outputDirectory>
+ <outputDirectory/>
<filtered>true</filtered>
<fileMode>644</fileMode>
<directoryMode>755</directoryMode>
@@ -64,7 +64,7 @@
<fileSet>
<directory>target</directory>
- <outputDirectory></outputDirectory>
+ <outputDirectory/>
<fileMode>644</fileMode>
<directoryMode>755</directoryMode>
<includes>
diff --git a/morfologik-addon/src/main/assembly/src.xml
b/morfologik-addon/src/main/assembly/src.xml
index cdcc9d3..f67f953 100644
--- a/morfologik-addon/src/main/assembly/src.xml
+++ b/morfologik-addon/src/main/assembly/src.xml
@@ -27,7 +27,7 @@
<fileSets>
<fileSet>
<directory>../</directory>
- <outputDirectory></outputDirectory>
+ <outputDirectory/>
<excludes>
<exclude>**/target/**</exclude>
<exclude>**/.*/**</exclude>
diff --git
a/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
b/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java
similarity index 95%
rename from
morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
rename to
morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java
index dbbca4d..f827d03 100644
---
a/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java
+++
b/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java
@@ -17,8 +17,6 @@
package opennlp.morfologik.builder;
-import java.io.FileNotFoundException;
-import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Properties;
@@ -32,7 +30,7 @@ import morfologik.tools.DictCompile;
* file. The first column is the word, the second its lemma and the third a POS
* tag. If there is no lemma information leave the second column empty.
*/
-public class MorfologikDictionayBuilder {
+public class MorfologikDictionaryBuilder {
/**
* Helper to compile a morphological dictionary automaton.
@@ -90,7 +88,7 @@ public class MorfologikDictionayBuilder {
}
Properties createProperties(Charset encoding, String separator,
- EncoderType encoderType) throws FileNotFoundException, IOException {
+ EncoderType encoderType) {
Properties properties = new Properties();
properties.setProperty("fsa.dict.separator", separator);
diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
index 5205739..5373ecd 100644
--- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
+++ b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java
@@ -103,7 +103,7 @@ public final class CLI {
System.exit(0);
}
- String toolArguments[] = new String[args.length -1];
+ String[] toolArguments = new String[args.length -1];
System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);
String toolName = args[0];
diff --git
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
index eb9b51c..923255f 100644
---
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
+++
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java
@@ -21,7 +21,7 @@ import java.io.File;
import java.nio.file.Path;
import morfologik.stemming.DictionaryMetadata;
-import opennlp.morfologik.builder.MorfologikDictionayBuilder;
+import opennlp.morfologik.builder.MorfologikDictionaryBuilder;
import opennlp.tools.cmdline.BasicCmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
@@ -48,14 +48,14 @@ public class MorfologikDictionaryBuilderTool extends
BasicCmdLineTool {
Path metadataPath =
DictionaryMetadata.getExpectedMetadataLocation(dictInFile.toPath());
CmdLineUtil.checkInputFile("dictionary metadata (.info) input file",
metadataPath.toFile());
- MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
+ MorfologikDictionaryBuilder builder = new MorfologikDictionaryBuilder();
try {
builder.build(dictInFile.toPath(), params.getOverwrite(),
params.getValidate(), params.getAcceptBOM(), params.getAcceptCR(),
params.getIgnoreEmpty());
} catch (Exception e) {
throw new TerminateToolException(-1,
- "Error while creating Morfologik POS Dictionay: " + e.getMessage(),
e);
+ "Error while creating Morfologik POS Dictionary: " + e.getMessage(),
e);
}
}
diff --git
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
index 4ee8cd4..eb001dc 100644
---
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
+++
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java
@@ -34,7 +34,7 @@ interface XMLDictionaryToTableParams extends
EncodingParameter {
@ParameterDescription(valueName = "out", description = "Output for
Morfologik (.info will be also created).")
File getOutputFile();
- @ParameterDescription(valueName = "char", description = "Columm separator
(must be a single character)")
+ @ParameterDescription(valueName = "char", description = "Column separator
(must be a single character)")
@OptionalParameter(defaultValue=",")
String getSeparator();
diff --git
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
index edc41c7..c53a4b8 100644
---
a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
+++
b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java
@@ -64,7 +64,7 @@ public class XMLDictionaryToTableTool extends
BasicCmdLineTool {
tagDictionary = POSDictionary.create(new FileInputStream(dictInFile));
} catch (IOException e) {
throw new TerminateToolException(-1,
- "Error while loading XML POS Dictionay: " + e.getMessage(), e);
+ "Error while loading XML POS Dictionary: " + e.getMessage(), e);
}
Iterator<String> iterator = tagDictionary.iterator();
diff --git
a/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
b/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
index 650f7a6..421a82f 100644
---
a/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
+++
b/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
@@ -34,7 +34,7 @@ import opennlp.tools.lemmatizer.Lemmatizer;
public class MorfologikLemmatizer implements Lemmatizer {
- private IStemmer dictLookup;
+ private final IStemmer dictLookup;
public final Set<String> constantTags = new HashSet<>(Arrays.asList("NNP",
"NP00000"));
public MorfologikLemmatizer(Path dictionaryPath) throws
IllegalArgumentException, IOException {
diff --git
a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
b/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
index 5b6bf13..c5219c1 100644
---
a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
+++
b/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java
@@ -55,7 +55,7 @@ public class MorfologikTagDictionary implements TagDictionary
{
* @param dict
* a Morfologik FSA dictionary
* @param caseSensitive
- * if true it performs case sensitive lookup
+ * if true it performs case-sensitive lookup
* @throws IllegalArgumentException
* if FSA's root node cannot be acquired (dictionary is empty).
* @throws IOException
@@ -73,12 +73,12 @@ public class MorfologikTagDictionary implements
TagDictionary {
}
List<WordData> data = dictLookup.lookup(word);
- if (data != null && data.size() > 0) {
+ if (data != null && !data.isEmpty()) {
List<String> tags = new ArrayList<>(data.size());
for (WordData datum : data) {
tags.add(datum.getTag().toString());
}
- if (tags.size() > 0)
+ if (!tags.isEmpty())
return tags.toArray(new String[0]);
return null;
}
diff --git
a/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
b/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionaryBuilderTest.java
similarity index 85%
rename from
morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
rename to
morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionaryBuilderTest.java
index ae2bb46..834447c 100644
---
a/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java
+++
b/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionaryBuilderTest.java
@@ -29,7 +29,7 @@ import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertNotNull;
-public class POSDictionayBuilderTest {
+public class POSDictionaryBuilderTest {
@Test
public void testBuildDictionary() throws Exception {
@@ -42,15 +42,15 @@ public class POSDictionayBuilderTest {
public static Path createMorfologikDictionary() throws Exception {
Path tabFilePath = File.createTempFile(
- POSDictionayBuilderTest.class.getName(), ".txt").toPath();
+ POSDictionaryBuilderTest.class.getName(), ".txt").toPath();
Path infoFilePath =
DictionaryMetadata.getExpectedMetadataLocation(tabFilePath);
- Files.copy(POSDictionayBuilderTest.class.getResourceAsStream(
+ Files.copy(POSDictionaryBuilderTest.class.getResourceAsStream(
"/dictionaryWithLemma.txt"), tabFilePath,
StandardCopyOption.REPLACE_EXISTING);
- Files.copy(POSDictionayBuilderTest.class.getResourceAsStream(
+ Files.copy(POSDictionaryBuilderTest.class.getResourceAsStream(
"/dictionaryWithLemma.info"), infoFilePath,
StandardCopyOption.REPLACE_EXISTING);
- MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
+ MorfologikDictionaryBuilder builder = new MorfologikDictionaryBuilder();
return builder.build(tabFilePath);
}
diff --git
a/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
b/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
index f1212cc..10f61ca 100644
---
a/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
+++
b/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java
@@ -19,7 +19,7 @@ package opennlp.morfologik.lemmatizer;
import java.nio.file.Path;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.morfologik.builder.POSDictionaryBuilderTest;
import org.junit.jupiter.api.Test;
@@ -38,7 +38,7 @@ public class MorfologikLemmatizerTest {
}
private MorfologikLemmatizer createDictionary(boolean caseSensitive) throws
Exception {
- Path output = POSDictionayBuilderTest.createMorfologikDictionary();
+ Path output = POSDictionaryBuilderTest.createMorfologikDictionary();
return new MorfologikLemmatizer(output);
}
diff --git
a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
index d6bc2fe..7e63e27 100644
---
a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
+++
b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java
@@ -21,7 +21,7 @@ import java.util.Arrays;
import java.util.List;
import morfologik.stemming.Dictionary;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.morfologik.builder.POSDictionaryBuilderTest;
import opennlp.tools.postag.TagDictionary;
import org.junit.jupiter.api.Test;
@@ -49,9 +49,9 @@ public class MorfologikTagDictionaryTest {
assertTrue(tags.contains("NOUN"));
assertTrue(tags.contains("V"));
- // this is the behavior of case insensitive dictionary
- // if we search it using case insensitive, Casa as a proper noun
- // should be lower case in the dictionary
+ // this is the behavior of case-insensitive dictionary
+ // if we search it using case-insensitive, Casa as a proper noun
+ // should be lower-cased in the dictionary
tags = Arrays.asList(dict.getTags("Casa"));
assertEquals(2, tags.size());
assertTrue(tags.contains("NOUN"));
@@ -68,9 +68,9 @@ public class MorfologikTagDictionaryTest {
assertTrue(tags.contains("NOUN"));
assertTrue(tags.contains("V"));
- // this is the behavior of case insensitive dictionary
- // if we search it using case insensitive, Casa as a proper noun
- // should be lower case in the dictionary
+ // this is the behavior of case-insensitive dictionary
+ // if we search it using case-insensitive, Casa as a proper noun
+ // should be lower-cased in the dictionary
tags = Arrays.asList(dict.getTags("Casa"));
assertEquals(1, tags.size());
assertTrue(tags.contains("PROP"));
@@ -84,7 +84,7 @@ public class MorfologikTagDictionaryTest {
private MorfologikTagDictionary createDictionary(boolean caseSensitive,
List<String> constant) throws Exception {
- Dictionary dic =
Dictionary.read(POSDictionayBuilderTest.createMorfologikDictionary());
+ Dictionary dic =
Dictionary.read(POSDictionaryBuilderTest.createMorfologikDictionary());
return new MorfologikTagDictionary(dic, caseSensitive);
}
diff --git
a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
index 602ffc6..871977d 100644
---
a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
+++
b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
@@ -24,7 +24,7 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
-import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.morfologik.builder.POSDictionaryBuilderTest;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerFactory;
@@ -39,6 +39,7 @@ import opennlp.tools.util.model.ModelType;
import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -50,7 +51,7 @@ public class POSTaggerFactoryTest {
@Test
public void testPOSTaggerWithCustomFactory() throws Exception {
- Path dictionary = POSDictionayBuilderTest.createMorfologikDictionary();
+ Path dictionary = POSDictionaryBuilderTest.createMorfologikDictionary();
POSTaggerFactory inFactory = new MorfologikPOSTaggerFactory();
TagDictionary inDict = inFactory.createTagDictionary(dictionary.toFile());
inFactory.setTagDictionary(inDict);
@@ -58,7 +59,7 @@ public class POSTaggerFactoryTest {
POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory);
POSTaggerFactory factory = posModel.getFactory();
- assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
+ assertInstanceOf(MorfologikTagDictionary.class,
factory.getTagDictionary());
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
@@ -66,7 +67,7 @@ public class POSTaggerFactoryTest {
POSModel fromSerialized = new POSModel(new
ByteArrayInputStream(out.toByteArray()));
factory = fromSerialized.getFactory();
- assertTrue(factory.getTagDictionary() instanceof
MorfologikTagDictionary);
+ assertInstanceOf(MorfologikTagDictionary.class,
factory.getTagDictionary());
assertEquals(2, factory.getTagDictionary().getTags("casa").length);
}