This is an automated email from the ASF dual-hosted git repository. humbedooh pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/opennlp-addons.git
commit 0bd12e8c57a94c2f4d9ebac26eddbd83c9922302 Author: Martin Wiesner <[email protected]> AuthorDate: Sat Jan 25 21:52:21 2025 +0100 corrects minor typos in geoentitylinker classes fixes grammar issues throughout all modules corrects some C-style array declarations makes fields final where applicable simplifies if-else statements removes redundant 'throws' clauses simplifies some JUnit assertions --- .../AdminBoundaryContextGenerator.java | 9 +++++---- .../addons/geoentitylinker/GazetteerEntry.java | 7 ++----- .../geoentitylinker/indexing/GazetteerIndexer.java | 2 +- .../indexing/GeonamesFileDownloader.java | 4 ++-- .../indexing/GeonamesProcessor.java | 4 +--- .../geoentitylinker/indexing/RegionProcessor.java | 6 +++--- .../geoentitylinker/indexing/USGSProcessor.java | 4 ++-- .../scoring/CountryProximityScorer.java | 2 +- .../scoring/FuzzyStringMatchScorer.java | 3 +-- .../geoentitylinker/scoring/ModelBasedScorer.java | 4 ++-- .../geoentitylinker/scoring/PointClustering.java | 4 ++-- .../scoring/ProvinceProximityScorer.java | 22 +++++++++------------- .../AuxiliaryInfoNameContextGenerator.java | 8 ++++---- .../AuxiliaryInfoTokenNameFinderFactory.java | 2 +- .../opennlp/tools/namefind/AuxiliaryInfoUtil.java | 2 +- ...AuxiliaryInfoAwareDelegateFeatureGenerator.java | 2 +- .../lang/jpn/BrownTokenClassFeatureGenerator.java | 2 +- .../featuregen/lang/jpn/FeatureGeneratorUtil.java | 2 +- .../lang/jpn/TokenClassFeatureGenerator.java | 2 +- .../lang/jpn/TokenPatternFeatureGenerator.java | 4 ++-- .../tools/namefind/AuxiliaryInfoUtilTest.java | 12 ++++++------ ...liaryInfoAwareDelegateFeatureGeneratorTest.java | 6 +++--- .../lang/jpn/BigramNameFeatureGeneratorTest.java | 10 +++++----- .../lang/jpn/TokenClassFeatureGeneratorTest.java | 8 ++++---- .../lang/jpn/TokenPatternFeatureGeneratorTest.java | 2 +- .../lang/jpn/TrigramNameFeatureGeneratorTest.java | 12 ++++++------ .../jwnl/lemmatizer/JWNLLemmatizerTest.java | 5 ++--- .../src/main/java/LiblinearModelSerializer.java | 2 
++ .../src/main/java/LiblinearTrainer.java | 2 +- .../addons/modelbuilder/KnownEntityProvider.java | 2 +- .../modelbuilder/ModelGenerationValidator.java | 5 +---- .../impls/FileKnownEntityProvider.java | 14 ++++++-------- .../modelbuilder/impls/FileModelValidatorImpl.java | 8 ++------ .../modelbuilder/impls/GenericModelGenerator.java | 6 +++--- morfologik-addon/src/main/assembly/bin.xml | 6 +++--- morfologik-addon/src/main/assembly/src.xml | 2 +- ...ilder.java => MorfologikDictionaryBuilder.java} | 6 ++---- .../main/java/opennlp/morfologik/cmdline/CLI.java | 2 +- .../builder/MorfologikDictionaryBuilderTool.java | 6 +++--- .../builder/XMLDictionaryToTableParams.java | 2 +- .../cmdline/builder/XMLDictionaryToTableTool.java | 2 +- .../lemmatizer/MorfologikLemmatizer.java | 2 +- .../tagdict/MorfologikTagDictionary.java | 6 +++--- ...lderTest.java => POSDictionaryBuilderTest.java} | 10 +++++----- .../lemmatizer/MorfologikLemmatizerTest.java | 4 ++-- .../tagdict/MorfologikTagDictionaryTest.java | 16 ++++++++-------- .../morfologik/tagdict/POSTaggerFactoryTest.java | 9 +++++---- 47 files changed, 122 insertions(+), 140 deletions(-) diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java index 71e00a5..bcf6076 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/AdminBoundaryContextGenerator.java @@ -350,19 +350,20 @@ public class AdminBoundaryContextGenerator { provs = new HashMap<>(); } //if (!provs.containsKey(adm.getProvCode())) { - provs.put(adm.countryCode() + "." + adm.getProvCode(), adm.provinceName()); + String combined = adm.countryCode() + "." 
+ adm.getProvCode(); + provs.put(combined, adm.provinceName()); provMap.put(adm.countryCode(), provs); // } if (!adm.countyCode().equalsIgnoreCase("no_data_found") && !adm.countyName().equalsIgnoreCase("no_data_found")) { - Map<String, String> counties = countyMap.get(adm.countryCode() + "." + adm.getProvCode()); + Map<String, String> counties = countyMap.get(combined); if (counties == null) { counties = new HashMap<>(); } // if (!counties.containsKey(adm.getCountyCode())) { - String countyid = adm.countryCode() + "." + adm.getProvCode() + "." + adm.countyCode(); + String countyid = combined + "." + adm.countyCode(); counties.put(countyid, adm.countyName()); - countyMap.put(adm.countryCode() + "." + adm.getProvCode(), counties); + countyMap.put(combined, counties); // } } } diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java index 86fc0ea..6497894 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java @@ -127,7 +127,7 @@ public class GazetteerEntry extends BaseLink { @Override public String toString() { - return super.toString() + "\n\t\tGazateerEntry\n" + "\t\tlatitude=" + + return super.toString() + "\n\t\tGazetteerEntry\n" + "\t\tlatitude=" + latitude + ", \n\t\tlongitude=" + longitude + ", \n\t\tsource=" + source + ", \n\t\tindexID=" + indexID + ",\n\t\tindexData=" + indexData + "\n"; } @@ -152,9 +152,6 @@ public class GazetteerEntry extends BaseLink { if (!Objects.equals(this.source, other.source)) { return false; } - if (!Objects.equals(this.indexID, other.indexID)) { - return false; - } - return true; + return Objects.equals(this.indexID, other.indexID); } } diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java 
b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java index d178a44..a640c5d 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GazetteerIndexer.java @@ -93,7 +93,7 @@ public class GazetteerIndexer { * <a href="https://geonames.usgs.gov/domestic/download_data.htm">here</a> click on the * national_file####.zip link to get all the most recent features * - * @param usgsGovUnitsFile go to + * @param usgsGovUnitsFile go * <a href="https://geonames.usgs.gov/domestic/download_data.htm">here</a> in the section titled * "Topical Gazetteers -- File Format" click on the dropdown list and select * "Government Units". The downloaded file is what you need for this param. diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java index df010ed..b255553 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesFileDownloader.java @@ -28,7 +28,7 @@ import java.util.zip.ZipFile; public class GeonamesFileDownloader { final static int size = 1024; - private static final String ALL_COUNTRIES = "http://download.geonames.org/export/dump/ZM.zip"; + private static final String ALL_COUNTRIES = "https://download.geonames.org/export/dump/ZM.zip"; private static final String COUNTRY_INFO = ""; private static final String ADM1_LOOKUP = ""; @@ -81,7 +81,7 @@ public class GeonamesFileDownloader { byteWritten += byteRead; } System.out.println("Downloaded Successfully."); - System.out.println("File name:\"" + localFileName + "\"\nNo ofbytes :" + byteWritten); + System.out.println("File name:\"" + 
localFileName + "\"\nNo of bytes :" + byteWritten); } catch (Exception e) { e.printStackTrace(); } diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java index 0553e3c..b1e8bee 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/GeonamesProcessor.java @@ -127,7 +127,7 @@ public class GeonamesProcessor { } String[] values = line.split(TAB); - String ccode = values[0].toLowerCase();//this is the 2 digit ISO code + String ccode = values[0].toLowerCase(); //this is the 2-digit ISO code String cname = values[4].toLowerCase(); if (!ccode.isEmpty()) { ccs.put(ccode, cname); @@ -245,8 +245,6 @@ public class GeonamesProcessor { doc.add(new TextField("hierarchy", concatIndexEntry, Field.Store.YES)); doc.add(new TextField("placename", placeName, Field.Store.YES)); // doc.add(new TextField("countryname", countryname, Field.Store.YES)); - //System.out.println(placeName); - doc.add(new TextField("latitude", lat, Field.Store.YES)); doc.add(new TextField("longitude", lon, Field.Store.YES)); doc.add(new StringField("loctype", dsg, Field.Store.YES)); diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java index 5335c2b..d97427f 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/RegionProcessor.java @@ -55,14 +55,14 @@ public class RegionProcessor { } } - public static void readFile(File gazateerInputData, File outputCountryContextfile, - IndexWriter w) throws IOException { + public static void 
readFile(File gazetteerInput, File outputCountryContextfile, IndexWriter w) + throws IOException { List<String> ccfileentries = new ArrayList<>(); List<String> fields = new ArrayList<>(); int counter = 0; System.out.println("reading gazetteer data from Regions file..........."); String line; - try (BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData))) { + try (BufferedReader reader = new BufferedReader(new FileReader(gazetteerInput))) { while ((line = reader.readLine()) != null) { String[] values = line.split("\t"); if (counter == 0) { diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java index ac1e91f..09105f8 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/indexing/USGSProcessor.java @@ -56,11 +56,11 @@ public class USGSProcessor { writeCountryContextFile(outputCountryContextfile, provData); } - public static void readFile(File gazateerInputData, IndexWriter w, GazetteerIndexer.GazType type, + public static void readFile(File gazetteerInput, IndexWriter w, GazetteerIndexer.GazType type, Map<String, AdminBoundary> lookupMap) throws IOException { Map<String, StateCentroid> states = new HashMap<>(); - try (BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData))) { + try (BufferedReader reader = new BufferedReader(new FileReader(gazetteerInput))) { List<String> fields = new ArrayList<>(); int counter = 0; diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java index 726c809..08d6055 100644 --- 
a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java @@ -204,7 +204,7 @@ public class CountryProximityScorer implements LinkedEntityScorer<BaseLink, Adm for (Integer i : distanceMap.get(key)) { Double norm = normalize(i, min, max); //reverse the normed distance so low numbers (closer) are better - //this could be improved with a "decaying " function using an imcreaseing negative exponent + //this could be improved with a "decaying " function using an increasing negative exponent Double reverse = Math.abs(norm - 1); normalizedDistances.add(reverse); } diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java index a9a7e3e..873b6ea 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/FuzzyStringMatchScorer.java @@ -28,8 +28,7 @@ import opennlp.tools.entitylinker.LinkedSpan; import opennlp.tools.util.Span; /** - * - * Generates scores based on string comparisons levenstein and dice + * Generates scores based on string comparisons Levenshtein and Dice. 
*/ public class FuzzyStringMatchScorer implements LinkedEntityScorer<GazetteerEntry, AdminBoundaryContext> { diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java index d227b8d..4b305e4 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ModelBasedScorer.java @@ -120,10 +120,10 @@ public class ModelBasedScorer implements LinkedEntityScorer<BaseLink, AdminBound public String getTextChunk(int mentionIdx, String docText, int radius) { int docSize = docText.length(); - int left = 0, right = 0; + int left, right; left = (mentionIdx - radius < 0) ? 0 : mentionIdx - radius; right = (mentionIdx + radius > docSize) ? docSize : mentionIdx + radius; - String chunk = ""; + String chunk; if (right <= left) { chunk = ""; } else { diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java index a49c1aa..5f218d9 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/PointClustering.java @@ -31,7 +31,7 @@ import opennlp.addons.geoentitylinker.GazetteerEntry; public class PointClustering { /** - * Clusters a set of points from the gazateers. The idea is that locations + * Clusters a set of points from the gazetteers. 
The idea is that locations * that matched a name that are closer to each other, the more likely the * toponym is to be accurate * @@ -63,7 +63,7 @@ public class PointClustering { for (String key : clusters.keySet()) { int size = clusters.get(key).size(); if (size > max) { - max = (double) size; + max = size; } } for (String key : clusters.keySet()) { diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java index 6badb60..13ce217 100644 --- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java +++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java @@ -62,7 +62,7 @@ public class ProvinceProximityScorer implements LinkedEntityScorer<BaseLink, Adm /** * Assigns a score to each BaseLink in each linkedSpan's set of N best - * matches. Currently the scoring indicates the probability that the toponym + * matches. Currently, the scoring indicates the probability that the toponym * is correct based on the country context in the document * * @param linkedData the linked spans, holds the Namefinder results, and the @@ -118,7 +118,7 @@ public class ProvinceProximityScorer implements LinkedEntityScorer<BaseLink, Adm LinkedSpan<BaseLink> span, Integer maxAllowedDistance) { Double score = 0.0; /* - * get the index of the actual span, begining of sentence //should generate + * get the index of the actual span, beginning of sentence //should generate * tokens from sentence and create a char offset... 
//could have large * sentences due to poor sentence detection or wonky doc text */ @@ -131,22 +131,18 @@ public class ProvinceProximityScorer implements LinkedEntityScorer<BaseLink, Adm Map<String, Set<Integer>> distancesFromCodeMap = new HashMap<>(); //map = Map<countrycode, Set <of distances this span is from all the mentions of the code>> for (String cCode : countryHits.keySet()) { - //iterate over all the regex start values and calculate an offset + // iterate over all the regex start values and calculate an offset for (Integer cHit : countryHits.get(cCode)) { - Integer absDist = Math.abs(sentIndexInDoc - cHit); + int absDist = Math.abs(sentIndexInDoc - cHit); //only include near mentions based on a heuristic //TODO make this a property // if (absDist < maxAllowedDistance) { if (distancesFromCodeMap.containsKey(cCode)) { distancesFromCodeMap.get(cCode).add(absDist); } else { - HashSet<Integer> newset = new HashSet<>(); - newset.add(absDist); - distancesFromCodeMap.put(cCode, newset); + distancesFromCodeMap.put(cCode, new HashSet<>(absDist)); } } - - //} } //we now know how far this named entity is from every country mention in the document @@ -172,7 +168,7 @@ public class ProvinceProximityScorer implements LinkedEntityScorer<BaseLink, Adm if (nameCodesMap.containsKey(link.getItemName().toLowerCase()) || regexMatch(link.getItemName(), link.getItemParentID())) { //if so, is it the correct country code for that name? if (nameCodesMap.get(entry.getItemName().toLowerCase()).contains(entry.getProvinceCode())) { - //boost the score becuase it is likely that this is the location in the text, so add 50% to the score or set to 1 + //boost the score because it is likely that this is the location in the text, so add 50% to the score or set to 1 //TODO: make this smarter score = (score + .75) > 1.0 ? 
1d : (score + .75); @@ -219,15 +215,15 @@ public class ProvinceProximityScorer implements LinkedEntityScorer<BaseLink, Adm for (String key : distanceMap.keySet()) { all.addAll(distanceMap.get(key)); } - //get min max for normalization, this could be more efficient - + + // get min max for normalization, this could be more efficient int min = all.first(); int max = all.last(); if (min == max) { min = 0; } - for (String key : distanceMap.keySet()) { + for (String key : distanceMap.keySet()) { TreeSet<Double> normalizedDistances = new TreeSet<>(); for (Integer i : distanceMap.get(key)) { Double norm = normalize(i, min, max); diff --git a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java index bda691b..333379f 100644 --- a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java +++ b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoNameContextGenerator.java @@ -24,7 +24,7 @@ import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator; import opennlp.tools.util.featuregen.FeatureGeneratorUtil; /** - * If a token contains an auxiliary information, e.g. POS tag, this class can be used + * If a token contains auxiliary information, e.g. POS tag, this class can be used * to extract word part in {@link #getContext(int, String[], String[], Object[])} method. * * <strong>EXPERIMENTAL</strong>. @@ -40,11 +40,11 @@ public class AuxiliaryInfoNameContextGenerator extends DefaultNameContextGenerat * Return the context for finding names at the specified index. * @param index The index of the token in the specified toks array for which the * context should be constructed. - * @param tokens The tokens of the sentence. The <code>toString</code> methods + * @param tokens The tokens of the sentence. The <code>toString</code> methods * of these objects should return the token text. 
* @param preds The previous decisions made in the tagging of this sequence. - * Only indices less than i will be examined. - * @param additionalContext Addition features which may be based on a context outside of the sentence. + * Only indices less than {@code i} will be examined. + * @param additionalContext Addition features which may be based on a context outside the sentence. * * @return the context for finding names at the specified index. */ diff --git a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java index ea091e6..9977433 100644 --- a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java +++ b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoTokenNameFinderFactory.java @@ -28,7 +28,7 @@ import opennlp.tools.util.featuregen.TokenFeatureGenerator; import opennlp.tools.util.featuregen.WindowFeatureGenerator; /** - * If a token contains an auxiliary information, e.g. POS tag, in the training data, + * If a token contains auxiliary information, e.g. POS tag, in the training data, * you can use this class via -factory command line option. * * <strong>EXPERIMENTAL</strong>. diff --git a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java index 7bb336c..a9eb7f8 100644 --- a/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java +++ b/japanese-addon/src/main/java/opennlp/tools/namefind/AuxiliaryInfoUtil.java @@ -18,7 +18,7 @@ package opennlp.tools.namefind; /** - * If a token contains an auxiliary information, e.g. POS tag, this class can be used + * If a token contains auxiliary information, e.g. 
POS tag, this class can be used * to extract word part or auxiliary information part.<br> * * ex) token := word '/' POStag diff --git a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java index ee82c0c..aa9178d 100644 --- a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java +++ b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGenerator.java @@ -23,7 +23,7 @@ import java.util.List; import opennlp.tools.namefind.AuxiliaryInfoUtil; /** - * If a token contains an auxiliary information, e.g. POS tag, in the training data, + * If a token contains auxiliary information, e.g. POS tag, in the training data, * you can use this feature generator in order to let the feature generator choose * word part or auxiliary information part.<br> * diff --git a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java index 2d12171..edae61f 100644 --- a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java +++ b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/BrownTokenClassFeatureGenerator.java @@ -28,7 +28,7 @@ import opennlp.tools.util.featuregen.BrownTokenClasses; */ public class BrownTokenClassFeatureGenerator implements AdaptiveFeatureGenerator { - private BrownCluster brownLexicon; + private final BrownCluster brownLexicon; public BrownTokenClassFeatureGenerator(BrownCluster dict) { this.brownLexicon = dict; diff --git a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java 
b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java index a6c603a..03b5659 100644 --- a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java +++ b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/FeatureGeneratorUtil.java @@ -42,7 +42,7 @@ public class FeatureGeneratorUtil { Objects.requireNonNull(token, "token must be not null!"); - if (token.length() == 0) return "other"; + if (token.isEmpty()) return "other"; // scan token only once char c = token.charAt(0); diff --git a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java index 14cff33..be4cadf 100644 --- a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java +++ b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGenerator.java @@ -31,7 +31,7 @@ public class TokenClassFeatureGenerator implements AdaptiveFeatureGenerator { private static final String TOKEN_CLASS_PREFIX = "wc"; private static final String TOKEN_AND_CLASS_PREFIX = "w&c"; - private boolean generateWordAndClassFeature; + private final boolean generateWordAndClassFeature; public TokenClassFeatureGenerator() { this(false); diff --git a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java index 7771ed7..cf1fe7a 100644 --- a/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java +++ b/japanese-addon/src/main/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGenerator.java @@ -32,8 +32,8 @@ import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator; */ public class TokenPatternFeatureGenerator implements 
AdaptiveFeatureGenerator { - private Pattern noLetters = Pattern.compile("[^a-zA-Z]"); - private Tokenizer tokenizer; + private final Pattern noLetters = Pattern.compile("[^a-zA-Z]"); + private final Tokenizer tokenizer; /** * Initializes a new instance. diff --git a/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java b/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java index 4c0bcc1..db9bad2 100644 --- a/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java +++ b/japanese-addon/src/test/java/opennlp/tools/namefind/AuxiliaryInfoUtilTest.java @@ -25,20 +25,20 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class AuxiliaryInfoUtilTest { @Test - public void testGetSeparatorIndex() throws Exception { + public void testGetSeparatorIndex() { assertEquals(0, AuxiliaryInfoUtil.getSeparatorIndex("/POStag")); assertEquals(1, AuxiliaryInfoUtil.getSeparatorIndex("1/POStag")); assertEquals(10, AuxiliaryInfoUtil.getSeparatorIndex("word/stuff/POStag")); } @Test - public void testGetSeparatorIndexNoPos() throws Exception { + public void testGetSeparatorIndexNoPos() { Assertions.assertThrows(RuntimeException.class, () -> AuxiliaryInfoUtil.getSeparatorIndex("NOPOStags")); } @Test - public void testGetWordPart() throws Exception { + public void testGetWordPart() { assertEquals(" ", AuxiliaryInfoUtil.getWordPart("/POStag")); assertEquals("1", AuxiliaryInfoUtil.getWordPart("1/POStag")); assertEquals("word", AuxiliaryInfoUtil.getWordPart("word/POStag")); @@ -46,7 +46,7 @@ public class AuxiliaryInfoUtilTest { } @Test - public void testGetWordParts() throws Exception { + public void testGetWordParts() { String[] results = AuxiliaryInfoUtil.getWordParts(new String[]{"1/A", "234/B", "3456/C", "/D"}); assertEquals(4, results.length); assertEquals("1", results[0]); @@ -56,7 +56,7 @@ public class AuxiliaryInfoUtilTest { } @Test - public void testGetAuxPart() throws Exception { + public void 
testGetAuxPart() { assertEquals("POStag", AuxiliaryInfoUtil.getAuxPart("/POStag")); assertEquals("POStag", AuxiliaryInfoUtil.getAuxPart("1/POStag")); assertEquals("POStag", AuxiliaryInfoUtil.getAuxPart("word/POStag")); @@ -64,7 +64,7 @@ public class AuxiliaryInfoUtilTest { } @Test - public void testGetAuxParts() throws Exception { + public void testGetAuxParts() { String[] results = AuxiliaryInfoUtil.getAuxParts(new String[] {"1/ABC", "234/B", "3456/CD", "/DEFGH"}); assertEquals(4, results.length); assertEquals("ABC", results[0]); diff --git a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java index 8c8f44d..40f34e6 100644 --- a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java +++ b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/AuxiliaryInfoAwareDelegateFeatureGeneratorTest.java @@ -32,12 +32,12 @@ public class AuxiliaryInfoAwareDelegateFeatureGeneratorTest { private List<String> features; @BeforeEach - public void setUp() throws Exception { + public void setUp() { features = new ArrayList<>(); } @Test - public void testWord() throws Exception { + public void testWord() { AdaptiveFeatureGenerator featureGenerator = new AuxiliaryInfoAwareDelegateFeatureGenerator( new IdentityFeatureGenerator(), false); @@ -47,7 +47,7 @@ public class AuxiliaryInfoAwareDelegateFeatureGeneratorTest { } @Test - public void testAuxInfo() throws Exception { + public void testAuxInfo() { AdaptiveFeatureGenerator featureGenerator = new AuxiliaryInfoAwareDelegateFeatureGenerator( new IdentityFeatureGenerator(), true); diff --git a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java index 
46d952e..12407a5 100644 --- a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java +++ b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/BigramNameFeatureGeneratorTest.java @@ -30,10 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class BigramNameFeatureGeneratorTest { private List<String> features; - static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + private static final String[] TEST_SENTENCE = new String[] {"This", "is", "an", "example", "sentence"}; @BeforeEach - public void setUp() throws Exception { + public void setUp() { features = new ArrayList<>(); } @@ -44,7 +44,7 @@ public class BigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(2, features.size()); assertEquals("w,nw=This,is", features.get(0)); @@ -58,7 +58,7 @@ public class BigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(4, features.size()); assertEquals("pw,w=is,an", features.get(0)); @@ -74,7 +74,7 @@ public class BigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(2, features.size()); assertEquals("pw,w=example,sentence", features.get(0)); diff --git a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java 
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java index dc6962d..96cd746 100644 --- a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java +++ b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenClassFeatureGeneratorTest.java @@ -30,10 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class TokenClassFeatureGeneratorTest { private List<String> features; - static String[] testSentence = new String[] {"This", "is", "an", "Example", "sentence"}; + private static final String[] TEST_SENTENCE = new String[] {"This", "is", "an", "Example", "sentence"}; @BeforeEach - public void setUp() throws Exception { + public void setUp() { features = new ArrayList<>(); } @@ -44,7 +44,7 @@ public class TokenClassFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(true); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(2, features.size()); assertEquals("wc=alpha", features.get(0)); @@ -58,7 +58,7 @@ public class TokenClassFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(false); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(1, features.size()); assertEquals("wc=alpha", features.get(0)); diff --git a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java index 24509ef..3b84faf 100644 --- a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java +++ 
b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TokenPatternFeatureGeneratorTest.java @@ -32,7 +32,7 @@ public class TokenPatternFeatureGeneratorTest { private List<String> features; @BeforeEach - public void setUp() throws Exception { + public void setUp() { features = new ArrayList<>(); } diff --git a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java index 789c508..02bafb1 100644 --- a/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java +++ b/japanese-addon/src/test/java/opennlp/tools/util/featuregen/lang/jpn/TrigramNameFeatureGeneratorTest.java @@ -30,10 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class TrigramNameFeatureGeneratorTest { private List<String> features; - static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + private static final String[] TEST_SENTENCE = new String[] {"This", "is", "an", "example", "sentence"}; @BeforeEach - public void setUp() throws Exception { + public void setUp() { features = new ArrayList<>(); } @@ -44,7 +44,7 @@ public class TrigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(2, features.size()); assertEquals("w,nw,nnw=This,is,an", features.get(0)); @@ -58,7 +58,7 @@ public class TrigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(2, features.size()); assertEquals("w,nw,nnw=is,an,example", 
features.get(0)); @@ -72,7 +72,7 @@ public class TrigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(4, features.size()); assertEquals("ppw,pw,w=This,is,an", features.get(0)); @@ -88,7 +88,7 @@ public class TrigramNameFeatureGeneratorTest { AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); - generator.createFeatures(features, testSentence, testTokenIndex, null); + generator.createFeatures(features, TEST_SENTENCE, testTokenIndex, null); assertEquals(2, features.size()); assertEquals("ppw,pw,w=an,example,sentence", features.get(0)); diff --git a/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java b/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java index e417830..88d8b1b 100644 --- a/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java +++ b/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java @@ -60,9 +60,8 @@ public class JWNLLemmatizerTest { @Test public void testLemmatizeList() { - assertThrows(UnsupportedOperationException.class, () -> { - lemmatizer.lemmatize(List.of("mouse"), List.of("NN")); - }); + assertThrows(UnsupportedOperationException.class, () -> + lemmatizer.lemmatize(List.of("mouse"), List.of("NN"))); } private static Stream<Arguments> provideData() { diff --git a/liblinear-addon/src/main/java/LiblinearModelSerializer.java b/liblinear-addon/src/main/java/LiblinearModelSerializer.java index 19da2bb..370e225 100644 --- a/liblinear-addon/src/main/java/LiblinearModelSerializer.java +++ b/liblinear-addon/src/main/java/LiblinearModelSerializer.java @@ -27,11 +27,13 @@ import opennlp.tools.util.model.ArtifactSerializer; public class LiblinearModelSerializer implements ArtifactSerializer<LiblinearModel> { + @Override public LiblinearModel 
create(InputStream in) throws IOException, InvalidFormatException { return new LiblinearModel(in); } + @Override public void serialize(LiblinearModel model, OutputStream out) throws IOException { model.serialize(out); diff --git a/liblinear-addon/src/main/java/LiblinearTrainer.java b/liblinear-addon/src/main/java/LiblinearTrainer.java index abfac72..a494a09 100644 --- a/liblinear-addon/src/main/java/LiblinearTrainer.java +++ b/liblinear-addon/src/main/java/LiblinearTrainer.java @@ -103,7 +103,7 @@ public class LiblinearTrainer extends AbstractEventTrainer { List<Feature[]> vx = new ArrayList<>(); // outcomes - int outcomes[] = indexer.getOutcomeList(); + int[] outcomes = indexer.getOutcomeList(); int max_index = 0; diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java index 9354c50..60104a1 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java @@ -28,7 +28,7 @@ Supplies a list of known entities (a list of names or locations) public interface KnownEntityProvider extends ModelParameter<BaseModelBuilderParams> { /** - * returns a list of known non ambiguous entities. + * returns a list of known non-ambiguous entities. 
* @return a set of entities */ Set<String> getKnownEntities(); diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java index a66f36f..16c11e0 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java @@ -20,16 +20,13 @@ import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams; import java.util.Collection; /** - * -Validates results from the iterative namefinding + * Validates results from the iterative name finding. */ public interface ModelGenerationValidator extends ModelParameter<BaseModelBuilderParams> { Boolean validSentence(String sentence); Boolean validNamedEntity(String namedEntity); - - Collection<String> getBlackList(); } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java index 7aa7d0c..1cb94b4 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java @@ -17,24 +17,22 @@ package opennlp.addons.modelbuilder.impls; import java.io.BufferedReader; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + import opennlp.addons.modelbuilder.KnownEntityProvider; -/** - * - */ public class FileKnownEntityProvider implements KnownEntityProvider { - Set<String> knownEntities = new 
HashSet<>(); - BaseModelBuilderParams params; + private final Set<String> knownEntities = new HashSet<>(); + private BaseModelBuilderParams params; + @Override public Set<String> getKnownEntities() { if (knownEntities.isEmpty()) { @@ -44,7 +42,7 @@ public class FileKnownEntityProvider implements KnownEntityProvider { String line; fis = new FileInputStream(params.getKnownEntitiesFile()); - br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8"))); + br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)); while ((line = br.readLine()) != null) { knownEntities.add(line); } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java index a953c76..8b31d56 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java @@ -34,7 +34,7 @@ import opennlp.addons.modelbuilder.ModelGenerationValidator; public class FileModelValidatorImpl implements ModelGenerationValidator { private final Set<String> badentities = new HashSet<>(); - BaseModelBuilderParams params; + private BaseModelBuilderParams params; @Override public void setParameters(BaseModelBuilderParams params) { @@ -58,11 +58,7 @@ public class FileModelValidatorImpl implements ModelGenerationValidator { // if (p.matcher(namedEntity).find()) { // return false; // } - boolean b = true; - if (badentities.contains(namedEntity.toLowerCase())) { - b = false; - } - return b; + return !badentities.contains(namedEntity.toLowerCase()); } @Override diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java index 126157f..358cef1 100644 --- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java @@ -43,8 +43,8 @@ public class GenericModelGenerator implements SemiSupervisedModelGenerator { ModelGenerationValidator validator, Modelable modelable, int iterations) { for (int iteration = 0; iteration < iterations; iteration++) { System.out.println("ITERATION: " + iteration); - System.out.println("\tPerfoming Known Entity Annotation"); - System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size()); + System.out.println("\tPerforming Known Entity Annotation"); + System.out.println("\t\tknown size: " + knownEntityProvider.getKnownEntities().size()); System.out.println("\t\treading data....: "); for (String sentence : sentenceProvider.getSentences()) { for (String knownEntity : knownEntityProvider.getKnownEntities()) { @@ -95,7 +95,7 @@ public class GenericModelGenerator implements SemiSupervisedModelGenerator { } } System.out.println("\t\tannotated sentences: " + modelable.getAnnotatedSentences().size()); - System.out.println("\t\tknowns: " + knownEntityProvider.getKnownEntities().size()); + System.out.println("\t\tknown size: " + knownEntityProvider.getKnownEntities().size()); } modelable.writeAnnotatedSentences(); modelable.buildModel(knownEntityProvider.getKnownEntitiesType()); diff --git a/morfologik-addon/src/main/assembly/bin.xml b/morfologik-addon/src/main/assembly/bin.xml index ab4f6da..856685a 100644 --- a/morfologik-addon/src/main/assembly/bin.xml +++ b/morfologik-addon/src/main/assembly/bin.xml @@ -45,14 +45,14 @@ <fileSets> <fileSet> <directory>src/main/readme</directory> - <outputDirectory></outputDirectory> + <outputDirectory/> <fileMode>644</fileMode> <directoryMode>755</directoryMode> </fileSet> <fileSet> <directory>.</directory> - <outputDirectory></outputDirectory> + <outputDirectory/> <filtered>true</filtered> 
<fileMode>644</fileMode> <directoryMode>755</directoryMode> @@ -64,7 +64,7 @@ <fileSet> <directory>target</directory> - <outputDirectory></outputDirectory> + <outputDirectory/> <fileMode>644</fileMode> <directoryMode>755</directoryMode> <includes> diff --git a/morfologik-addon/src/main/assembly/src.xml b/morfologik-addon/src/main/assembly/src.xml index cdcc9d3..f67f953 100644 --- a/morfologik-addon/src/main/assembly/src.xml +++ b/morfologik-addon/src/main/assembly/src.xml @@ -27,7 +27,7 @@ <fileSets> <fileSet> <directory>../</directory> - <outputDirectory></outputDirectory> + <outputDirectory/> <excludes> <exclude>**/target/**</exclude> <exclude>**/.*/**</exclude> diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java b/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java similarity index 95% rename from morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java rename to morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java index dbbca4d..f827d03 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionaryBuilder.java @@ -17,8 +17,6 @@ package opennlp.morfologik.builder; -import java.io.FileNotFoundException; -import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Path; import java.util.Properties; @@ -32,7 +30,7 @@ import morfologik.tools.DictCompile; * file. The first column is the word, the second its lemma and the third a POS * tag. If there is no lemma information leave the second column empty. */ -public class MorfologikDictionayBuilder { +public class MorfologikDictionaryBuilder { /** * Helper to compile a morphological dictionary automaton. 
@@ -90,7 +88,7 @@ public class MorfologikDictionayBuilder { } Properties createProperties(Charset encoding, String separator, - EncoderType encoderType) throws FileNotFoundException, IOException { + EncoderType encoderType) { Properties properties = new Properties(); properties.setProperty("fsa.dict.separator", separator); diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java index 5205739..5373ecd 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java @@ -103,7 +103,7 @@ public final class CLI { System.exit(0); } - String toolArguments[] = new String[args.length -1]; + String[] toolArguments = new String[args.length -1]; System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); String toolName = args[0]; diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java index eb9b51c..923255f 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderTool.java @@ -21,7 +21,7 @@ import java.io.File; import java.nio.file.Path; import morfologik.stemming.DictionaryMetadata; -import opennlp.morfologik.builder.MorfologikDictionayBuilder; +import opennlp.morfologik.builder.MorfologikDictionaryBuilder; import opennlp.tools.cmdline.BasicCmdLineTool; import opennlp.tools.cmdline.CmdLineUtil; import opennlp.tools.cmdline.TerminateToolException; @@ -48,14 +48,14 @@ public class MorfologikDictionaryBuilderTool extends BasicCmdLineTool { Path metadataPath = DictionaryMetadata.getExpectedMetadataLocation(dictInFile.toPath()); CmdLineUtil.checkInputFile("dictionary metadata 
(.info) input file", metadataPath.toFile()); - MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder(); + MorfologikDictionaryBuilder builder = new MorfologikDictionaryBuilder(); try { builder.build(dictInFile.toPath(), params.getOverwrite(), params.getValidate(), params.getAcceptBOM(), params.getAcceptCR(), params.getIgnoreEmpty()); } catch (Exception e) { throw new TerminateToolException(-1, - "Error while creating Morfologik POS Dictionay: " + e.getMessage(), e); + "Error while creating Morfologik POS Dictionary: " + e.getMessage(), e); } } diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java index 4ee8cd4..eb001dc 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java @@ -34,7 +34,7 @@ interface XMLDictionaryToTableParams extends EncodingParameter { @ParameterDescription(valueName = "out", description = "Output for Morfologik (.info will be also created).") File getOutputFile(); - @ParameterDescription(valueName = "char", description = "Columm separator (must be a single character)") + @ParameterDescription(valueName = "char", description = "Column separator (must be a single character)") @OptionalParameter(defaultValue=",") String getSeparator(); diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java index edc41c7..c53a4b8 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java @@ -64,7 +64,7 @@ public class XMLDictionaryToTableTool 
extends BasicCmdLineTool { tagDictionary = POSDictionary.create(new FileInputStream(dictInFile)); } catch (IOException e) { throw new TerminateToolException(-1, - "Error while loading XML POS Dictionay: " + e.getMessage(), e); + "Error while loading XML POS Dictionary: " + e.getMessage(), e); } Iterator<String> iterator = tagDictionary.iterator(); diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java b/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java index 650f7a6..421a82f 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java @@ -34,7 +34,7 @@ import opennlp.tools.lemmatizer.Lemmatizer; public class MorfologikLemmatizer implements Lemmatizer { - private IStemmer dictLookup; + private final IStemmer dictLookup; public final Set<String> constantTags = new HashSet<>(Arrays.asList("NNP", "NP00000")); public MorfologikLemmatizer(Path dictionaryPath) throws IllegalArgumentException, IOException { diff --git a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java b/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java index 5b6bf13..c5219c1 100644 --- a/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java +++ b/morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikTagDictionary.java @@ -55,7 +55,7 @@ public class MorfologikTagDictionary implements TagDictionary { * @param dict * a Morfologik FSA dictionary * @param caseSensitive - * if true it performs case sensitive lookup + * if true it performs case-sensitive lookup * @throws IllegalArgumentException * if FSA's root node cannot be acquired (dictionary is empty). 
* @throws IOException @@ -73,12 +73,12 @@ public class MorfologikTagDictionary implements TagDictionary { } List<WordData> data = dictLookup.lookup(word); - if (data != null && data.size() > 0) { + if (data != null && !data.isEmpty()) { List<String> tags = new ArrayList<>(data.size()); for (WordData datum : data) { tags.add(datum.getTag().toString()); } - if (tags.size() > 0) + if (!tags.isEmpty()) return tags.toArray(new String[0]); return null; } diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionaryBuilderTest.java similarity index 85% rename from morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java rename to morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionaryBuilderTest.java index ae2bb46..834447c 100644 --- a/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java +++ b/morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionaryBuilderTest.java @@ -29,7 +29,7 @@ import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertNotNull; -public class POSDictionayBuilderTest { +public class POSDictionaryBuilderTest { @Test public void testBuildDictionary() throws Exception { @@ -42,15 +42,15 @@ public class POSDictionayBuilderTest { public static Path createMorfologikDictionary() throws Exception { Path tabFilePath = File.createTempFile( - POSDictionayBuilderTest.class.getName(), ".txt").toPath(); + POSDictionaryBuilderTest.class.getName(), ".txt").toPath(); Path infoFilePath = DictionaryMetadata.getExpectedMetadataLocation(tabFilePath); - Files.copy(POSDictionayBuilderTest.class.getResourceAsStream( + Files.copy(POSDictionaryBuilderTest.class.getResourceAsStream( "/dictionaryWithLemma.txt"), tabFilePath, StandardCopyOption.REPLACE_EXISTING); - Files.copy(POSDictionayBuilderTest.class.getResourceAsStream( + 
Files.copy(POSDictionaryBuilderTest.class.getResourceAsStream( "/dictionaryWithLemma.info"), infoFilePath, StandardCopyOption.REPLACE_EXISTING); - MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder(); + MorfologikDictionaryBuilder builder = new MorfologikDictionaryBuilder(); return builder.build(tabFilePath); } diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java index f1212cc..10f61ca 100644 --- a/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java +++ b/morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java @@ -19,7 +19,7 @@ package opennlp.morfologik.lemmatizer; import java.nio.file.Path; -import opennlp.morfologik.builder.POSDictionayBuilderTest; +import opennlp.morfologik.builder.POSDictionaryBuilderTest; import org.junit.jupiter.api.Test; @@ -38,7 +38,7 @@ public class MorfologikLemmatizerTest { } private MorfologikLemmatizer createDictionary(boolean caseSensitive) throws Exception { - Path output = POSDictionayBuilderTest.createMorfologikDictionary(); + Path output = POSDictionaryBuilderTest.createMorfologikDictionary(); return new MorfologikLemmatizer(output); } diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java index d6bc2fe..7e63e27 100644 --- a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java +++ b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/MorfologikTagDictionaryTest.java @@ -21,7 +21,7 @@ import java.util.Arrays; import java.util.List; import morfologik.stemming.Dictionary; -import opennlp.morfologik.builder.POSDictionayBuilderTest; +import opennlp.morfologik.builder.POSDictionaryBuilderTest; import 
opennlp.tools.postag.TagDictionary; import org.junit.jupiter.api.Test; @@ -49,9 +49,9 @@ public class MorfologikTagDictionaryTest { assertTrue(tags.contains("NOUN")); assertTrue(tags.contains("V")); - // this is the behavior of case insensitive dictionary - // if we search it using case insensitive, Casa as a proper noun - // should be lower case in the dictionary + // this is the behavior of case-insensitive dictionary + // if we search it using case-insensitive, Casa as a proper noun + // should be lower-cased in the dictionary tags = Arrays.asList(dict.getTags("Casa")); assertEquals(2, tags.size()); assertTrue(tags.contains("NOUN")); @@ -68,9 +68,9 @@ public class MorfologikTagDictionaryTest { assertTrue(tags.contains("NOUN")); assertTrue(tags.contains("V")); - // this is the behavior of case insensitive dictionary - // if we search it using case insensitive, Casa as a proper noun - // should be lower case in the dictionary + // this is the behavior of case-insensitive dictionary + // if we search it using case-insensitive, Casa as a proper noun + // should be lower-cased in the dictionary tags = Arrays.asList(dict.getTags("Casa")); assertEquals(1, tags.size()); assertTrue(tags.contains("PROP")); @@ -84,7 +84,7 @@ public class MorfologikTagDictionaryTest { private MorfologikTagDictionary createDictionary(boolean caseSensitive, List<String> constant) throws Exception { - Dictionary dic = Dictionary.read(POSDictionayBuilderTest.createMorfologikDictionary()); + Dictionary dic = Dictionary.read(POSDictionaryBuilderTest.createMorfologikDictionary()); return new MorfologikTagDictionary(dic, caseSensitive); } diff --git a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java index 602ffc6..871977d 100644 --- a/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java +++ 
b/morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java @@ -24,7 +24,7 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Path; -import opennlp.morfologik.builder.POSDictionayBuilderTest; +import opennlp.morfologik.builder.POSDictionaryBuilderTest; import opennlp.tools.postag.POSModel; import opennlp.tools.postag.POSSample; import opennlp.tools.postag.POSTaggerFactory; @@ -39,6 +39,7 @@ import opennlp.tools.util.model.ModelType; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -50,7 +51,7 @@ public class POSTaggerFactoryTest { @Test public void testPOSTaggerWithCustomFactory() throws Exception { - Path dictionary = POSDictionayBuilderTest.createMorfologikDictionary(); + Path dictionary = POSDictionaryBuilderTest.createMorfologikDictionary(); POSTaggerFactory inFactory = new MorfologikPOSTaggerFactory(); TagDictionary inDict = inFactory.createTagDictionary(dictionary.toFile()); inFactory.setTagDictionary(inDict); @@ -58,7 +59,7 @@ public class POSTaggerFactoryTest { POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory); POSTaggerFactory factory = posModel.getFactory(); - assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); + assertInstanceOf(MorfologikTagDictionary.class, factory.getTagDictionary()); try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { @@ -66,7 +67,7 @@ public class POSTaggerFactoryTest { POSModel fromSerialized = new POSModel(new ByteArrayInputStream(out.toByteArray())); factory = fromSerialized.getFactory(); - assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); + assertInstanceOf(MorfologikTagDictionary.class, factory.getTagDictionary()); assertEquals(2, factory.getTagDictionary().getTags("casa").length); }
