This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 03aa30c migrate sandbox component 'nlp-utils' to Java 11 (#75)
03aa30c is described below
commit 03aa30c1ca5b8edbb863078c085cccb78cb01802
Author: Martin Wiesner <[email protected]>
AuthorDate: Sat Jan 28 18:56:30 2023 +0100
migrate sandbox component 'nlp-utils' to Java 11 (#75)
---
nlp-utils/pom.xml | 25 +-
.../anomalydetection/AnomalyDetectionUtils.java | 7 +-
.../org/apache/opennlp/utils/cfg/CFGBuilder.java | 2 +-
.../org/apache/opennlp/utils/cfg/CFGRunner.java | 265 ++++++++++-----------
.../opennlp/utils/cfg/ContextFreeGrammar.java | 8 +-
.../utils/cfg/ProbabilisticContextFreeGrammar.java | 22 +-
.../classification/SimpleNaiveBayesClassifier.java | 27 ++-
.../UpdatableSimpleNaiveBayesClassifier.java | 24 +-
.../opennlp/utils/languagemodel/LanguageModel.java | 12 +-
.../languagemodel/NaiveSentenceLanguageModel.java | 4 +-
.../opennlp/utils/languagemodel/NoisyChannel.java | 4 +-
.../TrigramSentenceLanguageModel.java | 2 +-
.../org/apache/opennlp/utils/ngram/NGramUtils.java | 6 +-
.../utils/regression/GradientDescentUtils.java | 2 +-
.../opennlp/utils/regression/Hypothesis.java | 2 +-
.../regression/LinearCombinationHypothesis.java | 2 +-
.../utils/regression/RegressionModelUtils.java | 32 +--
.../java/org/apache/opennlp/utils/TestUtils.java | 8 +-
.../AnomalyDetectionUtilsTest.java | 5 +-
.../SimpleNaiveBayesClassifierTest.java | 6 +-
.../NaiveSentenceLanguageModelTest.java | 16 +-
.../TrigramSentenceLanguageModelTest.java | 16 +-
.../apache/opennlp/utils/ngram/NGramUtilsTest.java | 12 +-
.../utils/regression/GradientDescentUtilsTest.java | 2 +-
.../utils/regression/RegressionModelUtilsTest.java | 13 +-
25 files changed, 268 insertions(+), 256 deletions(-)
diff --git a/nlp-utils/pom.xml b/nlp-utils/pom.xml
index 70d0df9..5a006ab 100644
--- a/nlp-utils/pom.xml
+++ b/nlp-utils/pom.xml
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -19,12 +20,22 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
-
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. -->
+ <version>18</version>
+ <relativePath />
+ </parent>
+
<groupId>org.apache.opennlp</groupId>
<artifactId>nlp-utils</artifactId>
- <version>0.1-SNAPSHOT</version>
+ <version>2.1.1-SNAPSHOT</version>
+ <name>Apache OpenNLP Utils</name>
<properties>
+ <maven.compiler.source>11</maven.compiler.source>
+ <maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
@@ -32,20 +43,20 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>4.11</version>
+ <version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
+
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
- <version>2.0.2</version>
<configuration>
- <source>1.7</source>
- <target>1.7</target>
- <encoding>UTF-8</encoding>
+ <source>${maven.compiler.source}</source>
+ <target>${maven.compiler.target}</target>
+ <compilerArgument>-Xlint</compilerArgument>
</configuration>
</plugin>
</plugins>
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
index 0d7d4a8..009441f 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
@@ -19,6 +19,8 @@
package org.apache.opennlp.utils.anomalydetection;
import java.math.BigDecimal;
+import java.math.RoundingMode;
+
import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
@@ -84,9 +86,8 @@ public class AnomalyDetectionUtils {
* @param x the input
* @param set the training set
* @return the probability of the given input
- * @throws Exception
*/
- public static double getGaussianProbability(TrainingExample x, TrainingSet
set) throws Exception {
+ public static double getGaussianProbability(TrainingExample x, TrainingSet
set) {
double[] mus = fitMus(set);
double[] sigmas = fitSigmas(mus, set);
return calculateGaussianProbability(x, mus, sigmas);
@@ -97,7 +98,7 @@ public class AnomalyDetectionUtils {
assert mus.length == sigmas.length : "parameters not aligned";
BigDecimal px = new BigDecimal(1d);
for (int i = 0; i < mus.length; i++) {
- BigDecimal firstTerm =
BigDecimal.ONE.divide(BigDecimal.valueOf(Math.sqrt(2d * Math.PI * sigmas[i])),
BigDecimal.ROUND_CEILING);
+ BigDecimal firstTerm =
BigDecimal.ONE.divide(BigDecimal.valueOf(Math.sqrt(2d * Math.PI * sigmas[i])),
RoundingMode.CEILING);
BigDecimal secondTerm = BigDecimal.valueOf(Math.exp(-1 *
(Math.pow(x.getInputs()[i] - mus[i], 2) / (2 * Math.pow(sigmas[i], 2)))));
px = px.multiply(firstTerm.multiply(secondTerm));
}
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
index 7cca8ee..806433f 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
@@ -62,7 +62,7 @@ public class CFGBuilder {
public ContextFreeGrammar build() {
assert nonTerminalSymbols != null && terminalSymbols != null && rules !=
null && startSymbol != null :
- "missing definitions { V : " + nonTerminalSymbols + ", ∑ : " +
terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
+ "missing definitions {V : " + nonTerminalSymbols + ", ∑ : " +
terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules,
startSymbol, randomExpansion);
}
}
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
index e3bb59b..07d93e1 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGRunner.java
@@ -34,147 +34,146 @@ import java.util.Map;
*/
public class CFGRunner {
- public static void main(String[] args) throws Exception {
- CFGBuilder builder = new CFGBuilder();
-
- Arrays.sort(args);
- boolean useWn = Arrays.binarySearch(args, "-wn") >= 0;
-
- Collection<String> adverbsCollection;
- Collection<String> verbsCollection;
- Collection<String> adjectivesCollection;
- Collection<String> nounsCollection;
- if (useWn) {
- adverbsCollection = getTokens("/opennlp/cfg/wn/adv.txt");
- adjectivesCollection = getTokens("/opennlp/cfg/wn/adj.txt");
- nounsCollection = getTokens("/opennlp/cfg/wn/noun.txt");
- verbsCollection = getTokens("/opennlp/cfg/wn/verb.txt");
- } else {
- adverbsCollection = getTokens("/opennlp/cfg/an/adv.txt");
- adjectivesCollection = getTokens("/opennlp/cfg/an/adj.txt");
- nounsCollection = getTokens("/opennlp/cfg/an/noun.txt");
- verbsCollection = getTokens("/opennlp/cfg/an/verb.txt");
- }
+ public static void main(String[] args) throws Exception {
+ CFGBuilder builder = new CFGBuilder();
+
+ Arrays.sort(args);
+ boolean useWn = Arrays.binarySearch(args, "-wn") >= 0;
+
+ Collection<String> adverbsCollection;
+ Collection<String> verbsCollection;
+ Collection<String> adjectivesCollection;
+ Collection<String> nounsCollection;
+ if (useWn) {
+ adverbsCollection = getTokens("/opennlp/cfg/wn/adv.txt");
+ adjectivesCollection = getTokens("/opennlp/cfg/wn/adj.txt");
+ nounsCollection = getTokens("/opennlp/cfg/wn/noun.txt");
+ verbsCollection = getTokens("/opennlp/cfg/wn/verb.txt");
+ } else {
+ adverbsCollection = getTokens("/opennlp/cfg/an/adv.txt");
+ adjectivesCollection = getTokens("/opennlp/cfg/an/adj.txt");
+ nounsCollection = getTokens("/opennlp/cfg/an/noun.txt");
+ verbsCollection = getTokens("/opennlp/cfg/an/verb.txt");
+ }
+
+ Collection<String> terminals = new LinkedList<>();
+ terminals.addAll(adverbsCollection);
+ terminals.addAll(verbsCollection);
+ terminals.addAll(adjectivesCollection);
+ terminals.addAll(nounsCollection);
+
+ builder.withTerminals(terminals);
+
+ Collection<String> nonTerminals = new LinkedList<>();
+ String startSymbol = "START_SYMBOL";
+ nonTerminals.add(startSymbol);
+ nonTerminals.add("NP");
+ nonTerminals.add("NN");
+ nonTerminals.add("Adv");
+ nonTerminals.add("Adj");
+ nonTerminals.add("VP");
+ nonTerminals.add("Vb");
+ builder.withNonTerminals(nonTerminals);
+
+ builder.withStartSymbol(startSymbol);
+
+ Collection<Rule> rules = new LinkedList<Rule>();
+ rules.add(new Rule(startSymbol, "VP", "NP"));
+ rules.add(new Rule("VP", "Adv", "Vb"));
+ rules.add(new Rule("NP", "Adj", "NN"));
+
+ for (String v : verbsCollection) {
+ rules.add(new Rule("Vb", v));
+ }
+ for (String adj : adjectivesCollection) {
+ rules.add(new Rule("Adj", adj));
+ }
+ for (String n : nounsCollection) {
+ rules.add(new Rule("NN", n));
+ }
+ for (String adv : adverbsCollection) {
+ rules.add(new Rule("Adv", adv));
+ }
+ builder.withRules(rules);
+ ContextFreeGrammar cfg = builder.withRandomExpansion(true).build();
+ String[] sentence = cfg.leftMostDerivation(startSymbol);
+ String toString = Arrays.toString(sentence);
+
+ if (toString.length() > 0) {
+ System.out.println(toString.substring(1, toString.length() -
1).replaceAll(",", ""));
+ }
- Collection<String> terminals = new LinkedList<>();
- terminals.addAll(adverbsCollection);
- terminals.addAll(verbsCollection);
- terminals.addAll(adjectivesCollection);
- terminals.addAll(nounsCollection);
-
- builder.withTerminals(terminals);
-
- Collection<String> nonTerminals = new LinkedList<String>();
- String startSymbol = "START_SYMBOL";
- nonTerminals.add(startSymbol);
- nonTerminals.add("NP");
- nonTerminals.add("NN");
- nonTerminals.add("Adv");
- nonTerminals.add("Adj");
- nonTerminals.add("VP");
- nonTerminals.add("Vb");
- builder.withNonTerminals(nonTerminals);
-
- builder.withStartSymbol(startSymbol);
-
- Collection<Rule> rules = new LinkedList<Rule>();
- rules.add(new Rule(startSymbol, "VP", "NP"));
- rules.add(new Rule("VP", "Adv", "Vb"));
- rules.add(new Rule("NP", "Adj", "NN"));
-
- for (String v : verbsCollection) {
- rules.add(new Rule("Vb", v));
+ boolean pt = Arrays.binarySearch(args, "-pt") >= 0;
+
+ if (pt) {
+ Map<Rule, Double> rulesMap = new HashMap<>();
+ rulesMap.put(new Rule(startSymbol, "VP", "NP"), 1d);
+ rulesMap.put(new Rule("VP", "Adv", "Vb"), 1d);
+ rulesMap.put(new Rule("NP", "Adj", "NN"), 1d);
+
+ SecureRandom secureRandom = new SecureRandom();
+
+ double remainingP = 1d;
+ for (String v : verbsCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == verbsCollection.size() - 1) {
+ p = remainingP;
}
- for (String adj : adjectivesCollection) {
- rules.add(new Rule("Adj", adj));
+ if (remainingP - p <= 0) {
+ p /= 10;
}
- for (String n : nounsCollection) {
- rules.add(new Rule("NN", n));
+ rulesMap.put(new Rule("Vb", v), p);
+ remainingP -= p;
+ }
+ for (String a : adjectivesCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == adjectivesCollection.size() - 1) {
+ p = remainingP;
}
- for (String adv : adverbsCollection) {
- rules.add(new Rule("Adv", adv));
+ if (remainingP - p <= 0) {
+ p /= 10;
}
- builder.withRules(rules);
- ContextFreeGrammar cfg = builder.withRandomExpansion(true).build();
- String[] sentence = cfg.leftMostDerivation(startSymbol);
- String toString = Arrays.toString(sentence);
-
- if (toString.length() > 0) {
- System.out.println(toString.substring(1, toString.length() -
1).replaceAll(",", ""));
+ rulesMap.put(new Rule("Adj", a), p);
+ remainingP -= p;
+ }
+ for (String n : nounsCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == nounsCollection.size() - 1) {
+ p = remainingP;
+ } else if (remainingP - p <= 0) {
+ p /= 10;
}
-
- boolean pt = Arrays.binarySearch(args, "-pt") >= 0;
-
- if (pt) {
- Map<Rule, Double> rulesMap = new HashMap<>();
- rulesMap.put(new Rule(startSymbol, "VP", "NP"), 1d);
- rulesMap.put(new Rule("VP", "Adv", "Vb"), 1d);
- rulesMap.put(new Rule("NP", "Adj", "NN"), 1d);
-
- SecureRandom secureRandom = new SecureRandom();
-
- double remainingP = 1d;
- for (String v : verbsCollection) {
- double p = (double) secureRandom.nextInt(1000) / 1001d;
- if (rulesMap.size() == verbsCollection.size() - 1) {
- p = remainingP;
- }
- if (remainingP - p <= 0) {
- p /= 10;
- }
- rulesMap.put(new Rule("Vb", v), p);
- remainingP -= p;
- }
- for (String a : adjectivesCollection) {
- double p = (double) secureRandom.nextInt(1000) / 1001d;
- if (rulesMap.size() == adjectivesCollection.size() - 1) {
- p = remainingP;
- }
- if (remainingP - p <= 0) {
- p /= 10;
- }
- rulesMap.put(new Rule("Adj", a), p);
- remainingP -= p;
- }
- for (String n : nounsCollection) {
- double p = (double) secureRandom.nextInt(1000) / 1001d;
- if (rulesMap.size() == nounsCollection.size() - 1) {
- p = remainingP;
- } else if (remainingP - p <= 0) {
- p /= 10;
- }
- rulesMap.put(new Rule("NN", n), p);
- remainingP -= p;
- }
- for (String a : adverbsCollection) {
- double p = (double) secureRandom.nextInt(1000) / 1001d;
- if (rulesMap.size() == adverbsCollection.size() - 1) {
- p = remainingP;
- }
- if (remainingP - p <= 0) {
- p /= 10;
- }
- rulesMap.put(new Rule("Adv", a), p);
- remainingP -= p;
- }
- ProbabilisticContextFreeGrammar pcfg = new
ProbabilisticContextFreeGrammar(cfg.getNonTerminalSymbols(),
cfg.getTerminalSymbols(),
- rulesMap, startSymbol, true);
- ProbabilisticContextFreeGrammar.ParseTree parseTree =
pcfg.cky(Arrays.asList(sentence));
- System.out.println(parseTree);
+ rulesMap.put(new Rule("NN", n), p);
+ remainingP -= p;
+ }
+ for (String a : adverbsCollection) {
+ double p = (double) secureRandom.nextInt(1000) / 1001d;
+ if (rulesMap.size() == adverbsCollection.size() - 1) {
+ p = remainingP;
}
- }
-
- private static Collection<String> getTokens(String s) throws IOException {
- Collection<String> tokens = new LinkedList<>();
- InputStream resourceStream = CFGRunner.class.getResourceAsStream(s);
- BufferedReader bufferedReader = new BufferedReader(new
InputStreamReader(resourceStream));
- String line;
- while ((line = bufferedReader.readLine()) != null) {
- tokens.add(line);
+ if (remainingP - p <= 0) {
+ p /= 10;
}
- bufferedReader.close();
- resourceStream.close();
- return tokens;
+ rulesMap.put(new Rule("Adv", a), p);
+ remainingP -= p;
+ }
+ ProbabilisticContextFreeGrammar pcfg = new
ProbabilisticContextFreeGrammar(cfg.getNonTerminalSymbols(),
cfg.getTerminalSymbols(),
+ rulesMap, startSymbol, true);
+ ProbabilisticContextFreeGrammar.ParseTree parseTree =
pcfg.cky(Arrays.asList(sentence));
+ System.out.println(parseTree);
+ }
+ }
+
+ private static Collection<String> getTokens(String s) throws IOException {
+ Collection<String> tokens = new LinkedList<>();
+ try (BufferedReader bufferedReader = new BufferedReader(
+ new InputStreamReader(CFGRunner.class.getResourceAsStream(s)))) {
+ String line;
+ while ((line = bufferedReader.readLine()) != null) {
+ tokens.add(line);
+ }
}
+ return tokens;
+ }
}
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
index c3419ed..e3e4826 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
@@ -65,7 +65,7 @@ public class ContextFreeGrammar {
}
public String[] leftMostDerivation(String... words) {
- ArrayList<String> expansion = new ArrayList<String>(words.length);
+ ArrayList<String> expansion = new ArrayList<>(words.length);
assert words.length > 0 && startSymbol.equals(words[0]);
@@ -78,13 +78,13 @@ public class ContextFreeGrammar {
private Collection<String> getTerminals(String word) {
if (terminalSymbols.contains(word)) {
- Collection<String> c = new LinkedList<String>();
+ Collection<String> c = new LinkedList<>();
c.add(word);
return c;
} else {
assert nonTerminalSymbols.contains(word) : "word " + word + " is not
contained in non terminals";
String[] expansions = getExpansionForSymbol(word);
- Collection<String> c = new LinkedList<String>();
+ Collection<String> c = new LinkedList<>();
for (String e : expansions) {
c.addAll(getTerminals(e));
}
@@ -98,7 +98,7 @@ public class ContextFreeGrammar {
}
private Rule getRuleForSymbol(String word) {
- ArrayList<Rule> possibleRules = new ArrayList<Rule>();
+ ArrayList<Rule> possibleRules = new ArrayList<>();
for (Rule r : rules) {
if (word.equals(r.getEntry())) {
if (!randomExpansion) {
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
index f5d936c..edd6b37 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
@@ -39,7 +39,7 @@ public class ProbabilisticContextFreeGrammar {
private final Collection<String> terminalSymbols;
private final Map<Rule, Double> rules;
private final String startSymbol;
- private boolean randomExpansion;
+ private final boolean randomExpansion;
private static final Rule emptyRule = new Rule("EMPTY~", "");
@@ -86,7 +86,7 @@ public class ProbabilisticContextFreeGrammar {
public String[] leftMostDerivation(String... words) {
- ArrayList<String> expansion = new ArrayList<String>(words.length);
+ ArrayList<String> expansion = new ArrayList<>(words.length);
assert words.length > 0 && startSymbol.equals(words[0]);
@@ -98,13 +98,13 @@ public class ProbabilisticContextFreeGrammar {
private Collection<String> getTerminals(String word) {
if (terminalSymbols.contains(word)) {
- Collection<String> c = new LinkedList<String>();
+ Collection<String> c = new LinkedList<>();
c.add(word);
return c;
} else {
assert nonTerminalSymbols.contains(word) : "word " + word + " is not
contained in non terminals";
String[] expansions = getExpansionForSymbol(word);
- Collection<String> c = new LinkedList<String>();
+ Collection<String> c = new LinkedList<>();
for (String e : expansions) {
c.addAll(getTerminals(e));
}
@@ -118,7 +118,7 @@ public class ProbabilisticContextFreeGrammar {
}
private Rule getRuleForSymbol(String word) {
- ArrayList<Rule> possibleRules = new ArrayList<Rule>();
+ ArrayList<Rule> possibleRules = new ArrayList<>();
for (Rule r : rules.keySet()) {
if (word.equals(r.getEntry())) {
if (!randomExpansion) {
@@ -186,7 +186,7 @@ public class ProbabilisticContextFreeGrammar {
}
private Collection<Rule> getRulesForNonTerminal(String x) {
- LinkedList<Rule> ntRules = new LinkedList<Rule>();
+ LinkedList<Rule> ntRules = new LinkedList<>();
for (Rule r : rules.keySet()) {
String[] expansion = r.getExpansion();
if (expansion.length == 2 && x.equals(r.getEntry()) &&
nonTerminalSymbols.contains(expansion[0]) &&
nonTerminalSymbols.contains(expansion[1])) {
@@ -197,7 +197,7 @@ public class ProbabilisticContextFreeGrammar {
}
private Collection<Rule> getNTRules() {
- Collection<Rule> ntRules = new LinkedList<Rule>();
+ Collection<Rule> ntRules = new LinkedList<>();
for (Rule r : rules.keySet()) {
String[] expansion = r.getExpansion();
if (expansion.length == 2 && nonTerminalSymbols.contains(expansion[0])
&& nonTerminalSymbols.contains(expansion[1])) {
@@ -211,7 +211,7 @@ public class ProbabilisticContextFreeGrammar {
return rules.keySet().contains(rule) ? rules.get(rule) : 0;
}
- public class ParseTree {
+ public static class ParseTree {
private final double probability;
private final int splitPoint;
@@ -281,11 +281,11 @@ public class ProbabilisticContextFreeGrammar {
}
public static ProbabilisticContextFreeGrammar parseGrammar(boolean trim,
String... parseTreeStrings) {
- return parseGrammar(new HashMap<Rule, Double>(), "S", trim,
parseTreeStrings);
+ return parseGrammar(new HashMap<>(), "S", trim, parseTreeStrings);
}
public static ProbabilisticContextFreeGrammar parseGrammar(String...
parseTreeStrings) {
- return parseGrammar(new HashMap<Rule, Double>(), "S", true,
parseTreeStrings);
+ return parseGrammar(new HashMap<>(), "S", true, parseTreeStrings);
}
public static ProbabilisticContextFreeGrammar parseGrammar(Map<Rule, Double>
rulesMap, String startSymbol, boolean trim, String... parseStrings) {
@@ -303,7 +303,7 @@ public class ProbabilisticContextFreeGrammar {
for (String parseTreeString : parseStrings) {
if (trim) {
- parseTreeString = parseTreeString.replaceAll("\n",
"").replaceAll("\t", "").replaceAll("\\s+", " ");
+ parseTreeString = parseTreeString.replace("\n", "").replace("\t",
"").replaceAll("\\s+", " ");
}
String toConsume = String.valueOf(parseTreeString);
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifier.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifier.java
index b7bf33c..1114677 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifier.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifier.java
@@ -25,16 +25,17 @@ import java.util.LinkedList;
import java.util.Map;
/**
- * C = argmax( P(d|c) * P(c) )
- * where P(d|c) is called: likelihood
- * and P(c) is called: prior - we can count relative frequencies in a corpus
- * and d is a vector of features
- * <p/>
- * we assume:
- * 1. bag of words assumption: positions don't matter
- * 2. conditional independence: the feature probabilities are independent
given a class
- * <p/>
- * thus P(d|c) == P(x1,..,xn|c) == P(x1|c)*...P(xn|c)
+ * {@code C = argmax( P(d|c) * P(c) )}
+ * where {@code P(d|c)} is called: likelihood
+ * and {@code P(c)} is called: prior - we can count relative frequencies in a
corpus
+ * and {@code d} is a vector of features.
+ * <p>
+ * We assume:
+ * <ol>
+ * <li>bag of words assumption: positions don't matter</li>
+ * <li>conditional independence: the feature probabilities are independent
given a class</li>
+ * </ol>
+ * thus {@code P(d|c) == P(x1,..,xn|c) == P(x1|c)*...P(xn|c)}
*/
public class SimpleNaiveBayesClassifier implements
NaiveBayesClassifier<String, String> {
@@ -104,10 +105,10 @@ public class SimpleNaiveBayesClassifier implements
NaiveBayesClassifier<String,
@Override
public String calculateClass(String inputDocument) {
- Double max = 0d;
+ double max = 0d;
String foundClass = null;
for (String cl : classMegaDocMap.keySet()) {
- Double clVal = priors.get(cl) * calculateLikelihood(inputDocument, cl);
+ double clVal = priors.get(cl) * calculateLikelihood(inputDocument, cl);
if (clVal > max) {
max = clVal;
foundClass = cl;
@@ -120,7 +121,7 @@ public class SimpleNaiveBayesClassifier implements
NaiveBayesClassifier<String,
private Double calculateLikelihood(String document, String c) {
String megaDoc = classMegaDocMap.get(c);
// for each word
- Double result = 1d;
+ double result = 1d;
for (String word : tokenizeDoc(document)) {
// num : count the no of times the word appears in documents of class c
(+1)
double num = count(word, megaDoc) + 1; // +1 is added because of add 1
smoothing
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/UpdatableSimpleNaiveBayesClassifier.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/UpdatableSimpleNaiveBayesClassifier.java
index dab6f60..a3c064a 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/UpdatableSimpleNaiveBayesClassifier.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/UpdatableSimpleNaiveBayesClassifier.java
@@ -25,22 +25,21 @@ import java.util.List;
import java.util.Map;
import java.util.TreeSet;
-
public class UpdatableSimpleNaiveBayesClassifier implements
NaiveBayesClassifier<List<String>, String> {
- private final Collection<String> vocabulary = new TreeSet<String>(); // the
bag of all the words in the corpus
- private final Map<String, Integer> classCounts = new LinkedHashMap<String,
Integer>();
+ private final Collection<String> vocabulary = new TreeSet<>(); // the bag of
all the words in the corpus
+ private final Map<String, Integer> classCounts = new LinkedHashMap<>();
private double noDocs = 0d;
- private final Map<String, Map<String, Integer>> nm = new HashMap<String,
Map<String, Integer>>();
- private final Map<String, Double> priors = new HashMap<String, Double>();
- private final Map<String, Double> dens = new HashMap<String, Double>();
+ private final Map<String, Map<String, Integer>> nm = new HashMap<>();
+ private final Map<String, Double> priors = new HashMap<>();
+ private final Map<String, Double> dens = new HashMap<>();
public void addExample(String klass, List<String> words) {
vocabulary.addAll(words);
Integer integer = classCounts.get(klass);
- Integer f = integer != null ? integer : 0;
+ int f = integer != null ? integer : 0;
classCounts.put(klass, f + 1);
noDocs++;
@@ -48,7 +47,7 @@ public class UpdatableSimpleNaiveBayesClassifier implements
NaiveBayesClassifier
for (String w : words) {
Map<String, Integer> wordCountsForClass = nm.get(klass);
if (wordCountsForClass == null) {
- wordCountsForClass = new HashMap<String, Integer>();
+ wordCountsForClass = new HashMap<>();
}
Integer count = wordCountsForClass.get(w);
if (count == null) {
@@ -69,7 +68,7 @@ public class UpdatableSimpleNaiveBayesClassifier implements
NaiveBayesClassifier
private void calculateDen(String c) {
// den : for the whole dictionary, count the no of times a word appears in
documents of class c (+|V|)
- Double den = 0d;
+ double den = 0d;
for (String w : vocabulary) {
Integer integer = nm.get(c).get(w);
den += integer != null ? integer : 0;
@@ -78,8 +77,9 @@ public class UpdatableSimpleNaiveBayesClassifier implements
NaiveBayesClassifier
dens.put(c, den);
}
- public String calculateClass(List<String> words) throws Exception {
- Double max = -1000000d;
+ @Override
+ public String calculateClass(List<String> words) {
+ double max = -1000000d;
String foundClass = null;
for (String cl : nm.keySet()) {
double prior = priors.get(cl);
@@ -100,7 +100,7 @@ public class UpdatableSimpleNaiveBayesClassifier implements
NaiveBayesClassifier
double result = 0d;
for (String word : words) {
// num : count the no of times the word appears in documents of class c
(+1)
- Integer freq = wordFreqs.get(word) != null ? wordFreqs.get(word) : 0;
+ int freq = wordFreqs.get(word) != null ? wordFreqs.get(word) : 0;
double num = freq + 1d; // +1 is added because of add 1 smoothing
// P(w|c) = num/den
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
index e84f48c..7b56847 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
@@ -22,18 +22,18 @@ import java.util.Collection;
/**
* A language model can calculate the probability <i>p</i> (between 0 and 1)
of a
- * certain set of <code>T</code> objects, given a vocabulary.
- * <code>T</code> is usually an {@link java.lang.Iterable} or an array as
language models are very commonly used for
- * sentences, so that T is e.g. an array of <code>String</code>s.
+ * certain set of {@code T} objects, given a vocabulary.
+ * {@code T} is usually an {@link java.lang.Iterable} or an array as language
models are very commonly used for
+ * sentences, so that T is e.g. an array of {@code String}s.
*/
public interface LanguageModel<T> {
/**
- * Calculate the probability of a sample, given a vocabulary
+ * Calculates the probability of a sample, given a {@code vocabulary}.
*
- * @param vocabulary a {@link Collection} of objects of type <code>T</code>
+ * @param vocabulary a {@link Collection} of objects of type {@code T}
* @param sample the sample to evaluate the probability for
- * @return a <code>double</code> between <code>0</code> and <code>1</code>
+ * @return a {@code double} between {@code 0} and {@code 1}
*/
double calculateProbability(Collection<T> vocabulary, T sample);
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
index abadc23..d29b2e5 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
@@ -22,8 +22,8 @@ import java.util.Collection;
import java.util.Collections;
/**
- * Simple sentence language model which just counts the no. of occurrences of
- * a sentence over the no. of sentences in the vocabulary.
+ * Simple sentence language model which just counts the occurrences of
+ * a sentence over the number of sentences in the vocabulary.
*/
public class NaiveSentenceLanguageModel<T> implements LanguageModel<T[]> {
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
index fd2b81d..9085605 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
@@ -30,10 +30,10 @@ public abstract class NoisyChannel {
}
public String findCorrection(String mispelledWord) {
- Double val = 0d;
+ double val = 0d;
String correctWord = null;
for (String word : dictionary) {
- Double curVal = calculateLikelihood(mispelledWord, word) *
calculatePrior(word);
+ double curVal = calculateLikelihood(mispelledWord, word) *
calculatePrior(word);
if (curVal > val) {
val = curVal;
correctWord = word;
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
index a916cd3..b51856e 100644
---
a/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
+++
b/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
@@ -54,7 +54,7 @@ public class TrigramSentenceLanguageModel<T> implements
LanguageModel<T[]> {
}
private Set<Trigram> getTrigrams(T[] sample) {
- Set<Trigram> trigrams = new HashSet<Trigram>();
+ Set<Trigram> trigrams = new HashSet<>();
for (int i = 0; i < sample.length; i++) {
T x0 = null;
T x1 = null;
diff --git
a/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
b/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
index a7371bc..00eccb6 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
@@ -23,7 +23,7 @@ import java.util.Collection;
import java.util.HashSet;
/**
- * utility class for calculating probabilities of tri/bi/uni-grams
+ * A utility class for calculating probabilities of tri/bi/uni-grams.
*/
public class NGramUtils {
@@ -123,8 +123,8 @@ public class NGramUtils {
}
public static <T> Double calculateMissingBigramProbabilityMass(T x1, Double
discount, Collection<T[]> set) {
- Double missingMass = 0d;
- Double countWord = count(x1, set);
+ double missingMass = 0d;
+ double countWord = count(x1, set);
for (T word : flatSet(set)) {
missingMass += (count(word, x1, set) - discount) / countWord;
}
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
index 009e72b..67613b5 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
@@ -23,7 +23,7 @@ import java.util.Random;
import org.apache.opennlp.utils.TrainingSet;
/**
- * Utility class for calculating gradient descent
+ * A utility class for calculating gradient descent.
*/
public class GradientDescentUtils {
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
index 4642fbd..711ae43 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
@@ -24,7 +24,7 @@ package org.apache.opennlp.utils.regression;
public interface Hypothesis {
/**
- * calculate the output given some inputs according to the underlying model.
+ * Calculates the output given some inputs according to the underlying model.
*
* @param inputs an array of inputs as <code>double</code>
* @return a <code>double</code> representing the output
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
index 45efba8..6150a37 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
@@ -19,7 +19,7 @@
package org.apache.opennlp.utils.regression;
/**
- * Simplest {@link Hypothesis} which just linearly combines inputs with weights
+ * Simplest {@link Hypothesis} which just linearly combines inputs with weights.
*/
public class LinearCombinationHypothesis implements Hypothesis {
private final double[] weights;
diff --git a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/RegressionModelUtils.java b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/RegressionModelUtils.java
index d543f51..7b606a2 100644
--- a/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/RegressionModelUtils.java
+++ b/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/RegressionModelUtils.java
@@ -22,16 +22,16 @@ import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
/**
- * Utility class for calculating various regression models costs
+ * A utility class for calculating various regression models costs.
*/
public class RegressionModelUtils {
/**
- * calculate the ordinary least squares (OLS) cost in the given training set for a given hypothesis
+ * Calculates the ordinary least squares (OLS) cost in the given training set for a given hypothesis.
*
- * @param trainingSet the training set used
- * @param hypothesis the hypothesis function representing the model
- * @return the cost of the hypothesis for the given training set using OLS
+ * @param trainingSet The {@link TrainingSet} used.
+ * @param hypothesis The {@link Hypothesis} function representing the model.
+ * @return The cost of the hypothesis for the given training set using OLS.
*/
public static double ordinaryLeastSquares(TrainingSet trainingSet,
Hypothesis hypothesis) {
double output = 0;
@@ -43,12 +43,12 @@ public class RegressionModelUtils {
}
/**
- * calculate the least mean square (LMS) update for a given weight vector
+ * Calculates the least mean square (LMS) update for a given weight vector.
*
- * @param thetas the array of weights
- * @param alpha the learning rate alpha
- * @param trainingSet the training set to use for learning
- * @param hypothesis the hypothesis representing the model
+ * @param thetas The array of weights.
+ * @param alpha The learning rate alpha.
+ * @param trainingSet The {@link TrainingSet} to use for learning.
+ * @param hypothesis The {@link Hypothesis} representing the model.
* @return the updated weights vector
*/
public static double[] batchLeastMeanSquareUpdate(double[] thetas, double
alpha, TrainingSet trainingSet, Hypothesis hypothesis) {
@@ -66,12 +66,12 @@ public class RegressionModelUtils {
/**
* calculate least mean square update for a given training example for the
j-th input
*
- * @param thetas the array of weights
- * @param alpha the learning rate alpha
- * @param trainingExample the training example to use for learning
- * @param hypothesis the hypothesis representing the model
- * @param j the index of the j-th input
- * @return the updated weight for the j-th element of the weights vector
+ * @param thetas The array of weights.
+ * @param alpha The learning rate alpha.
+ * @param trainingExample The {@link TrainingExample} to use for learning.
+ * @param hypothesis The {@link Hypothesis} representing the model.
+ * @param j The index of the j-th input.
+ * @return The updated weight for the j-th element of the weights vector.
*/
public static double singleLeastMeanSquareUpdate(double[] thetas, double
alpha, TrainingExample trainingExample, Hypothesis hypothesis, int j) {
return thetas[j] + alpha * (trainingExample.getOutput() -
hypothesis.calculateOutput(trainingExample.getInputs())) *
trainingExample.getInputs()[j];
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
index 5d4b84f..32d318f 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
@@ -29,7 +29,7 @@ import org.junit.Ignore;
@Ignore
public class TestUtils {
- private static Random r = new Random();
+ private static final Random R = new Random();
public static void fillTrainingSet(TrainingSet trainingSet, int size, int
dimension) {
for (int i = 0; i < size; i++) {
@@ -43,7 +43,7 @@ public class TestUtils {
}
public static Collection<String[]> generateRandomVocabulary() {
- int size = r.nextInt(1000);
+ int size = R.nextInt(1000);
Collection<String[]> vocabulary = new ArrayList<String[]>(size);
for (int i = 0; i < size; i++) {
String[] sentence = generateRandomSentence();
@@ -53,10 +53,10 @@ public class TestUtils {
}
public static String[] generateRandomSentence() {
- int dimension = r.nextInt(10);
+ int dimension = R.nextInt(10);
String[] sentence = new String[dimension];
for (int j = 0; j < dimension; j++) {
- char c = (char) r.nextInt(10);
+ char c = (char) R.nextInt(10);
sentence[j] = c + "-" + c + "-" + c;
}
return sentence;
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
index 7a84c2f..d5f2560 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
@@ -25,7 +25,6 @@ import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
/**
* Testcase for {@link
org.apache.opennlp.utils.anomalydetection.AnomalyDetectionUtils}
@@ -33,7 +32,7 @@ import static org.junit.Assert.assertTrue;
public class AnomalyDetectionUtilsTest {
@Test
- public void testGaussianDistributionProbabilityFromFitParameters() throws Exception {
+ public void testGaussianDistributionProbabilityFromFitParameters() {
TrainingSet trainingSet = new TrainingSet();
TestUtils.fillTrainingSet(trainingSet, 100, 5);
double[] mus = AnomalyDetectionUtils.fitMus(trainingSet);
@@ -46,7 +45,7 @@ public class AnomalyDetectionUtilsTest {
}
@Test
- public void testGaussianDistributionProbabilityFromTrainingSet() throws Exception {
+ public void testGaussianDistributionProbabilityFromTrainingSet() {
TrainingSet trainingSet = new TrainingSet();
TestUtils.fillTrainingSet(trainingSet, 100, 5);
TrainingExample newInput = new TrainingExample(new
double[]{0.4d,0.5d,0.5d,0.5d,0.2d}, 0d);
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifierTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifierTest.java
index 8016679..8d7f408 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifierTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/classification/SimpleNaiveBayesClassifierTest.java
@@ -31,8 +31,8 @@ import static org.junit.Assert.assertTrue;
public class SimpleNaiveBayesClassifierTest {
@Test
- public void ppsIntegrationTest() throws Exception {
- Map<String, String> trainedCorpus = new HashMap<String, String>();
+ public void ppsIntegrationTest() {
+ Map<String, String> trainedCorpus = new HashMap<>();
trainedCorpus.put("CAVOUR ad.te napoleone III affare: cat. C/2 ottimo" +
" stato ingresso angolo cottura bagno con doccia e camera. " +
"ottimo per investimento o piccolo studio per professionisti" +
@@ -69,7 +69,7 @@ public class SimpleNaiveBayesClassifierTest {
SimpleNaiveBayesClassifier classifier = new
SimpleNaiveBayesClassifier(trainedCorpus);
- Boolean isAgency = classifier.calculateClass("CENTRO S.Maria Maggiore " +
+ boolean isAgency = classifier.calculateClass("CENTRO S.Maria Maggiore " +
"angolo Napoleone III in palazzo epoca con portiere 110 mq
ristrutt." +
" IIp salone doppio cucina ab. 2 camere bagno ripost. balcone " +
"perimetrale E. 730.000 tratt. ").equals("A");
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
index 10e0fac..c4d0825 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
@@ -31,17 +31,17 @@ import static org.junit.Assert.assertTrue;
public class NaiveSentenceLanguageModelTest {
@Test
- public void testEmptyVocabularyProbability() throws Exception {
- NaiveSentenceLanguageModel<String> model = new NaiveSentenceLanguageModel<String>();
- assertEquals("probability with an empty vocabulary is always 0", 0d, model.calculateProbability(Collections.<String[]>emptySet(),
- new String[0]), 0d);
- assertEquals("probability with an empty vocabulary is always 0", 0d, model.calculateProbability(Collections.<String[]>emptySet(),
- new String[]{"1", "2", "3"}), 0d);
+ public void testEmptyVocabularyProbability() {
+ NaiveSentenceLanguageModel<String> model = new NaiveSentenceLanguageModel<>();
+ assertEquals("probability with an empty vocabulary is always 0",
+ 0d, model.calculateProbability(Collections.emptySet(), new String[0]), 0d);
+ assertEquals("probability with an empty vocabulary is always 0",
+ 0d, model.calculateProbability(Collections.emptySet(), new String[]{"1", "2", "3"}), 0d);
}
@Test
- public void testRandomVocabularyAndSentence() throws Exception {
- NaiveSentenceLanguageModel<String> model = new NaiveSentenceLanguageModel<String>();
+ public void testRandomVocabularyAndSentence() {
+ NaiveSentenceLanguageModel<String> model = new NaiveSentenceLanguageModel<>();
double probability =
model.calculateProbability(TestUtils.generateRandomVocabulary(),
TestUtils.generateRandomSentence());
assertTrue("a probability measure should be between 0 and 1 [was " +
probability + "]", probability >= 0 && probability <= 1);
}
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
index b2d6d51..b716c26 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
@@ -31,17 +31,17 @@ import static org.junit.Assert.assertTrue;
public class TrigramSentenceLanguageModelTest {
@Test
- public void testEmptyVocabularyProbability() throws Exception {
- TrigramSentenceLanguageModel<String> model = new TrigramSentenceLanguageModel<String>();
- assertEquals("probability with an empty vocabulary is always 0", 0d, model.calculateProbability(Collections.<String[]>emptySet(),
- new String[0]), 0d);
- assertEquals("probability with an empty vocabulary is always 0", 0d, model.calculateProbability(Collections.<String[]>emptySet(),
- new String[]{"1", "2", "3"}), 0d);
+ public void testEmptyVocabularyProbability() {
+ TrigramSentenceLanguageModel<String> model = new TrigramSentenceLanguageModel<>();
+ assertEquals("probability with an empty vocabulary is always 0",
+ 0d, model.calculateProbability(Collections.emptySet(), new String[0]), 0d);
+ assertEquals("probability with an empty vocabulary is always 0",
+ 0d, model.calculateProbability(Collections.emptySet(), new String[]{"1", "2", "3"}), 0d);
}
@Test
- public void testRandomVocabularyAndSentence() throws Exception {
- TrigramSentenceLanguageModel<String> model = new TrigramSentenceLanguageModel<String>();
+ public void testRandomVocabularyAndSentence() {
+ TrigramSentenceLanguageModel<String> model = new TrigramSentenceLanguageModel<>();
double probability =
model.calculateProbability(TestUtils.generateRandomVocabulary(),
TestUtils.generateRandomSentence());
assertTrue("a probability measure should be between 0 and 1 [was " +
probability + "]", probability >= 0 && probability <= 1);
}
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java
index 8da4947..a988e98 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java
@@ -32,7 +32,7 @@ import static org.junit.Assert.assertTrue;
public class NGramUtilsTest {
@Test
public void testBigram() {
- Collection<String[]> set = new LinkedList<String[]>();
+ Collection<String[]> set = new LinkedList<>();
set.add(new String[]{"<s>", "I", "am", "Sam", "</s>"});
set.add(new String[]{"<s>", "Sam", "I", "am", "</s>"});
set.add(new String[]{"<s>", "I", "do", "not", "like", "green", "eggs",
"and", "ham", "</s>"});
@@ -48,7 +48,7 @@ public class NGramUtilsTest {
@Test
public void testTrigram() {
- Collection<String[]> set = new LinkedList<String[]>();
+ Collection<String[]> set = new LinkedList<>();
set.add(new String[]{"<s>", "I", "am", "Sam", "</s>"});
set.add(new String[]{"<s>", "Sam", "I", "am", "</s>"});
set.add(new String[]{"<s>", "I", "do", "not", "like", "green", "eggs",
"and", "ham", "</s>"});
@@ -60,8 +60,8 @@ public class NGramUtilsTest {
}
@Test
- public void testLinearInterpolation() throws Exception {
- Collection<String[]> set = new LinkedList<String[]>();
+ public void testLinearInterpolation() {
+ Collection<String[]> set = new LinkedList<>();
set.add(new String[]{"the", "green", "book", "STOP"});
set.add(new String[]{"my", "blue", "book", "STOP"});
set.add(new String[]{"his", "green", "house", "STOP"});
@@ -74,8 +74,8 @@ public class NGramUtilsTest {
}
@Test
- public void testLinearInterpolation2() throws Exception {
- Collection<String[]> set = new LinkedList<String[]>();
+ public void testLinearInterpolation2() {
+ Collection<String[]> set = new LinkedList<>();
set.add(new String[]{"D", "N", "V", "STOP"});
set.add(new String[]{"D", "N", "V", "STOP"});
Double lambda = 1d / 3d;
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/GradientDescentUtilsTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/GradientDescentUtilsTest.java
index 91e7370..4486120 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/GradientDescentUtilsTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/GradientDescentUtilsTest.java
@@ -28,7 +28,7 @@ import org.junit.Test;
public class GradientDescentUtilsTest {
@Test
- public void testConvergence() throws Exception {
+ public void testConvergence() {
TrainingSet trainingSet = new TrainingSet();
TestUtils.fillTrainingSet(trainingSet, 100, 5);
GradientDescentUtils.batchGradientDescent(trainingSet, 0.00002);
diff --git a/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/RegressionModelUtilsTest.java b/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/RegressionModelUtilsTest.java
index 4c46a9b..8676989 100644
--- a/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/RegressionModelUtilsTest.java
+++ b/nlp-utils/src/test/java/org/apache/opennlp/utils/regression/RegressionModelUtilsTest.java
@@ -22,8 +22,8 @@ import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
/**
* Testcase for {@link
org.apache.opennlp.utils.regression.RegressionModelUtils}
@@ -31,14 +31,15 @@ import static org.junit.Assert.assertTrue;
public class RegressionModelUtilsTest {
@Test
- public void testLMS() throws Exception {
+ public void testLMS() {
TrainingSet trainingSet = new TrainingSet();
trainingSet.add(new TrainingExample(new double[]{10, 10}, 1));
LinearCombinationHypothesis hypothesis = new
LinearCombinationHypothesis(1, 1);
- double[] updatedParameters = RegressionModelUtils.batchLeastMeanSquareUpdate(new double[]{1, 1}, 0.1, trainingSet, hypothesis);
+ double[] updatedParameters = RegressionModelUtils.batchLeastMeanSquareUpdate(
+ new double[]{1, 1}, 0.1, trainingSet, hypothesis);
assertNotNull(updatedParameters);
- assertTrue(updatedParameters.length == 2);
- assertTrue(updatedParameters[0] == -18d);
- assertTrue(updatedParameters[1] == -18d);
+ assertEquals(2, updatedParameters.length);
+ assertEquals(-18d, updatedParameters[0], 0.0);
+ assertEquals(-18d, updatedParameters[1], 0.0);
}
}