Author: tommaso
Date: Tue Nov 18 15:20:27 2014
New Revision: 1640364
URL: http://svn.apache.org/r1640364
Log:
OPENNLP-723 - fixed cky method, minor fixes to formatting
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtils.java
Tue Nov 18 15:20:27 2014
@@ -19,7 +19,6 @@
package org.apache.opennlp.utils.anomalydetection;
import java.math.BigDecimal;
-
import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
@@ -33,7 +32,6 @@ public class AnomalyDetectionUtils {
*
* @param inputs the {@link org.apache.opennlp.utils.TrainingSet} to fit
* @return the <code>double[]</code> containing the Mu parameters for each
feature
- * @throws Exception
*/
public static double[] fitMus(TrainingSet inputs) {
assert inputs != null && inputs.size() > 0 : "empty dataset";
@@ -54,7 +52,6 @@ public class AnomalyDetectionUtils {
* @param mus mean parameters
* @param inputs the {@link TrainingSet} to fit
* @return the <code>double[]</code> containing the standard deviations
- * @throws Exception
*/
public static double[] fitSigmas(double[] mus, TrainingSet inputs) {
assert inputs != null && inputs.size() > 0 : "empty dataset";
@@ -84,10 +81,10 @@ public class AnomalyDetectionUtils {
/**
* calculate the probability of a certain input in a certain training set
*
- * @param x the input
- * @param set the training set
+ * @param x the input
+ * @param set the training set
* @return the probability of the given input
- * @throws Exception
+ * @throws Exception
*/
public static double getGaussianProbability(TrainingExample x, TrainingSet
set) throws Exception {
double[] mus = fitMus(set);
@@ -96,7 +93,7 @@ public class AnomalyDetectionUtils {
}
private static double calculateGaussianProbability(TrainingExample x,
double[] mus,
- double[] sigmas) {
+ double[] sigmas) {
assert mus.length == sigmas.length : "parameters not aligned";
BigDecimal px = new BigDecimal(1d);
for (int i = 0; i < mus.length; i++) {
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
Tue Nov 18 15:20:27 2014
@@ -25,44 +25,44 @@ import java.util.Collection;
*/
public class CFGBuilder {
- private Collection<String> nonTerminalSymbols;
- private Collection<String> terminalSymbols;
- private Collection<Rule> rules;
- private String startSymbol;
- private boolean randomExpansion;
-
- public static CFGBuilder createCFG() {
- return new CFGBuilder();
- }
-
- public CFGBuilder withTerminals(Collection<String> terminalSymbols) {
- this.terminalSymbols = terminalSymbols;
- return this;
- }
-
- public CFGBuilder withNonTerminals(Collection<String> nonTerminalSymbols) {
- this.nonTerminalSymbols = nonTerminalSymbols;
- return this;
- }
-
- public CFGBuilder withRules(Collection<Rule> rules) {
- this.rules = rules;
- return this;
- }
-
- public CFGBuilder withStartSymbol(String startSymbol) {
- this.startSymbol = startSymbol;
- return this;
- }
-
- public CFGBuilder withRandomExpansion(boolean randomExpansion) {
- this.randomExpansion = randomExpansion;
- return this;
- }
-
- public ContextFreeGrammar build() {
- assert nonTerminalSymbols != null && terminalSymbols != null && rules !=
null && startSymbol != null :
- "missing definitions { V : " + nonTerminalSymbols + ", ∑ : "
+ terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
- return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols,
rules, startSymbol, randomExpansion);
- }
+ private Collection<String> nonTerminalSymbols;
+ private Collection<String> terminalSymbols;
+ private Collection<Rule> rules;
+ private String startSymbol;
+ private boolean randomExpansion;
+
+ public static CFGBuilder createCFG() {
+ return new CFGBuilder();
+ }
+
+ public CFGBuilder withTerminals(Collection<String> terminalSymbols) {
+ this.terminalSymbols = terminalSymbols;
+ return this;
+ }
+
+ public CFGBuilder withNonTerminals(Collection<String> nonTerminalSymbols) {
+ this.nonTerminalSymbols = nonTerminalSymbols;
+ return this;
+ }
+
+ public CFGBuilder withRules(Collection<Rule> rules) {
+ this.rules = rules;
+ return this;
+ }
+
+ public CFGBuilder withStartSymbol(String startSymbol) {
+ this.startSymbol = startSymbol;
+ return this;
+ }
+
+ public CFGBuilder withRandomExpansion(boolean randomExpansion) {
+ this.randomExpansion = randomExpansion;
+ return this;
+ }
+
+ public ContextFreeGrammar build() {
+ assert nonTerminalSymbols != null && terminalSymbols != null && rules !=
null && startSymbol != null :
+ "missing definitions { V : " + nonTerminalSymbols + ", ∑ : " +
terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
+ return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules,
startSymbol, randomExpansion);
+ }
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
Tue Nov 18 15:20:27 2014
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Random;
-import java.util.Set;
/**
* A context free grammar
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammar.java
Tue Nov 18 15:20:27 2014
@@ -143,7 +143,7 @@ public class ProbabilisticContextFreeGra
return backPointer;
}
- public BackPointer cky(List<String> sentence,
ProbabilisticContextFreeGrammar pcfg) {
+ public BackPointer cky(List<String> sentence) {
BackPointer backPointer = null;
int n = sentence.size();
@@ -151,7 +151,7 @@ public class ProbabilisticContextFreeGra
for (int i = 0; i < n - l; i++) {
int j = i + l;
double max = 0;
- for (String x : pcfg.getNonTerminalSymbols()) {
+ for (String x : getNonTerminalSymbols()) {
for (Rule r : getRulesForNonTerminal(x)) {
for (int s = i; s < j - 1; s++) {
double q = q(r);
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
Tue Nov 18 15:20:27 2014
@@ -32,7 +32,7 @@ public class NGramUtils {
for (T[] sentence : sentences) {
int idx0 = contains(sentence, x0);
if (idx0 >= 0) {
- if (idx0 + 2 < sentence.length && x1.equals(sentence[idx0+1]) &&
x2.equals(sentence[idx0+2])) {
+ if (idx0 + 2 < sentence.length && x1.equals(sentence[idx0 + 1]) &&
x2.equals(sentence[idx0 + 2])) {
count++;
}
}
@@ -42,7 +42,7 @@ public class NGramUtils {
private static <T> int contains(T[] sentence, T word) {
for (int i = 0; i < sentence.length; i++) {
- if (word.equals(sentence[i])){
+ if (word.equals(sentence[i])) {
return i;
}
}
@@ -61,8 +61,7 @@ public class NGramUtils {
if (foundPreceding && sequentWord.equals(w)) {
foundPreceding = false;
result++;
- }
- else
+ } else
foundPreceding = false;
}
}
@@ -85,11 +84,11 @@ public class NGramUtils {
}
public static <T> Double calculateBigramMLProbability(T sequentWord, T
precedingWord, Collection<T[]> set) {
- return count(sequentWord, precedingWord, set)/ count(precedingWord, set);
+ return count(sequentWord, precedingWord, set) / count(precedingWord, set);
}
public static <T> Double calculateTrigramMLProbability(T x0, T x1, T x2,
Collection<T[]> sentences) {
- return count(x0, x1, x2, sentences)/ count(x1, x0, sentences);
+ return count(x0, x1, x2, sentences) / count(x1, x0, sentences);
}
public static Double calculateBigramPriorSmoothingProbability(String
sequentWord, String precedingWord, Collection<String[]> set, Double k) {
@@ -99,17 +98,17 @@ public class NGramUtils {
public static <T> Double calculateUnigramMLProbability(T word,
Collection<T[]> set) {
double vocSize = 0d;
for (T[] s : set) {
- vocSize+= s.length;
+ vocSize += s.length;
}
return count(word, set) / vocSize;
}
public static <T> Double calculateLinearInterpolationProbability(T x0, T x1,
T x2, Collection<T[]> sentences,
- Double lambda1,
Double lambda2, Double lambda3) {
+ Double
lambda1, Double lambda2, Double lambda3) {
assert lambda1 + lambda2 + lambda3 == 1 : "lambdas sum should be equals to
1";
assert lambda1 > 0 && lambda2 > 0 && lambda3 > 0 : "lambdas should all be
greater than 0";
- return lambda1 * calculateTrigramMLProbability(x0, x1, x2, sentences) +
+ return lambda1 * calculateTrigramMLProbability(x0, x1, x2, sentences) +
lambda2 * calculateBigramMLProbability(x2, x1, sentences) +
lambda3 * calculateUnigramMLProbability(x2, sentences);
@@ -117,7 +116,7 @@ public class NGramUtils {
private static <T> Collection<T> flatSet(Collection<T[]> set) {
Collection<T> flatSet = new HashSet<T>();
- for (T[] sentence : set){
+ for (T[] sentence : set) {
flatSet.addAll(Arrays.asList(sentence));
}
return flatSet;
@@ -127,7 +126,7 @@ public class NGramUtils {
Double missingMass = 0d;
Double countWord = count(x1, set);
for (T word : flatSet(set)) {
- missingMass += (count(word, x1, set) - discount)/ countWord;
+ missingMass += (count(word, x1, set) - discount) / countWord;
}
return 1 - missingMass;
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/GradientDescentUtils.java
Tue Nov 18 15:20:27 2014
@@ -19,7 +19,6 @@
package org.apache.opennlp.utils.regression;
import java.util.Arrays;
-
import org.apache.opennlp.utils.TrainingSet;
/**
Modified:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
(original)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
Tue Nov 18 15:20:27 2014
@@ -18,11 +18,10 @@
*/
package org.apache.opennlp.utils.anomalydetection;
-import org.junit.Test;
-
import org.apache.opennlp.utils.TestUtils;
import org.apache.opennlp.utils.TrainingExample;
import org.apache.opennlp.utils.TrainingSet;
+import org.junit.Test;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
Modified:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java?rev=1640364&r1=1640363&r2=1640364&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
(original)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ProbabilisticContextFreeGrammarTest.java
Tue Nov 18 15:20:27 2014
@@ -138,7 +138,7 @@ public class ProbabilisticContextFreeGra
sentence.add("the");
sentence.add("man");
- ProbabilisticContextFreeGrammar.BackPointer backPointer =
pcfg.cky(sentence, pcfg);
+ ProbabilisticContextFreeGrammar.BackPointer backPointer =
pcfg.cky(sentence);
check(pcfg, backPointer, sentence);
// fixed sentence two
@@ -148,14 +148,14 @@ public class ProbabilisticContextFreeGra
sentence.add("works");
sentence.add("nicely");
- backPointer = pcfg.cky(sentence, pcfg);
+ backPointer = pcfg.cky(sentence);
check(pcfg, backPointer, sentence);
// random sentence generated by the grammar
String[] expansion = pcfg.leftMostDerivation("S");
sentence = Arrays.asList(expansion);
- backPointer = pcfg.cky(sentence, pcfg);
+ backPointer = pcfg.cky(sentence);
check(pcfg, backPointer, sentence);
}