Author: tommaso
Date: Tue Sep 16 08:57:11 2014
New Revision: 1625223
URL: http://svn.apache.org/r1625223
Log:
OPENNLP-713 - fixed some javadocs, using generics in ngrams utils, added more
tests to cfg and language modeling packages
Added:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/CFGBuilderTest.java
(with props)
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
(with props)
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
(with props)
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/NaiveBayesClassifier.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
Tue Sep 16 08:57:11 2014
@@ -61,7 +61,8 @@ public class CFGBuilder {
}
public ContextFreeGrammar build() {
- assert nonTerminalSymbols != null && terminalSymbols != null && rules
!= null && startSymbol != null;
+ assert nonTerminalSymbols != null && terminalSymbols != null && rules !=
null && startSymbol != null :
+ "missing definitions { V : " + nonTerminalSymbols + ", ∑ : "
+ terminalSymbols + ", R : " + rules + ", S : " + startSymbol + "}";
return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols,
rules, startSymbol, randomExpansion);
}
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
Tue Sep 16 08:57:11 2014
@@ -28,6 +28,7 @@ import java.util.Set;
* A context free grammar
*/
public class ContextFreeGrammar {
+
private final Collection<String> nonTerminalSymbols;
private final Collection<String> terminalSymbols;
private final Collection<Rule> rules;
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/NaiveBayesClassifier.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/NaiveBayesClassifier.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/NaiveBayesClassifier.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/classification/NaiveBayesClassifier.java
Tue Sep 16 08:57:11 2014
@@ -23,5 +23,5 @@ package org.apache.opennlp.utils.classif
*/
public interface NaiveBayesClassifier<I, O> {
- public O calculateClass(I inputDocument) throws Exception;
+ O calculateClass(I inputDocument) throws Exception;
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/LanguageModel.java
Tue Sep 16 08:57:11 2014
@@ -21,18 +21,20 @@ package org.apache.opennlp.utils.languag
import java.util.Collection;
/**
- * A language model calculate the probability <i>p</i> (between 0 and 1) of a
+ * A language model can calculate the probability <i>p</i> (between 0 and 1)
of a
* certain set of <code>T</code> objects, given a vocabulary.
+ * <code>T</code> is usually an {@link java.lang.Iterable} or an array as
language models are very commonly used for
+ * sentences, so that T is e.g. an array of <code>String</code>s.
*/
public interface LanguageModel<T> {
/**
- * Calculate the probability of a sentence given a vocabulary
+ * Calculate the probability of a sample, given a vocabulary
*
* @param vocabulary a {@link Collection} of objects of type <code>T</code>
* @param sample the sample to evaluate the probability for
* @return a <code>double</code> between <code>0</code> and <code>1</code>
*/
- public double calculateProbability(Collection<T> vocabulary, T sample);
+ double calculateProbability(Collection<T> vocabulary, T sample);
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModel.java
Tue Sep 16 08:57:11 2014
@@ -26,8 +26,10 @@ import java.util.Collections;
* a sentence over the no. of sentences in the vocabulary.
*/
public class NaiveSentenceLanguageModel<T> implements LanguageModel<T[]> {
+
@Override
public double calculateProbability(Collection<T[]> vocabulary, T[] sentence)
{
- return Collections.frequency(vocabulary, sentence) / vocabulary.size();
+ return vocabulary.isEmpty() ? 0 : Collections.frequency(vocabulary,
sentence) / vocabulary.size();
}
+
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/NoisyChannel.java
Tue Sep 16 08:57:11 2014
@@ -44,6 +44,6 @@ public abstract class NoisyChannel {
public abstract Double calculatePrior(String word);
- public abstract Double calculateLikelihood(String mispelledWord, String
word);
+ public abstract Double calculateLikelihood(String misspelledWord, String
word);
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModel.java
Tue Sep 16 08:57:11 2014
@@ -21,70 +21,77 @@ package org.apache.opennlp.utils.languag
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
-
import org.apache.opennlp.utils.ngram.NGramUtils;
/**
* A simple trigram language model for sentences made of <code>String</code>
arrays
*/
-public class TrigramSentenceLanguageModel implements LanguageModel<String[]> {
+public class TrigramSentenceLanguageModel<T> implements LanguageModel<T[]> {
+
@Override
- public double calculateProbability(Collection<String[]> vocabulary, String[]
sample) {
- double probability = 1d;
- for (Trigram trigram : getTrigrams(sample)) {
- if (trigram.getX0() != null && trigram.getX1() != null) {
- // default
- probability *=
NGramUtils.calculateTrigramMLProbability(trigram.getX0(), trigram.getX1(),
trigram.getX2(), vocabulary);
- } else if (trigram.getX0() == null && trigram.getX1() != null) {
- // bigram
- probability *=
NGramUtils.calculateBigramMLProbability(trigram.getX2(), trigram.getX1(),
vocabulary);
- } else if (trigram.getX0() == null && trigram.getX1() == null) {
- // unigram
- probability *=
NGramUtils.calculateUnigramMLProbability(trigram.getX2(), vocabulary);
- } else {
- // unexpected
+ public double calculateProbability(Collection<T[]> vocabulary, T[] sample) {
+ double probability = 0d;
+ if (!vocabulary.isEmpty()) {
+ for (Trigram trigram : getTrigrams(sample)) {
+ if (trigram.getX0() != null && trigram.getX1() != null) {
+ // default
+ probability +=
Math.log(NGramUtils.calculateTrigramMLProbability(trigram.getX0(),
trigram.getX1(), trigram.getX2(), vocabulary));
+ } else if (trigram.getX0() == null && trigram.getX1() != null) {
+ // bigram
+ probability +=
Math.log(NGramUtils.calculateBigramMLProbability(trigram.getX2(),
trigram.getX1(), vocabulary));
+ } else if (trigram.getX0() == null) {
+ // unigram
+ probability +=
Math.log(NGramUtils.calculateUnigramMLProbability(trigram.getX2(), vocabulary));
+ } else {
+ throw new RuntimeException("unexpected");
+ }
+ }
+ if (!Double.isNaN(probability)) {
+ probability = Math.exp(probability);
}
}
return probability;
}
- private Set<Trigram> getTrigrams(String[] sample) {
+ private Set<Trigram> getTrigrams(T[] sample) {
Set<Trigram> trigrams = new HashSet<Trigram>();
- for (int i = 0; i < sample.length - 2; i++) {
- String x0 = null;
- String x1 = null;
- String x2 = sample[i];
- if (i > 1) {
+ for (int i = 0; i < sample.length; i++) {
+ T x0 = null;
+ T x1 = null;
+ T x2 = sample[i];
+ if (i > 0) {
x1 = sample[i - 1];
}
- if (i > 2) {
+ if (i > 1) {
x0 = sample[i - 2];
}
- trigrams.add(new Trigram(x0, x1, x2));
+ if (x0 != null && x1 != null && x2 != null) {
+ trigrams.add(new Trigram(x0, x1, x2));
+ }
}
return trigrams;
}
private class Trigram {
- private final String x0;
- private final String x1;
- private final String x2;
+ private final T x0;
+ private final T x1;
+ private final T x2;
- private Trigram(String x0, String x1, String x2) {
+ private Trigram(T x0, T x1, T x2) {
this.x0 = x0;
this.x1 = x1;
this.x2 = x2;
}
- public String getX0() {
+ public T getX0() {
return x0;
}
- public String getX1() {
+ public T getX1() {
return x1;
}
- public String getX2() {
+ public T getX2() {
return x2;
}
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/ngram/NGramUtils.java
Tue Sep 16 08:57:11 2014
@@ -27,9 +27,9 @@ import java.util.HashSet;
*/
public class NGramUtils {
- private static Double count(String x0, String x1, String x2,
Collection<String[]> sentences) {
+ private static <T> Double count(T x0, T x1, T x2, Collection<T[]> sentences)
{
Double count = 0d;
- for (String[] sentence : sentences) {
+ for (T[] sentence : sentences) {
int idx0 = contains(sentence, x0);
if (idx0 >= 0) {
if (idx0 + 2 < sentence.length && x1.equals(sentence[idx0+1]) &&
x2.equals(sentence[idx0+2])) {
@@ -40,7 +40,7 @@ public class NGramUtils {
return count;
}
- private static int contains(String[] sentence, String word) {
+ private static <T> int contains(T[] sentence, T word) {
for (int i = 0; i < sentence.length; i++) {
if (word.equals(sentence[i])){
return i;
@@ -49,11 +49,11 @@ public class NGramUtils {
return -1;
}
- private static Double count(String sequentWord, String precedingWord,
Collection<String[]> set) {
+ private static <T> Double count(T sequentWord, T precedingWord,
Collection<T[]> set) {
Double result = 0d;
boolean foundPreceding = false;
- for (String[] sentence : set) {
- for (String w : sentence) {
+ for (T[] sentence : set) {
+ for (T w : sentence) {
if (precedingWord.equals(w)) {
foundPreceding = true;
continue;
@@ -69,10 +69,10 @@ public class NGramUtils {
return result;
}
- private static Double count(String word, Collection<String[]> set) {
+ private static <T> Double count(T word, Collection<T[]> set) {
Double result = 0d;
- for (String[] sentence : set) {
- for (String w : sentence) {
+ for (T[] sentence : set) {
+ for (T w : sentence) {
if (word.equals(w))
result++;
}
@@ -80,15 +80,15 @@ public class NGramUtils {
return result;
}
- public static Double calculateLaplaceSmoothingProbability(String
sequentWord, String precedingWord, Collection<String[]> set, Double k) {
+ public static <T> Double calculateLaplaceSmoothingProbability(T sequentWord,
T precedingWord, Collection<T[]> set, Double k) {
return (count(sequentWord, precedingWord, set) + k) /
(count(precedingWord, set) + k * set.size());
}
- public static Double calculateBigramMLProbability(String sequentWord, String
precedingWord, Collection<String[]> set) {
+ public static <T> Double calculateBigramMLProbability(T sequentWord, T
precedingWord, Collection<T[]> set) {
return count(sequentWord, precedingWord, set)/ count(precedingWord, set);
}
- public static Double calculateTrigramMLProbability(String x0, String x1,
String x2, Collection<String[]> sentences) {
+ public static <T> Double calculateTrigramMLProbability(T x0, T x1, T x2,
Collection<T[]> sentences) {
return count(x0, x1, x2, sentences)/ count(x1, x0, sentences);
}
@@ -96,18 +96,18 @@ public class NGramUtils {
return (count(sequentWord, precedingWord, set) + k *
calculateUnigramMLProbability(sequentWord, set)) / (count(precedingWord, set) +
k * set.size());
}
- public static Double calculateUnigramMLProbability(String word,
Collection<String[]> set) {
+ public static <T> Double calculateUnigramMLProbability(T word,
Collection<T[]> set) {
double vocSize = 0d;
- for (String[] s : set) {
+ for (T[] s : set) {
vocSize+= s.length;
}
return count(word, set) / vocSize;
}
- public static Double calculateLinearInterpolationProbability(String x0,
String x1, String x2, Collection<String[]> sentences,
+ public static <T> Double calculateLinearInterpolationProbability(T x0, T x1,
T x2, Collection<T[]> sentences,
Double lambda1,
Double lambda2, Double lambda3) {
assert lambda1 + lambda2 + lambda3 == 1 : "lambdas sum should be equals to
1";
- assert lambda1 > 0 && lambda2 > 0 && lambda3 > 0 : "lambdas should be
greater than 0";
+ assert lambda1 > 0 && lambda2 > 0 && lambda3 > 0 : "lambdas should all be
greater than 0";
return lambda1 * calculateTrigramMLProbability(x0, x1, x2, sentences) +
lambda2 * calculateBigramMLProbability(x2, x1, sentences) +
@@ -115,18 +115,18 @@ public class NGramUtils {
}
- private static Collection<String> flatSet(Collection<String[]> set) {
- Collection<String> flatSet = new HashSet<String>();
- for (String[] sentence : set){
+ private static <T> Collection<T> flatSet(Collection<T[]> set) {
+ Collection<T> flatSet = new HashSet<T>();
+ for (T[] sentence : set){
flatSet.addAll(Arrays.asList(sentence));
}
return flatSet;
}
- public static Double calculateMissingBigramProbabilityMass(String x1, Double
discount, Collection<String[]> set) {
+ public static <T> Double calculateMissingBigramProbabilityMass(T x1, Double
discount, Collection<T[]> set) {
Double missingMass = 0d;
Double countWord = count(x1, set);
- for (String word : flatSet(set)) {
+ for (T word : flatSet(set)) {
missingMass += (count(word, x1, set) - discount)/ countWord;
}
return 1 - missingMass;
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/Hypothesis.java
Tue Sep 16 08:57:11 2014
@@ -29,12 +29,12 @@ public interface Hypothesis {
* @param inputs an array of inputs as <code>double</code>
* @return a <code>double</code> representing the output
*/
- public double calculateOutput(double[] inputs);
+ double calculateOutput(double[] inputs);
/**
* update the internal model's parameters.
*
* @param parameters an array of <code>double</code> containing the updated
parameters
*/
- public void updateParameters(double[] parameters);
+ void updateParameters(double[] parameters);
}
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
(original)
+++
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/regression/LinearCombinationHypothesis.java
Tue Sep 16 08:57:11 2014
@@ -19,7 +19,7 @@
package org.apache.opennlp.utils.regression;
/**
- * Simplest {@link Hypothesis} which just linear combines inputs with weights
+ * Simplest {@link Hypothesis} which just linearly combines inputs with weights
*/
public class LinearCombinationHypothesis implements Hypothesis {
private double[] weights;
Modified:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
(original)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/TestUtils.java
Tue Sep 16 08:57:11 2014
@@ -18,8 +18,9 @@
*/
package org.apache.opennlp.utils;
-import org.apache.opennlp.utils.TrainingExample;
-import org.apache.opennlp.utils.TrainingSet;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Random;
import org.junit.Ignore;
/**
@@ -28,6 +29,8 @@ import org.junit.Ignore;
@Ignore
public class TestUtils {
+ private static Random r = new Random();
+
public static void fillTrainingSet(TrainingSet trainingSet, int size, int
dimension) {
for (int i = 0; i < size; i++) {
double[] inputs = new double[dimension];
@@ -38,4 +41,24 @@ public class TestUtils {
trainingSet.add(new TrainingExample(inputs, out));
}
}
+
+ public static Collection<String[]> generateRandomVocabulary() {
+ int size = r.nextInt(1000);
+ Collection<String[]> vocabulary = new ArrayList<String[]>(size);
+ for (int i = 0; i < size; i++) {
+ String[] sentence = generateRandomSentence();
+ vocabulary.add(sentence);
+ }
+ return vocabulary;
+ }
+
+ public static String[] generateRandomSentence() {
+ int dimension = r.nextInt(10);
+ String[] sentence = new String[dimension];
+ for (int j = 0; j < dimension; j++) {
+ char c = (char) r.nextInt(10);
+ sentence[j] = c + "-" + c + "-" + c;
+ }
+ return sentence;
+ }
}
Modified:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
(original)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/anomalydetection/AnomalyDetectionUtilsTest.java
Tue Sep 16 08:57:11 2014
@@ -31,8 +31,9 @@ import static org.junit.Assert.assertTru
* Testcase for {@link
org.apache.opennlp.utils.anomalydetection.AnomalyDetectionUtils}
*/
public class AnomalyDetectionUtilsTest {
+
@Test
- public void testGaussianDistributionProbability() throws Exception {
+ public void testGaussianDistributionProbabilityFromFitParameters() throws
Exception {
TrainingSet trainingSet = new TrainingSet();
TestUtils.fillTrainingSet(trainingSet, 100, 5);
double[] mus = AnomalyDetectionUtils.fitMus(trainingSet);
@@ -46,7 +47,7 @@ public class AnomalyDetectionUtilsTest {
}
@Test
- public void testGaussianDistributionProbability2() throws Exception {
+ public void testGaussianDistributionProbabilityFromTrainingSet() throws
Exception {
TrainingSet trainingSet = new TrainingSet();
TestUtils.fillTrainingSet(trainingSet, 100, 5);
TrainingExample newInput = new TrainingExample(new double[]{1d, 2d, 1000d,
123d, 0.1d}, 0d);
Added:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/CFGBuilderTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/CFGBuilderTest.java?rev=1625223&view=auto
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/CFGBuilderTest.java
(added)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/CFGBuilderTest.java
Tue Sep 16 08:57:11 2014
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.opennlp.utils.cfg;
+
+import java.util.Arrays;
+import java.util.Collections;
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Testcase for {@link org.apache.opennlp.utils.cfg.CFGBuilder}
+ */
+public class CFGBuilderTest {
+
+ @Test
+ public void testVoidBuild() throws Exception {
+ CFGBuilder builder = CFGBuilder.createCFG();
+ assertNotNull(builder);
+ try {
+ builder.build();
+ fail("cannot build a grammar without V, ∑, R and S");
+ } catch (AssertionError e) {
+ // expected to fail
+ }
+ }
+
+ @Test
+ public void testBuildWithEmptySets() throws Exception {
+ CFGBuilder builder = CFGBuilder.createCFG().
+ withNonTerminals(Collections.<String>emptyList()).
+ withTerminals(Collections.<String>emptyList()).
+ withRules(Collections.<Rule>emptyList()).
+ withStartSymbol("");
+ try {
+ assertNotNull(builder.build());
+ fail("cannot build a grammar whose start symbol doesn't belong to the
non terminals symbols set");
+ } catch (AssertionError e) {
+ // expected to fail
+ }
+ }
+
+ @Test
+ public void testBuildWithMinimalGrammarSettings() throws Exception {
+ CFGBuilder builder = CFGBuilder.createCFG().
+ withNonTerminals(Arrays.asList("")).
+ withTerminals(Collections.<String>emptyList()).
+ withRules(Collections.<Rule>emptyList()).
+ withStartSymbol("");
+ assertNotNull(builder.build());
+ }
+
+ @Test
+ public void testBuildWithMinimalGrammarSettingsAndRandomExpansion() throws
Exception {
+ CFGBuilder builder = CFGBuilder.createCFG().
+ withNonTerminals(Arrays.asList("")).
+ withTerminals(Collections.<String>emptyList()).
+ withRules(Collections.<Rule>emptyList()).
+ withRandomExpansion(true).
+ withStartSymbol("");
+ assertNotNull(builder.build());
+ }
+}
Propchange:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/CFGBuilderTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
(original)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
Tue Sep 16 08:57:11 2014
@@ -18,13 +18,8 @@
*/
package org.apache.opennlp.utils.cfg;
-import java.util.Arrays;
import java.util.Collection;
-import java.util.HashSet;
import java.util.LinkedList;
-import java.util.Set;
-import java.util.TreeSet;
-import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
Added:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java?rev=1625223&view=auto
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
(added)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
Tue Sep 16 08:57:11 2014
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.opennlp.utils.languagemodel;
+
+import java.util.Collections;
+import org.apache.opennlp.utils.TestUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Testcase for {@link
org.apache.opennlp.utils.languagemodel.NaiveSentenceLanguageModel}
+ */
+public class NaiveSentenceLanguageModelTest {
+
+ @Test
+ public void testEmptyVocabularyProbability() throws Exception {
+ NaiveSentenceLanguageModel<String> model = new
NaiveSentenceLanguageModel<String>();
+ assertEquals("probability with an empty vocabulary is always 0", 0d,
model.calculateProbability(Collections.<String[]>emptySet(),
+ new String[0]), 0d);
+ assertEquals("probability with an empty vocabulary is always 0", 0d,
model.calculateProbability(Collections.<String[]>emptySet(),
+ new String[]{"1", "2", "3"}), 0d);
+ }
+
+ @Test
+ public void testRandomVocabularyAndSentence() throws Exception {
+ NaiveSentenceLanguageModel<String> model = new
NaiveSentenceLanguageModel<String>();
+ double probability =
model.calculateProbability(TestUtils.generateRandomVocabulary(),
TestUtils.generateRandomSentence());
+ assertTrue("a probability measure should be between 0 and 1 [was " +
probability + "]", probability >= 0 && probability <= 1);
+ }
+
+}
Propchange:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/NaiveSentenceLanguageModelTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added:
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java?rev=1625223&view=auto
==============================================================================
---
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
(added)
+++
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
Tue Sep 16 08:57:11 2014
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.opennlp.utils.languagemodel;
+
+import java.util.Collections;
+import org.apache.opennlp.utils.TestUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Testcase for {@link org.apache.opennlp.utils.languagemodel.TrigramSentenceLanguageModel}
+ */
+public class TrigramSentenceLanguageModelTest {
+
+ @Test
+ public void testEmptyVocabularyProbability() throws Exception {
+ TrigramSentenceLanguageModel<String> model = new TrigramSentenceLanguageModel<String>();
+ assertEquals("probability with an empty vocabulary is always 0", 0d, model.calculateProbability(Collections.<String[]>emptySet(),
+ new String[0]), 0d);
+ assertEquals("probability with an empty vocabulary is always 0", 0d, model.calculateProbability(Collections.<String[]>emptySet(),
+ new String[]{"1", "2", "3"}), 0d);
+ }
+
+ @Test
+ public void testRandomVocabularyAndSentence() throws Exception {
+ TrigramSentenceLanguageModel<String> model = new TrigramSentenceLanguageModel<String>();
+ double probability = model.calculateProbability(TestUtils.generateRandomVocabulary(), TestUtils.generateRandomSentence());
+ assertTrue("a probability measure should be between 0 and 1 [was " + probability + "]", probability >= 0 && probability <= 1);
+ }
+
+}
Propchange: opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/languagemodel/TrigramSentenceLanguageModelTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java?rev=1625223&r1=1625222&r2=1625223&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java (original)
+++ opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/ngram/NGramUtilsTest.java Tue Sep 16 08:57:11 2014
@@ -33,41 +33,40 @@ public class NGramUtilsTest {
@Test
public void testBigram() {
Collection<String[]> set = new LinkedList<String[]>();
- set.add(new String[]{"<s>","I","am","Sam","</s>"});
- set.add(new String[]{"<s>","Sam","I","am","</s>"});
- set.add(new String[]{"<s>","I","do","not","like","green","eggs","and","ham","</s>"});
+ set.add(new String[]{"<s>", "I", "am", "Sam", "</s>"});
+ set.add(new String[]{"<s>", "Sam", "I", "am", "</s>"});
+ set.add(new String[]{"<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"});
set.add(new String[]{});
Double d = NGramUtils.calculateBigramMLProbability("I", "<s>", set);
- assertTrue(d>0);
- assertEquals(Double.valueOf(0.6666666666666666d),d);
+ assertTrue(d > 0);
+ assertEquals(Double.valueOf(0.6666666666666666d), d);
d = NGramUtils.calculateBigramMLProbability("</s>", "Sam", set);
- assertEquals(Double.valueOf(0.5d),d);
+ assertEquals(Double.valueOf(0.5d), d);
d = NGramUtils.calculateBigramMLProbability("Sam", "<s>", set);
- assertEquals(Double.valueOf(0.3333333333333333d),d);
+ assertEquals(Double.valueOf(0.3333333333333333d), d);
}
@Test
public void testTrigram() {
Collection<String[]> set = new LinkedList<String[]>();
- set.add(new String[]{"<s>","I","am","Sam","</s>"});
- set.add(new String[]{"<s>","Sam","I","am","</s>"});
- set.add(new String[]{"<s>","I","do","not","like","green","eggs","and","ham","</s>"});
+ set.add(new String[]{"<s>", "I", "am", "Sam", "</s>"});
+ set.add(new String[]{"<s>", "Sam", "I", "am", "</s>"});
+ set.add(new String[]{"<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"});
set.add(new String[]{});
- Double d = NGramUtils.calculateTrigramMLProbability("I", "am", "Sam",set);
- assertTrue(d>0);
- assertEquals(Double.valueOf(0.5),d);
- d = NGramUtils.calculateTrigramMLProbability("Sam","I", "am", set);
- assertEquals(Double.valueOf(1d),d);
+ Double d = NGramUtils.calculateTrigramMLProbability("I", "am", "Sam", set);
+ assertEquals(Double.valueOf(0.5), d);
+ d = NGramUtils.calculateTrigramMLProbability("Sam", "I", "am", set);
+ assertEquals(Double.valueOf(1d), d);
}
@Test
public void testLinearInterpolation() throws Exception {
Collection<String[]> set = new LinkedList<String[]>();
- set.add(new String[]{"the","green","book","STOP"});
- set.add(new String[]{"my","blue","book","STOP"});
- set.add(new String[]{"his","green","house","STOP"});
- set.add(new String[]{"book","STOP"});
- Double lambda = 1d/3d;
+ set.add(new String[]{"the", "green", "book", "STOP"});
+ set.add(new String[]{"my", "blue", "book", "STOP"});
+ set.add(new String[]{"his", "green", "house", "STOP"});
+ set.add(new String[]{"book", "STOP"});
+ Double lambda = 1d / 3d;
Double d = NGramUtils.calculateLinearInterpolationProbability("the",
"green", "book", set, lambda, lambda, lambda);
assertNotNull(d);
assertTrue(d > 0);
@@ -77,9 +76,9 @@ public class NGramUtilsTest {
@Test
public void testLinearInterpolation2() throws Exception {
Collection<String[]> set = new LinkedList<String[]>();
- set.add(new String[]{"D","N","V","STOP"});
- set.add(new String[]{"D","N","V","STOP"});
- Double lambda = 1d/3d;
+ set.add(new String[]{"D", "N", "V", "STOP"});
+ set.add(new String[]{"D", "N", "V", "STOP"});
+ Double lambda = 1d / 3d;
Double d = NGramUtils.calculateLinearInterpolationProbability("N", "V",
"STOP", set, lambda, lambda, lambda);
assertNotNull(d);
assertTrue(d > 0);