Author: beylerian
Date: Fri Mar 25 07:03:25 2016
New Revision: 1736534
URL: http://svn.apache.org/viewvc?rev=1736534&view=rev
Log:
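updated tests to match the new disambiguator API: disambiguate() now returns one sense String per word (List<String> for spans and whole sentences) and getSenseIDs() returns an array; OSCCEvaluatorTest now trains and saves a model for each word before evaluating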
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
Fri Mar 25 07:03:25 2016
@@ -59,7 +59,7 @@ public class IMSEvaluatorTest {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
}
}
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
Fri Mar 25 07:03:25 2016
@@ -43,8 +43,8 @@ import opennlp.tools.util.TrainingParame
*
* The scope of this test is to make sure that the IMS disambiguator code can be
* executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of the
- * disambiguator.
+ * generation or other mistakes which decrease the disambiguation performance of
+ * the disambiguator.
*
* In this test the {@link IMSME} is trained with Semcor and then the computed
* model is used to predict sentences from the training sentences.
@@ -154,9 +154,8 @@ public class IMSMETester {
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = ims.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = ims.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
}
/*
@@ -167,13 +166,15 @@ public class IMSMETester {
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = ims.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = ims.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 1, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -181,11 +182,11 @@ public class IMSMETester {
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = ims.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = ims.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
Fri Mar 25 07:03:25 2016
@@ -22,8 +22,6 @@ package opennlp.tools.disambiguator;
import java.util.ArrayList;
import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.disambiguator.lesk.Lesk;
-import opennlp.tools.disambiguator.lesk.LeskParameters;
import org.junit.Test;
@@ -59,7 +57,7 @@ public class LeskEvaluatorTest {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
}
}
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
Fri Mar 25 07:03:25 2016
@@ -24,9 +24,7 @@ import static org.junit.Assert.assertEqu
import java.util.ArrayList;
import java.util.List;
-import opennlp.tools.disambiguator.lesk.Lesk;
-import opennlp.tools.disambiguator.lesk.LeskParameters;
-import opennlp.tools.disambiguator.lesk.LeskParameters.LESK_TYPE;
+import opennlp.tools.disambiguator.LeskParameters.LESK_TYPE;
import opennlp.tools.util.Span;
import org.junit.BeforeClass;
@@ -35,10 +33,10 @@ import org.junit.Test;
/**
* This is the test class for {@link Lesk}.
*
- * The scope of this test is to make sure that the Lesk disambiguator code can be
- * executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of the
- * disambiguator.
+ * The scope of this test is to make sure that the Lesk disambiguator code can
+ * be executed. This test can not detect mistakes which lead to incorrect
+ * feature generation or other mistakes which decrease the disambiguation
+ * performance of the disambiguator.
*/
public class LeskTester {
// TODO write more tests
@@ -118,9 +116,8 @@ public class LeskTester {
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = lesk.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = lesk.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00:: -1", sense);
}
/*
@@ -131,13 +128,15 @@ public class LeskTester {
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 3, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 4.8",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00:: 6.0", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -145,11 +144,11 @@ public class LeskTester {
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = lesk.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = lesk.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
\ No newline at end of file
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
Fri Mar 25 07:03:25 2016
@@ -22,9 +22,7 @@ package opennlp.tools.disambiguator;
import java.util.ArrayList;
import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-import opennlp.tools.disambiguator.mfs.MFS;
-import opennlp.tools.disambiguator.mfs.MFSParameters;
+import opennlp.tools.disambiguator.MFS;
import org.junit.Test;
@@ -40,7 +38,6 @@ public class MFSEvaluatorTest {
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
MFS mfs = new MFS();
- WSDParameters.isStemCompare = true;
ArrayList<String> words = seReader.getSensevalWords();
@@ -56,7 +53,7 @@ public class MFSEvaluatorTest {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
}
}
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
Fri Mar 25 07:03:25 2016
@@ -26,7 +26,7 @@ import java.util.List;
import org.junit.BeforeClass;
import org.junit.Test;
-import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.disambiguator.MFS;
import opennlp.tools.util.Span;
/**
@@ -34,8 +34,8 @@ import opennlp.tools.util.Span;
*
* The scope of this test is to make sure that the MFS disambiguator code can be
* executed. This test can not detect mistakes which lead to incorrect feature
- * generation or other mistakes which decrease the disambiguation performance of the
- * disambiguator.
+ * generation or other mistakes which decrease the disambiguation performance of
+ * the disambiguator.
*/
public class MFSTester {
// TODO write more tests
@@ -110,9 +110,8 @@ public class MFSTester {
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
}
/*
@@ -123,13 +122,15 @@ public class MFSTester {
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = mfs.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = mfs.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 1, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -137,10 +138,10 @@ public class MFSTester {
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = mfs.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = mfs.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
\ No newline at end of file
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCEvaluatorTest.java
Fri Mar 25 07:03:25 2016
@@ -19,11 +19,15 @@
package opennlp.tools.disambiguator;
+import java.io.IOException;
import java.util.ArrayList;
import opennlp.tools.disambiguator.datareader.SensevalReader;
+import opennlp.tools.disambiguator.oscc.OSCCFactory;
import opennlp.tools.disambiguator.oscc.OSCCME;
+import opennlp.tools.disambiguator.oscc.OSCCModel;
import opennlp.tools.disambiguator.oscc.OSCCParameters;
+import opennlp.tools.util.TrainingParameters;
import org.junit.Test;
@@ -39,28 +43,41 @@ public class OSCCEvaluatorTest {
// TODO write unit test
String modelsDir = "src\\test\\resources\\models\\";
+ String trainingDataDirectory = "src\\test\\resources\\supervised\\models\\";
WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
OSCCParameters OSCCParams = new OSCCParameters("");
+ OSCCParams.setTrainingDataDirectory(trainingDataDirectory);
OSCCME oscc = new OSCCME(OSCCParams);
-
+ OSCCModel model = null;
ArrayList<String> words = seReader.getSensevalWords();
for (String word : words) {
- WSDEvaluator evaluator = new WSDEvaluator(oscc);
-
// don't take verbs because they are not from WordNet
if (!word.split("\\.")[1].equals("v")) {
-
+ try {
+ model = OSCCME.train("en", seReader.getSensevalDataStream(word), new TrainingParameters(), OSCCParams,
+ new OSCCFactory());
+ model.writeModel(OSCCParams.getTrainingDataDirectory() + word);
+ oscc = new OSCCME(model, OSCCParams);
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ WSDHelper.print("skipped sample");
+ }
+
+ WSDEvaluator evaluator = new WSDEvaluator(oscc);
ArrayList<WSDSample> instances = seReader.getSensevalData(word);
if (instances != null) {
WSDHelper.print("------------------" + word + "------------------");
for (WSDSample instance : instances) {
if (instance.getSenseIDs() != null
- && !instance.getSenseIDs().get(0).equals("null")) {
+ && !instance.getSenseIDs()[0].equals("null")) {
evaluator.evaluateSample(instance);
+ }else{
+ WSDHelper.print("skipped sample");
}
}
WSDHelper.print(evaluator.toString());
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
Fri Mar 25 07:03:25 2016
@@ -156,9 +156,8 @@ public class OSCCMETester {
*/
@Test
public void testOneWordDisambiguation() {
- String[] senses = oscc.disambiguate(sentence1, tags1, lemmas1, 8);
-
- assertEquals("Check number of senses", 1, senses.length);
+ String sense = oscc.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
}
/*
@@ -169,13 +168,16 @@ public class OSCCMETester {
@Test
public void testWordSpanDisambiguation() {
Span span = new Span(3, 7);
- List<String[]> senses = oscc.disambiguate(sentence2, tags2, lemmas2, span);
+ List<String> senses = oscc.disambiguate(sentence2, tags2, lemmas2, span);
+
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check number of senses", 1, senses.get(0).length);
- assertEquals("Check monosemous word", 1, senses.get(1).length);
- assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
- assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
/*
@@ -183,11 +185,11 @@ public class OSCCMETester {
*/
@Test
public void testAllWordsDisambiguation() {
- List<String[]> senses = oscc.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses = oscc.disambiguate(sentence3, tags3, lemmas3);
assertEquals("Check number of returned words", 15, senses.size());
assertEquals("Check preposition", "WSDHELPER personal pronoun",
- senses.get(6)[0]);
+ senses.get(6));
}
}
\ No newline at end of file
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java?rev=1736534&r1=1736533&r2=1736534&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
Fri Mar 25 07:03:25 2016
@@ -29,7 +29,7 @@ public class Tester {
String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
// output
- List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+ List<String> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
for (int i = 0; i < sentence3.length; i++) {
System.out.print(sentence3[i] + " : ");
WSDHelper.printResults(ims, senses3.get(i));