Author: beylerian
Date: Tue Jun 7 09:23:03 2016
New Revision: 1747175
URL: http://svn.apache.org/viewvc?rev=1747175&view=rev
Log:
OPENNLP-843 - Grouped the two supervised techniques into a common one with
different context generators (the default context generator is from the IMS
approach) and updated the unit tests. The useless classes still need to be removed.
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
- copied, changed from r1746846,
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
- copied, changed from r1746846,
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
Removed:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
opennlp/sandbox/opennlp-wsd/src/test/ (props changed)
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
Tue Jun 7 09:23:03 2016
@@ -23,8 +23,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
/**
* Class for the extraction of features for the different Supervised
* Disambiguation approaches.<br>
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
Tue Jun 7 09:23:03 2016
@@ -42,7 +42,7 @@ public class IMSWSDContextGenerator impl
return windowTags;
}
- public String[] extractSurroundingWords(int index, String[] toks,
+ public String[] extractSurroundingContext(int index, String[] toks,
String[] lemmas, int windowSize) {
// TODO consider the windowSize
@@ -117,7 +117,7 @@ public class IMSWSDContextGenerator impl
HashSet<String> surroundingWords = new HashSet<>();
surroundingWords.addAll(Arrays
- .asList(extractSurroundingWords(index, tokens, lemmas, windowSize)));
+ .asList(extractSurroundingContext(index, tokens, lemmas, windowSize)));
String[] localCollocations = extractLocalCollocations(index, tokens,
ngram);
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
Tue Jun 7 09:23:03 2016
@@ -53,8 +53,8 @@ public class Lesk extends WSDisambiguato
/**
* Initializes the WSDParameters object and sets the input parameters
- *
- * @param Input
+ *
+ * @param params
* Parameters
* @throws InvalidParameterException
*/
@@ -65,8 +65,8 @@ public class Lesk extends WSDisambiguato
/**
* If the parameters are null set the default ones, else only set them if they
* valid. Invalid parameters will return a exception
- *
- * @param Input
+ *
+ * @param params
* parameters
* @throws InvalidParameterException
*/
@@ -75,7 +75,7 @@ public class Lesk extends WSDisambiguato
if (params == null) {
this.params = new LeskParameters();
} else {
- if (params.isValid()) {
+ if (params.areValid()) {
this.params = (LeskParameters) params;
} else {
throw new InvalidParameterException("wrong params");
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
Tue Jun 7 09:23:03 2016
@@ -148,7 +148,7 @@ public class LeskParameters extends WSDP
*
* @see opennlp.tools.disambiguator.WSDParameters#isValid()
*/
- public boolean isValid() {
+ public boolean areValid() {
switch (this.leskType) {
case LESK_BASIC:
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
Tue Jun 7 09:23:03 2016
@@ -30,7 +30,7 @@ import net.sf.extjwnl.data.Synset;
*/
public class OSCCWSDContextGenerator implements WSDContextGenerator {
- public String[] extractSurroundingContextClusters(int index, String[] toks,
+ public String[] extractSurroundingContext(int index, String[] toks,
String[] tags, String[] lemmas, int windowSize) {
// TODO consider windowSize
@@ -78,7 +78,7 @@ public class OSCCWSDContextGenerator imp
HashSet<String> surroundingContextClusters = new HashSet<>();
surroundingContextClusters.addAll(Arrays.asList(
- extractSurroundingContextClusters(index, toks, tags, lemmas,
+ extractSurroundingContext(index, toks, tags, lemmas,
windowSize)));
String[] serializedFeatures = new String[model.size()];
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java?rev=1747175&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
Tue Jun 7 09:23:03 2016
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.File;
+
+/**
+ * Default parameters for the supervised disambiguation (originally the IMS
+ * approach): the language code, the window size and n-gram length used for
+ * feature extraction, the sense source, and the directory containing the
+ * training data files.
+ */
+public class WSDDefaultParameters extends WSDParameters {
+
+ protected String languageCode; // set to DFLT_LANG_CODE by the constructor
+ protected int windowSize; // window size for the Surrounding Words features
+ protected int ngram; // number of words for the Local Collocations features
+
+ protected String trainingDataDirectory; // created by the constructor if it does not exist
+
+ protected static final int DFLT_WIN_SIZE = 3;
+ protected static final int DFLT_NGRAM = 2;
+ protected static final String DFLT_LANG_CODE = "En";
+ protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
+
+ /**
+ * Creates the parameters from the four given values. The language code is
+ * set to the default ({@link #DFLT_LANG_CODE}), and the training data
+ * directory is created when it does not exist yet.
+ *
+ * @param windowSize the size of the window used for the extraction of the
+ * Surrounding Words features
+ * @param ngram the number of words used for the extraction of the Local
+ * Collocations features
+ * @param senseSource the sense source, e.g. {@code SenseSource.WORDNET}
+ * @param trainingDataDirectory path of the directory holding the training
+ * data; NOTE(review): a null value makes {@code new File(...)} throw a
+ * NullPointerException
+ */
+ public WSDDefaultParameters(int windowSize, int ngram,
+ SenseSource senseSource, String trainingDataDirectory) {
+
+ this.languageCode = DFLT_LANG_CODE;
+ this.windowSize = windowSize;
+ this.ngram = ngram;
+ this.senseSource = senseSource;
+ this.trainingDataDirectory = trainingDataDirectory;
+ // make sure the directory exists; the boolean result of mkdirs() is not checked
+ File folder = new File(trainingDataDirectory);
+ if (!folder.exists())
+ folder.mkdirs();
+ }
+ /** Uses the default window size, n-gram length and sense source. */
+ public WSDDefaultParameters(String trainingDataDirectory) {
+ this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, trainingDataDirectory);
+ }
+
+ public String getLanguageCode() {
+ return languageCode;
+ }
+
+ public void setLanguageCode(String languageCode) {
+ this.languageCode = languageCode;
+ }
+
+ public int getWindowSize() {
+ return windowSize;
+ }
+
+ public void setWindowSize(int windowSize) {
+ this.windowSize = windowSize;
+ }
+
+ public int getNgram() {
+ return ngram;
+ }
+
+ public void setNgram(int ngram) {
+ this.ngram = ngram;
+ }
+
+ public String getTrainingDataDirectory() {
+ return trainingDataDirectory;
+ }
+ /** Only stores the path; unlike the constructor it does not create the directory. */
+ public void setTrainingDataDirectory(String trainingDataDirectory) {
+ this.trainingDataDirectory = trainingDataDirectory;
+ }
+ /** Currently performs no validation and always returns true. */
+ @Override public boolean areValid() {
+ // TODO recheck this pattern
+ return true;
+ }
+
+}
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
Tue Jun 7 09:23:03 2016
@@ -23,7 +23,6 @@ package opennlp.tools.disambiguator;
* Disambiguation Parameters
*
*/
-// TODO make default params for supervised approaches
public abstract class WSDParameters {
public static enum SenseSource {
@@ -51,6 +50,6 @@ public abstract class WSDParameters {
/*
* @return checks if the parameters are valid or not
*/
- public abstract boolean isValid();
+ public abstract boolean areValid();
}
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
Tue Jun 7 09:23:03 2016
@@ -22,8 +22,6 @@ package opennlp.tools.disambiguator;
import java.security.InvalidParameterException;
import java.util.ArrayList;
import java.util.List;
-
-import opennlp.tools.disambiguator.ims.IMSParameters;
import opennlp.tools.util.Span;
/**
@@ -38,14 +36,9 @@ import opennlp.tools.util.Span;
*
* Otherwise for multiple words, you can set a word span instead of simply one
* index. For the moment the source of sense definitions is from WordNet. *
- * Please see {@link Lesk} for an un-supervised approach. Please see {@link IMS}
- * {@link OSCC} for a supervised approach.
*
* Examples on how to use each approach are provided in the test section.
- *
- * @see Lesk
- * @see IMS
- * @see OSCC
+ *
*/
public abstract class WSDisambiguator {
@@ -59,8 +52,7 @@ public abstract class WSDisambiguator {
}
/**
- * @param the
- * disambiguation implementation specific parameters.
+ * @param params disambiguation implementation specific parameters.
* @throws InvalidParameterException
*/
public void setParams(WSDParameters params) throws InvalidParameterException {
@@ -85,8 +77,8 @@ public abstract class WSDisambiguator {
*
* @param tokenizedContext
* @param tokenTags
+ * @param lemmas
* @param ambiguousTokenIndexSpan
- * @param ambiguousTokenLemma
* @return result as an array of WordNet IDs
*/
public List<String> disambiguate(String[] tokenizedContext,
@@ -147,7 +139,7 @@ public abstract class WSDisambiguator {
} else {
if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
- String sense = IMSParameters.SenseSource.WSDHELPER.name() + " "
+ String sense = WSDParameters.SenseSource.WSDHELPER.name() + " "
+ WSDHelper.getNonRelevWordsDef(tokenTags[i]);
senses.add(sense);
} else {
@@ -161,7 +153,7 @@ public abstract class WSDisambiguator {
}
/**
- * @param WSDSample
+ * @param sample
* @return result as an array of WordNet IDs
*/
public abstract String disambiguate(WSDSample sample);
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java?rev=1747175&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
Tue Jun 7 09:23:03 2016
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.TrainingParameters;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+/**
+ * Supervised word sense disambiguator backed by a maxent model wrapped in a
+ * {@link WSDModel}. Features are produced by a {@link WSDContextGenerator};
+ * the one used here is {@link IMSWSDContextGenerator}. The parameters are
+ * cast to {@link WSDDefaultParameters} wherever they are read, so any other
+ * parameter type fails with a ClassCastException.
+ */
+public class WSDisambiguatorME extends WSDisambiguator {
+ /** Model used for disambiguation; may be null until one is set or loaded from file. */
+ protected WSDModel model;
+ /** Context generator shared by all instances. */
+ protected static WSDContextGenerator cg = new IMSWSDContextGenerator();
+ /** Creates a disambiguator without a model; disambiguate(WSDSample) then tries to load one per word. */
+ public WSDisambiguatorME(WSDParameters params) {
+ this.params = params;
+ }
+ /** Creates a disambiguator that starts with the given model. */
+ public WSDisambiguatorME(WSDModel model, WSDParameters params) {
+ this.model = model;
+ this.params = params;
+ }
+
+ public WSDModel getModel() {
+ return model;
+ }
+
+ public void setModel(WSDModel model) {
+ this.model = model;
+ }
+ /** Assigns the parameters directly; no validity check is done here. */
+ public void setParameters(WSDParameters parameters) {
+ this.params = parameters;
+ }
+ /**
+ * Trains a model from the given samples. The stream is read twice: once by
+ * {@link #buildSurroundingContext} (which resets it afterwards) and once
+ * here, creating one event per sample. The outcome of an event is the first
+ * sense ID of the sample, its features come from the context generator.
+ *
+ * @param lang passed on to the WSDModel constructor
+ * @param samples the training samples; reset() is called on this stream
+ * @param mlParams its settings are handed to the TrainerFactory
+ * @param params must be a {@link WSDDefaultParameters}; window size and
+ * n-gram length are read from it
+ * @return the trained model; its word tag is the target word tag of the
+ * first sample, or the empty string when the stream has no samples
+ * @throws IOException if reading or resetting the sample stream fails
+ */
+ public static WSDModel train(String lang, ObjectStream<WSDSample> samples,
+ TrainingParameters mlParams, WSDParameters params) throws IOException {
+ // first pass over the samples: collect the surrounding context entries
+ ArrayList<String> surroundingContext = buildSurroundingContext(samples,
+ ((WSDDefaultParameters) params).getWindowSize());
+
+ HashMap<String, String> manifestInfoEntries = new HashMap<String, String>();
+
+ MaxentModel meModel = null;
+
+ ArrayList<Event> events = new ArrayList<Event>();
+ ObjectStream<Event> es = null;
+ // second pass: one event per sample
+ WSDSample sample = samples.read();
+ String wordTag = ""; // stays empty when there is no sample at all
+ if (sample != null) {
+ wordTag = sample.getTargetWordTag();
+ do {
+ String sense = sample.getSenseIDs()[0]; // only the first sense ID is used
+ String[] context = cg
+ .getContext(sample, ((WSDDefaultParameters) params).ngram,
+ ((WSDDefaultParameters) params).windowSize, surroundingContext);
+ Event ev = new Event(sense + "", context);
+ events.add(ev);
+ } while ((sample = samples.read()) != null);
+ }
+
+ es = ObjectStreamUtils.createObjectStream(events);
+ EventTrainer trainer = TrainerFactory
+ .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);
+
+ meModel = trainer.train(es);
+
+ return new WSDModel(lang, wordTag,
+ ((WSDDefaultParameters) params).windowSize,
+ ((WSDDefaultParameters) params).ngram, meModel, surroundingContext,
+ manifestInfoEntries);
+ }
+ /**
+ * Collects the surrounding context words of every sample in the stream,
+ * using a new {@link IMSWSDContextGenerator}. Words are appended to a list,
+ * so duplicates are kept. The stream is reset before returning.
+ *
+ * @param samples the samples to read; reset() is called on this stream
+ * @param windowSize passed on to extractSurroundingContext
+ * @return the collected words, in reading order
+ * @throws IOException if reading or resetting the sample stream fails
+ */
+ public static ArrayList<String> buildSurroundingContext(
+ ObjectStream<WSDSample> samples, int windowSize) throws IOException {
+ IMSWSDContextGenerator contextGenerator = new IMSWSDContextGenerator();
+ ArrayList<String> surroundingWordsModel = new ArrayList<String>();
+ WSDSample sample;
+ while ((sample = samples.read()) != null) {
+ String[] words = contextGenerator
+ .extractSurroundingContext(sample.getTargetPosition(),
+ sample.getSentence(), sample.getLemmas(), windowSize);
+
+ if (words.length > 0) {
+ for (String word : words) {
+ surroundingWordsModel.add(word);
+ }
+ }
+ }
+ samples.reset();
+ return surroundingWordsModel;
+ }
+ /**
+ * Disambiguates the target word of the given sample.
+ * <p>
+ * If the target tag is a relevant POS tag: when no model is set, or the
+ * model was built for another word tag, the model file
+ * {@code <trainingDataDirectory><targetWordTag>.wsd.model} is loaded; when
+ * that file does not exist the result of MFS is returned. The maxent model
+ * is then evaluated on the generated context. A non-empty best outcome is
+ * returned as {@code <sense source name> <word>%<outcome>}, where word is
+ * the part of the word tag before the first dot; otherwise the result of
+ * MFS is returned.
+ * <p>
+ * If the target tag is not relevant: returns {@code WSDHELPER <tag>} when
+ * WSDHelper.getNonRelevWordsDef(tag) is not null, else null.
+ * <p>
+ * NOTE(review): the directory and the word tag are concatenated without a
+ * separator, so the directory presumably has to end with one - confirm.
+ * NOTE(review): if loading the model throws, the exception is only printed
+ * and the previous model is used; when that is null the following
+ * model.getContextEntries() call throws a NullPointerException.
+ * NOTE(review): the non-relevant branch appends the tag itself, while
+ * WSDisambiguator appends the value of getNonRelevWordsDef - confirm which
+ * one is intended.
+ *
+ * @param sample the sample holding the word to disambiguate
+ * @return the sense string described above, or null
+ */
+ @Override public String disambiguate(WSDSample sample) {
+ if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+ String wordTag = sample.getTargetWordTag();
+ // no model yet, or the current model was built for another word tag
+ if (model == null || !model.getWordTag()
+ .equals(sample.getTargetWordTag())) {
+ // path prefix of the model file; ".wsd.model" is appended below
+ String trainingFile =
+ ((WSDDefaultParameters) this.getParams()).getTrainingDataDirectory()
+ + sample.getTargetWordTag();
+
+ File file = new File(trainingFile + ".wsd.model");
+ if (file.exists() && !file.isDirectory()) {
+ try {
+ setModel(new WSDModel(file));
+ // NOTE(review): failures are only printed; execution continues with the old model
+ } catch (InvalidFormatException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ String outcome = "";
+
+ String[] context = cg
+ .getContext(sample, ((WSDDefaultParameters) this.params).ngram,
+ ((WSDDefaultParameters) this.params).windowSize,
+ this.model.getContextEntries());
+
+ double[] outcomeProbs = model.getWSDMaxentModel().eval(context);
+ outcome = model.getWSDMaxentModel().getBestOutcome(outcomeProbs);
+
+ if (outcome != null && !outcome.equals("")) {
+
+ return this.getParams().getSenseSource().name() + " " + wordTag
+ .split("\\.")[0] + "%" + outcome;
+
+ } else {
+ MFS mfs = new MFS();
+ return mfs.disambiguate(wordTag);
+ }
+
+ } else {
+ // no model file for this word tag: fall back to MFS
+ MFS mfs = new MFS();
+ return mfs.disambiguate(wordTag);
+ }
+ } else { // the model already set matches this word tag
+ String outcome = "";
+
+ String[] context = cg
+ .getContext(sample, ((WSDDefaultParameters) this.params).ngram,
+ ((WSDDefaultParameters) this.params).windowSize,
+ this.model.getContextEntries());
+
+ double[] outcomeProbs = model.getWSDMaxentModel().eval(context);
+ outcome = model.getWSDMaxentModel().getBestOutcome(outcomeProbs);
+
+ if (outcome != null && !outcome.equals("")) {
+
+ return this.getParams().getSenseSource().name() + " " + wordTag
+ .split("\\.")[0] + "%" + outcome;
+ } else {
+
+ MFS mfs = new MFS();
+ return mfs.disambiguate(wordTag);
+ }
+ }
+ } else {
+ // target tag is not a relevant POS tag
+ if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+ return WSDParameters.SenseSource.WSDHELPER.name() + " " + sample
+ .getTargetTag();
+ } else {
+ return null;
+ }
+
+ }
+
+ }
+
+ /**
+ * Disambiguates a single word: wraps the arguments in a {@link WSDSample}
+ * and delegates to {@link #disambiguate(WSDSample)}.
+ *
+ * @param tokenizedContext : the text containing the word to disambiguate
+ * @param tokenTags : the tags corresponding to the context
+ * @param lemmas : the lemmas of ALL the words in the context
+ * @param index : the index of the word to disambiguate
+ * @return the sense string of the word to disambiguate (may be null)
+ */
+ public String disambiguate(String[] tokenizedContext, String[] tokenTags,
+ String[] lemmas, int index) {
+ return disambiguate(
+ new WSDSample(tokenizedContext, tokenTags, lemmas, index));
+ }
+
+}
Copied:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
(from r1746846,
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java)
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java?p2=opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java&p1=opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java&r1=1746846&r2=1747175&rev=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
Tue Jun 7 09:23:03 2016
@@ -17,7 +17,7 @@
* under the License.
*/
-package opennlp.tools.disambiguator.ims;
+package opennlp.tools.disambiguator;
import java.util.ArrayList;
import java.util.List;
@@ -43,7 +43,7 @@ public class WTDIMS {
protected String[] features;
public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
- int wordIndex) {
+ int wordIndex) {
this.sentence = sentence;
this.posTags = posTags;
this.wordIndex = wordIndex;
@@ -51,7 +51,7 @@ public class WTDIMS {
}
public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
- int wordIndex, String[] senseIDs) {
+ int wordIndex, String[] senseIDs) {
this.sentence = sentence;
this.posTags = posTags;
this.wordIndex = wordIndex;
@@ -61,7 +61,7 @@ public class WTDIMS {
}
public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
- String word, String[] senseIDs) {
+ String word, String[] senseIDs) {
super();
this.sentence = sentence;
@@ -148,16 +148,16 @@ public class WTDIMS {
if ((WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]) != null)) {
if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
- .equals(POS.VERB)) {
+ .equals(POS.VERB)) {
ref = wordBaseForm + ".v";
} else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
- .equals(POS.NOUN)) {
+ .equals(POS.NOUN)) {
ref = wordBaseForm + ".n";
} else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
- .equals(POS.ADJECTIVE)) {
+ .equals(POS.ADJECTIVE)) {
ref = wordBaseForm + ".a";
} else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
- .equals(POS.ADVERB)) {
+ .equals(POS.ADVERB)) {
ref = wordBaseForm + ".r";
}
}
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
Tue Jun 7 09:23:03 2016
@@ -64,9 +64,9 @@ public class SemcorReaderExtended {
private static final String ELEMENT_PUNCTUATION = "punc";
- private static String semcorDirectory = "src\\test\\resources\\semcor3.0\\";
+ private static String semcorDirectory = "src/test/resources/semcor3.0/";
private static String[] folders = { "brown1", "brown2", "brownv" };
- private static String tagfiles = "\\tagfiles\\";
+ private static String tagfiles = "/tagfiles/";
public static String getSemcorDirectory() {
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
Tue Jun 7 09:23:03 2016
@@ -47,7 +47,7 @@ import opennlp.tools.util.ObjectStreamUt
*/
public class SensevalReader {
- protected String sensevalDirectory = "src\\test\\resources\\senseval3\\";
+ protected String sensevalDirectory = "src/test/resources/senseval3/";
protected String data = sensevalDirectory + "EnglishLS.train";
protected String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";
@@ -72,7 +72,7 @@ public class SensevalReader {
/**
* This extracts the equivalent senses. This serves in the case of the
* coarse-grained disambiguation
- *
+ *
* @param sensemapFile
* the file containing the equivalent senses, each set of equivalent
* senses per line
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
Tue Jun 7 09:23:03 2016
@@ -21,6 +21,7 @@ package opennlp.tools.disambiguator.data
import opennlp.tools.disambiguator.WSDHelper;
+// TODO extend Word from Wordnet
public class Word {
public static enum Type {
Propchange: opennlp/sandbox/opennlp-wsd/src/test/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Jun 7 09:23:03 2016
@@ -0,0 +1 @@
+resources
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
Tue Jun 7 09:23:03 2016
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -32,7 +32,7 @@ public class LeskEvaluatorTest {
@Test
public static void main(String[] args) {
WSDHelper.print("Evaluation Started");
- String modelsDir = "src\\test\\resources\\models\\";
+ String modelsDir = "src/test/resources/models/";
WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
Tue Jun 7 09:23:03 2016
@@ -41,7 +41,7 @@ import org.junit.Test;
public class LeskTester {
// TODO write more tests
- static String modelsDir = "src\\test\\resources\\models\\";
+ static String modelsDir = "src/test/resources/models/";
static Lesk lesk;
@@ -131,7 +131,7 @@ public class LeskTester {
List<String> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
assertEquals("Check number of returned words", 5, senses.size());
- assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 4.8",
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 3.8",
senses.get(0));
assertEquals("Check 'radioactive' sense ID",
"WORDNET radioactive%3:00:00:: 6.0", senses.get(1));
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
Tue Jun 7 09:23:03 2016
@@ -33,7 +33,7 @@ public class MFSEvaluatorTest {
@Test
public static void main(String[] args) {
WSDHelper.print("Evaluation Started");
- String modelsDir = "src\\test\\resources\\models\\";
+ String modelsDir = "src/test/resources/models/";
WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
Modified:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
Tue Jun 7 09:23:03 2016
@@ -41,7 +41,7 @@ public class MFSTester {
// TODO write more tests
// TODO modify when we fix the parameter model
- static String modelsDir = "src\\test\\resources\\models\\";
+ static String modelsDir = "src/test/resources/models/";
static MFS mfs;
Added:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java?rev=1747175&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
Tue Jun 7 09:23:03 2016
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
+import opennlp.tools.disambiguator.datareader.SensevalReader;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+// TODO improve the tests improve parameters
+public class WSDEvaluatorTest {
+
+ static SensevalReader seReader;
+
+ static String modelsDir = "src/test/resources/models/";
+ static String trainingDataDirectory = "src/test/resources/supervised/models/";
+
+ static WSDDefaultParameters params = new WSDDefaultParameters("");
+ static WSDisambiguatorME wsdME;
+ static WSDModel model;
+
+ static ArrayList<String> testWords;
+
+ /*
+ * Loads tokenizer, lemmatizer and tagger, then trains and reloads a model per eligible word
+ */
+ public static void setUpAndTraining() {
+ WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+ WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+ WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
+ seReader = new SensevalReader();
+ testWords = seReader.getSensevalWords();
+ params = new WSDDefaultParameters("");
+ params.setTrainingDataDirectory(trainingDataDirectory);
+
+ TrainingParameters trainingParams = new TrainingParameters();
+ SemcorReaderExtended sr = new SemcorReaderExtended();
+
+ WSDHelper.print("Training Started");
+ for (String word : testWords) {
+ // skip verbs (the part after the dot is "v") because they are not from WordNet
+ if (!word.split("\\.")[1].equals("v")) {
+
+ ArrayList<WSDSample> instances = seReader.getSensevalData(word);
+ if (instances != null && instances.size() > 1) {
+ WSDHelper.print("------------------" + word + "------------------");
+ ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(word);
+
+ WSDModel writeModel = null;
+ /*
+ * Train on the Semcor stream, write the model and read it back. model and
+ * wsdME are static, so each word's iteration replaces the previous value.
+ */
+ File outFile;
+ try {
+ writeModel = WSDisambiguatorME
+ .train("en", sampleStream, trainingParams, params);
+ assertNotNull("Checking the model to be written", writeModel);
+ writeModel.writeModel(params.getTrainingDataDirectory() + word);
+ outFile = new File(
+ params.getTrainingDataDirectory() + word + ".wsd.model");
+ model = new WSDModel(outFile);
+ assertNotNull("Checking the read model", model);
+ wsdME = new WSDisambiguatorME(model, params);
+ assertNotNull("Checking the disambiguator", wsdME);
+ } catch (IOException e1) {
+ e1.printStackTrace();
+ fail("Exception in training");
+ }
+ }
+ }
+ }
+ }
+
+ public static void disambiguationEval() {
+ // Runs a WSDEvaluator built on wsdME over the Senseval samples of each non-verb test word.
+ WSDHelper.print("Evaluation Started");
+
+ for (String word : testWords) {
+ WSDEvaluator evaluator = new WSDEvaluator(wsdME);
+
+ // skip verbs (the part after the dot is "v") because they are not from WordNet
+ if (!word.split("\\.")[1].equals("v")) {
+
+ ArrayList<WSDSample> instances = seReader.getSensevalData(word);
+ if (instances != null && instances.size() > 1) {
+ WSDHelper.print("------------------" + word + "------------------");
+ for (WSDSample instance : instances) { // only samples whose first sense ID is not "null"
+ if (instance.getSenseIDs() != null && !instance.getSenseIDs()[0]
+ .equals("null")) {
+ evaluator.evaluateSample(instance);
+ }
+ }
+ WSDHelper.print(evaluator.toString());
+ } else {
+ WSDHelper.print("null instances");
+ }
+ }
+
+ }
+ }
+
+ public static void main(String[] args) { // args is not read
+ setUpAndTraining(); // load the tools, then train, write and reload the models
+ disambiguationEval(); // then run the evaluator over the Senseval samples
+ }
+}
Copied:
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
(from r1746846,
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java)
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java?p2=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java&p1=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java&r1=1746846&r2=1747175&rev=1747175&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
Tue Jun 7 09:23:03 2016
@@ -1,40 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
package opennlp.tools.disambiguator;
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import opennlp.tools.disambiguator.ims.IMSME;
-import opennlp.tools.disambiguator.ims.IMSParameters;
-
-public class Tester {
-
- public static void main(String[] args) {
-
- String modelsDir = "src\\test\\resources\\models\\";
+import opennlp.tools.util.ObjectStream;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * This is the test class for {@link WSDisambiguatorME}.
+ * <p/>
+ * The scope of this test is to make sure that the WSDisambiguatorME code can be
+ * executed. This test can not detect mistakes which lead to incorrect feature
+ * generation or other mistakes which decrease the disambiguation performance of
+ * the disambiguator.
+ * <p/>
+ * In this test the {@link WSDisambiguatorME} is trained with Semcor
+ * and then the computed model is used to predict sentences
+ * from the training sentences.
+ */
+
+public class WSDTester {
+ // TODO write more tests
+ // TODO modify when we fix the parameter model
+
+ static String modelsDir = "src/test/resources/models/";
+ static String trainingDataDirectory = "src/test/resources/supervised/models/";
+
+ static WSDDefaultParameters params;
+ static WSDisambiguatorME wsdME;
+ static WSDModel model;
+
+ static String test = "please.v";
+ static File outFile;
+
+ static String test1 = "We need to discuss an important topic, please write to me soon.";
+ static String test2 = "The component was highly radioactive to the point that"
+ + " it has been activated the second it touched water";
+ static String test3 = "The summer is almost over and I did not go to the beach even once";
+
+ static String[] sentence1;
+ static String[] sentence2;
+ static String[] sentence3;
+
+ static String[] tags1;
+ static String[] tags2;
+ static String[] tags3;
+
+ static String[] lemmas1;
+ static String[] lemmas2;
+ static String[] lemmas3;
+
+ /*
+ * Setup the testing variables
+ */
+ @BeforeClass public static void setUpAndTraining() {
WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
- IMSME ims = new IMSME(new IMSParameters("\\"));
+ sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+ sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+ sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+
+ tags1 = WSDHelper.getTagger().tag(sentence1);
+ tags2 = WSDHelper.getTagger().tag(sentence2);
+ tags3 = WSDHelper.getTagger().tag(sentence3);
+
+ List<String> tempLemmas1 = new ArrayList<String>();
+ for (int i = 0; i < sentence1.length; i++) {
+ tempLemmas1
+ .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
+ }
+ lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+
+ List<String> tempLemmas2 = new ArrayList<String>();
+ for (int i = 0; i < sentence2.length; i++) {
+ tempLemmas2
+ .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
+ }
+ lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
- String test3 = "The summer is almost over and I haven't been to the beach even once";
- String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
- String[] tags3 = WSDHelper.getTagger().tag(sentence3);
List<String> tempLemmas3 = new ArrayList<String>();
for (int i = 0; i < sentence3.length; i++) {
- String lemma = WSDHelper.getLemmatizer().lemmatize(sentence3[i],
- tags3[i]);
- tempLemmas3.add(lemma);
+ tempLemmas3
+ .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
}
- String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+ lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
- // output
- List<String> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
- for (int i = 0; i < sentence3.length; i++) {
- System.out.print(sentence3[i] + " : ");
- WSDHelper.printResults(ims, senses3.get(i));
- WSDHelper.print("----------");
+ params = new WSDDefaultParameters("");
+ params.setTrainingDataDirectory(trainingDataDirectory);
+ TrainingParameters trainingParams = new TrainingParameters();
+ SemcorReaderExtended sr = new SemcorReaderExtended();
+ ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(test);
+
+ WSDModel writeModel = null;
+ /*
+ * Tests training the disambiguator. We test both writing and reading a model
+ * file trained by Semcor.
+ */
+
+ try {
+ writeModel = WSDisambiguatorME
+ .train("en", sampleStream, trainingParams, params);
+ assertNotNull("Checking the model to be written", writeModel);
+ writeModel.writeModel(params.getTrainingDataDirectory() + test);
+ outFile = new File(
+ params.getTrainingDataDirectory() + test + ".wsd.model");
+ model = new WSDModel(outFile);
+ assertNotNull("Checking the read model", model);
+ wsdME = new WSDisambiguatorME(model, params);
+ assertNotNull("Checking the disambiguator", wsdME);
+ } catch (IOException e1) {
+ e1.printStackTrace();
+ fail("Exception in training");
}
+ }
+
+ /*
+ * Tests disambiguating a single word: the ambiguous word "please" (presumably token 8 of sentence1)
+ */
+ @Test public void testOneWordDisambiguation() {
+ String sense = wsdME.disambiguate(sentence1, tags1, lemmas1, 8);
+ assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
+ }
+ /*
+ * Tests disambiguating a word Span. In this case we test a mix of monosemous
+ * and polysemous words as well as words that do not need disambiguation, such
+ * as determiners. Five results are expected for Span(3, 7); the first four are checked.
+ */
+ @Test public void testWordSpanDisambiguation() {
+ Span span = new Span(3, 7);
+ List<String> senses = wsdME.disambiguate(sentence2, tags2, lemmas2, span);
+
+ assertEquals("Check number of returned words", 5, senses.size());
+ assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+ senses.get(0));
+ assertEquals("Check 'radioactive' sense ID",
+ "WORDNET radioactive%3:00:00::", senses.get(1));
+ assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+ assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
}
-}
\ No newline at end of file
+
+ /*
+ * Tests disambiguating all the words of sentence3; only the result count and entry 6 are asserted
+ */
+ @Test public void testAllWordsDisambiguation() {
+ List<String> senses = wsdME.disambiguate(sentence3, tags3, lemmas3);
+
+ assertEquals("Check number of returned words", 15, senses.size());
+ assertEquals("Check personal pronoun", "WSDHELPER personal pronoun",
+ senses.get(6));
+ }
+
+}