Author: beylerian
Date: Tue Jun  7 09:23:03 2016
New Revision: 1747175

URL: http://svn.apache.org/viewvc?rev=1747175&view=rev
Log:
OPENNLP-843 - Grouped the two supervised techniques into a common one with
different context generators (the default context generator is from the IMS
approach) and updated the unit tests. The useless classes still need to be removed.

Added:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
      - copied, changed from r1746846, 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
      - copied, changed from r1746846, 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
Removed:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClusterMembership.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/ClustersReader.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
Modified:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
    opennlp/sandbox/opennlp-wsd/src/test/   (props changed)
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
 Tue Jun  7 09:23:03 2016
@@ -23,8 +23,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
 /**
  * Class for the extraction of features for the different Supervised
  * Disambiguation approaches.<br>

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
 Tue Jun  7 09:23:03 2016
@@ -42,7 +42,7 @@ public class IMSWSDContextGenerator impl
     return windowTags;
   }
 
-  public String[] extractSurroundingWords(int index, String[] toks,
+  public String[] extractSurroundingContext(int index, String[] toks,
     String[] lemmas, int windowSize) {
 
     // TODO consider the windowSize
@@ -117,7 +117,7 @@ public class IMSWSDContextGenerator impl
 
     HashSet<String> surroundingWords = new HashSet<>();
     surroundingWords.addAll(Arrays
-      .asList(extractSurroundingWords(index, tokens, lemmas, windowSize)));
+      .asList(extractSurroundingContext(index, tokens, lemmas, windowSize)));
 
     String[] localCollocations = extractLocalCollocations(index, tokens, 
ngram);
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java 
(original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java 
Tue Jun  7 09:23:03 2016
@@ -53,8 +53,8 @@ public class Lesk extends WSDisambiguato
 
   /**
    * Initializes the WSDParameters object and sets the input parameters
-   * 
-   * @param Input
+   *
+   * @param params
    *          Parameters
    * @throws InvalidParameterException
    */
@@ -65,8 +65,8 @@ public class Lesk extends WSDisambiguato
   /**
    * If the parameters are null set the default ones, else only set them if they
    * are valid. Invalid parameters will throw an exception
-   * 
-   * @param Input
+   *
+   * @param params
    *          parameters
    * @throws InvalidParameterException
    */
@@ -75,7 +75,7 @@ public class Lesk extends WSDisambiguato
     if (params == null) {
       this.params = new LeskParameters();
     } else {
-      if (params.isValid()) {
+      if (params.areValid()) {
         this.params = (LeskParameters) params;
       } else {
         throw new InvalidParameterException("wrong params");

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
 Tue Jun  7 09:23:03 2016
@@ -148,7 +148,7 @@ public class LeskParameters extends WSDP
    * 
    * @see opennlp.tools.disambiguator.WSDParameters#isValid()
    */
-  public boolean isValid() {
+  public boolean areValid() {
 
     switch (this.leskType) {
     case LESK_BASIC:

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/OSCCWSDContextGenerator.java
 Tue Jun  7 09:23:03 2016
@@ -30,7 +30,7 @@ import net.sf.extjwnl.data.Synset;
  */
 public class OSCCWSDContextGenerator implements WSDContextGenerator {
 
-  public String[] extractSurroundingContextClusters(int index, String[] toks,
+  public String[] extractSurroundingContext(int index, String[] toks,
     String[] tags, String[] lemmas, int windowSize) {
 
     // TODO consider windowSize
@@ -78,7 +78,7 @@ public class OSCCWSDContextGenerator imp
 
     HashSet<String> surroundingContextClusters = new HashSet<>();
     surroundingContextClusters.addAll(Arrays.asList(
-      extractSurroundingContextClusters(index, toks, tags, lemmas,
+      extractSurroundingContext(index, toks, tags, lemmas,
         windowSize)));
 
     String[] serializedFeatures = new String[model.size()];

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java?rev=1747175&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
 Tue Jun  7 09:23:03 2016
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.File;
+
+/**
+ * This class contains the parameters for the IMS approach as well as the
+ * directories containing the files used
+ */
+public class WSDDefaultParameters extends WSDParameters {
+
+  protected String languageCode;
+  protected int windowSize;
+  protected int ngram;
+
+  protected String trainingDataDirectory;
+
+  protected static final int DFLT_WIN_SIZE = 3;
+  protected static final int DFLT_NGRAM = 2;
+  protected static final String DFLT_LANG_CODE = "En";
+  protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
+
+  /**
+   * This constructor takes four parameters. The default language used is
+   * <i>English</i>
+   *
+   * @param windowSize  the size of the window used to extract the Surrounding Words features
+   * @param ngram       the number of words used to extract the Local Collocations features
+   * @param senseSource the source of the training data
+   * @param trainingDataDirectory the directory of the training data; it is
+   *                    created if it does not already exist
+   */
+  public WSDDefaultParameters(int windowSize, int ngram,
+    SenseSource senseSource, String trainingDataDirectory) {
+
+    this.languageCode = DFLT_LANG_CODE;
+    this.windowSize = windowSize;
+    this.ngram = ngram;
+    this.senseSource = senseSource;
+    this.trainingDataDirectory = trainingDataDirectory;
+
+    File folder = new File(trainingDataDirectory);
+    if (!folder.exists())
+      folder.mkdirs();
+  }
+
+  public WSDDefaultParameters(String trainingDataDirectory) {
+    this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, trainingDataDirectory);
+  }
+
+  public String getLanguageCode() {
+    return languageCode;
+  }
+
+  public void setLanguageCode(String languageCode) {
+    this.languageCode = languageCode;
+  }
+
+  public int getWindowSize() {
+    return windowSize;
+  }
+
+  public void setWindowSize(int windowSize) {
+    this.windowSize = windowSize;
+  }
+
+  public int getNgram() {
+    return ngram;
+  }
+
+  public void setNgram(int ngram) {
+    this.ngram = ngram;
+  }
+
+  public String getTrainingDataDirectory() {
+    return trainingDataDirectory;
+  }
+
+  public void setTrainingDataDirectory(String trainingDataDirectory) {
+    this.trainingDataDirectory = trainingDataDirectory;
+  }
+
+  @Override public boolean areValid() {
+    // TODO recheck this pattern
+    return true;
+  }
+
+}

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
 Tue Jun  7 09:23:03 2016
@@ -23,7 +23,6 @@ package opennlp.tools.disambiguator;
  * Disambiguation Parameters
  *
  */
-// TODO make default params for supervised approaches
 public abstract class WSDParameters {
 
   public static enum SenseSource {
@@ -51,6 +50,6 @@ public abstract class WSDParameters {
   /*
    * @return checks if the parameters are valid or not
    */
-  public abstract boolean isValid();
+  public abstract boolean areValid();
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
 Tue Jun  7 09:23:03 2016
@@ -22,8 +22,6 @@ package opennlp.tools.disambiguator;
 import java.security.InvalidParameterException;
 import java.util.ArrayList;
 import java.util.List;
-
-import opennlp.tools.disambiguator.ims.IMSParameters;
 import opennlp.tools.util.Span;
 
 /**
@@ -38,14 +36,9 @@ import opennlp.tools.util.Span;
  * 
  * Otherwise for multiple words, you can set a word span instead of simply one
  * index. For the moment the source of sense definitions is from WordNet. *
- * Please see {@link Lesk} for an un-supervised approach. Please see {@link 
IMS}
- * {@link OSCC} for a supervised approach.
  * 
  * Examples on how to use each approach are provided in the test section.
- * 
- * @see Lesk
- * @see IMS
- * @see OSCC
+ *
  */
 public abstract class WSDisambiguator {
 
@@ -59,8 +52,7 @@ public abstract class WSDisambiguator {
   }
 
   /**
-   * @param the
-   *          disambiguation implementation specific parameters.
+   * @param params disambiguation implementation specific parameters.
    * @throws InvalidParameterException
    */
   public void setParams(WSDParameters params) throws InvalidParameterException 
{
@@ -85,8 +77,8 @@ public abstract class WSDisambiguator {
    * 
    * @param tokenizedContext
    * @param tokenTags
+   * @param lemmas
    * @param ambiguousTokenIndexSpan
-   * @param ambiguousTokenLemma
    * @return result as an array of WordNet IDs
    */
   public List<String> disambiguate(String[] tokenizedContext,
@@ -147,7 +139,7 @@ public abstract class WSDisambiguator {
       } else {
 
         if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
-          String sense = IMSParameters.SenseSource.WSDHELPER.name() + " "
+          String sense = WSDParameters.SenseSource.WSDHELPER.name() + " "
               + WSDHelper.getNonRelevWordsDef(tokenTags[i]);
           senses.add(sense);
         } else {
@@ -161,7 +153,7 @@ public abstract class WSDisambiguator {
   }
 
   /**
-   * @param WSDSample
+   * @param sample
    * @return result as an array of WordNet IDs
    */
   public abstract String disambiguate(WSDSample sample);

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java?rev=1747175&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorME.java
 Tue Jun  7 09:23:03 2016
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import opennlp.tools.ml.EventTrainer;
+import opennlp.tools.ml.TrainerFactory;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.TrainingParameters;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+public class WSDisambiguatorME extends WSDisambiguator {
+
+  protected WSDModel model;
+
+  protected static WSDContextGenerator cg = new IMSWSDContextGenerator();
+
+  public WSDisambiguatorME(WSDParameters params) {
+    this.params = params;
+  }
+
+  public WSDisambiguatorME(WSDModel model, WSDParameters params) {
+    this.model = model;
+    this.params = params;
+  }
+
+  public WSDModel getModel() {
+    return model;
+  }
+
+  public void setModel(WSDModel model) {
+    this.model = model;
+  }
+
+  public void setParameters(WSDParameters parameters) {
+    this.params = parameters;
+  }
+
+  public static WSDModel train(String lang, ObjectStream<WSDSample> samples,
+    TrainingParameters mlParams, WSDParameters params) throws IOException {
+
+    ArrayList<String> surroundingContext = buildSurroundingContext(samples,
+      ((WSDDefaultParameters) params).getWindowSize());
+
+    HashMap<String, String> manifestInfoEntries = new HashMap<String, 
String>();
+
+    MaxentModel meModel = null;
+
+    ArrayList<Event> events = new ArrayList<Event>();
+    ObjectStream<Event> es = null;
+
+    WSDSample sample = samples.read();
+    String wordTag = "";
+    if (sample != null) {
+      wordTag = sample.getTargetWordTag();
+      do {
+        String sense = sample.getSenseIDs()[0];
+        String[] context = cg
+          .getContext(sample, ((WSDDefaultParameters) params).ngram,
+            ((WSDDefaultParameters) params).windowSize, surroundingContext);
+        Event ev = new Event(sense + "", context);
+        events.add(ev);
+      } while ((sample = samples.read()) != null);
+    }
+
+    es = ObjectStreamUtils.createObjectStream(events);
+    EventTrainer trainer = TrainerFactory
+      .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);
+
+    meModel = trainer.train(es);
+
+    return new WSDModel(lang, wordTag,
+      ((WSDDefaultParameters) params).windowSize,
+      ((WSDDefaultParameters) params).ngram, meModel, surroundingContext,
+      manifestInfoEntries);
+  }
+
+  public static ArrayList<String> buildSurroundingContext(
+    ObjectStream<WSDSample> samples, int windowSize) throws IOException {
+    IMSWSDContextGenerator contextGenerator = new IMSWSDContextGenerator();
+    ArrayList<String> surroundingWordsModel = new ArrayList<String>();
+    WSDSample sample;
+    while ((sample = samples.read()) != null) {
+      String[] words = contextGenerator
+        .extractSurroundingContext(sample.getTargetPosition(),
+          sample.getSentence(), sample.getLemmas(), windowSize);
+
+      if (words.length > 0) {
+        for (String word : words) {
+          surroundingWordsModel.add(word);
+        }
+      }
+    }
+    samples.reset();
+    return surroundingWordsModel;
+  }
+
+  @Override public String disambiguate(WSDSample sample) {
+    if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+      String wordTag = sample.getTargetWordTag();
+
+      if (model == null || !model.getWordTag()
+        .equals(sample.getTargetWordTag())) {
+
+        String trainingFile =
+          ((WSDDefaultParameters) this.getParams()).getTrainingDataDirectory()
+            + sample.getTargetWordTag();
+
+        File file = new File(trainingFile + ".wsd.model");
+        if (file.exists() && !file.isDirectory()) {
+          try {
+            setModel(new WSDModel(file));
+
+          } catch (InvalidFormatException e) {
+            e.printStackTrace();
+          } catch (IOException e) {
+            e.printStackTrace();
+          }
+
+          String outcome = "";
+
+          String[] context = cg
+            .getContext(sample, ((WSDDefaultParameters) this.params).ngram,
+              ((WSDDefaultParameters) this.params).windowSize,
+              this.model.getContextEntries());
+
+          double[] outcomeProbs = model.getWSDMaxentModel().eval(context);
+          outcome = model.getWSDMaxentModel().getBestOutcome(outcomeProbs);
+
+          if (outcome != null && !outcome.equals("")) {
+
+            return this.getParams().getSenseSource().name() + " " + wordTag
+              .split("\\.")[0] + "%" + outcome;
+
+          } else {
+            MFS mfs = new MFS();
+            return mfs.disambiguate(wordTag);
+          }
+
+        } else {
+
+          MFS mfs = new MFS();
+          return mfs.disambiguate(wordTag);
+        }
+      } else {
+        String outcome = "";
+
+        String[] context = cg
+          .getContext(sample, ((WSDDefaultParameters) this.params).ngram,
+            ((WSDDefaultParameters) this.params).windowSize,
+            this.model.getContextEntries());
+
+        double[] outcomeProbs = model.getWSDMaxentModel().eval(context);
+        outcome = model.getWSDMaxentModel().getBestOutcome(outcomeProbs);
+
+        if (outcome != null && !outcome.equals("")) {
+
+          return this.getParams().getSenseSource().name() + " " + wordTag
+            .split("\\.")[0] + "%" + outcome;
+        } else {
+
+          MFS mfs = new MFS();
+          return mfs.disambiguate(wordTag);
+        }
+      }
+    } else {
+
+      if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+        return WSDParameters.SenseSource.WSDHELPER.name() + " " + sample
+          .getTargetTag();
+      } else {
+        return null;
+      }
+
+    }
+
+  }
+
+  /**
+   * The IMS disambiguation method for a single word
+   *
+   * @param tokenizedContext : the text containing the word to disambiguate
+   * @param tokenTags        : the tags corresponding to the context
+   * @param lemmas           : the lemmas of ALL the words in the context
+   * @param index            : the index of the word to disambiguate
+   * @return the sense of the word to disambiguate, as a single string
+   */
+  public String disambiguate(String[] tokenizedContext, String[] tokenTags,
+    String[] lemmas, int index) {
+    return disambiguate(
+      new WSDSample(tokenizedContext, tokenTags, lemmas, index));
+  }
+
+}

Copied: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
 (from r1746846, 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java)
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java?p2=opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java&p1=opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java&r1=1746846&r2=1747175&rev=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
 Tue Jun  7 09:23:03 2016
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package opennlp.tools.disambiguator.ims;
+package opennlp.tools.disambiguator;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -43,7 +43,7 @@ public class WTDIMS {
   protected String[] features;
 
   public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
-      int wordIndex) {
+    int wordIndex) {
     this.sentence = sentence;
     this.posTags = posTags;
     this.wordIndex = wordIndex;
@@ -51,7 +51,7 @@ public class WTDIMS {
   }
 
   public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
-      int wordIndex, String[] senseIDs) {
+    int wordIndex, String[] senseIDs) {
     this.sentence = sentence;
     this.posTags = posTags;
     this.wordIndex = wordIndex;
@@ -61,7 +61,7 @@ public class WTDIMS {
   }
 
   public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
-      String word, String[] senseIDs) {
+    String word, String[] senseIDs) {
     super();
 
     this.sentence = sentence;
@@ -148,16 +148,16 @@ public class WTDIMS {
 
     if ((WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]) != null)) {
       if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.VERB)) {
+        .equals(POS.VERB)) {
         ref = wordBaseForm + ".v";
       } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.NOUN)) {
+        .equals(POS.NOUN)) {
         ref = wordBaseForm + ".n";
       } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.ADJECTIVE)) {
+        .equals(POS.ADJECTIVE)) {
         ref = wordBaseForm + ".a";
       } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
-          .equals(POS.ADVERB)) {
+        .equals(POS.ADVERB)) {
         ref = wordBaseForm + ".r";
       }
     }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
 Tue Jun  7 09:23:03 2016
@@ -64,9 +64,9 @@ public class SemcorReaderExtended {
 
   private static final String ELEMENT_PUNCTUATION = "punc";
 
-  private static String semcorDirectory = "src\\test\\resources\\semcor3.0\\";
+  private static String semcorDirectory = "src/test/resources/semcor3.0/";
   private static String[] folders = { "brown1", "brown2", "brownv" };
-  private static String tagfiles = "\\tagfiles\\";
+  private static String tagfiles = "/tagfiles/";
 
   
   public static String getSemcorDirectory() {

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
 Tue Jun  7 09:23:03 2016
@@ -47,7 +47,7 @@ import opennlp.tools.util.ObjectStreamUt
  */
 public class SensevalReader {
 
-  protected String sensevalDirectory = "src\\test\\resources\\senseval3\\";
+  protected String sensevalDirectory = "src/test/resources/senseval3/";
 
   protected String data = sensevalDirectory + "EnglishLS.train";
   protected String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";
@@ -72,7 +72,7 @@ public class SensevalReader {
   /**
    * This extracts the equivalent senses. This serves in the case of the
    * coarse-grained disambiguation
-   * 
+   *
    * @param sensemapFile
    *          the file containing the equivalent senses, each set of equivalent
    *          senses per line

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
 Tue Jun  7 09:23:03 2016
@@ -21,6 +21,7 @@ package opennlp.tools.disambiguator.data
 
 import opennlp.tools.disambiguator.WSDHelper;
 
+// TODO extend Word from Wordnet
 public class Word {
 
   public static enum Type {

Propchange: opennlp/sandbox/opennlp-wsd/src/test/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Jun  7 09:23:03 2016
@@ -0,0 +1 @@
+resources

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
 Tue Jun  7 09:23:03 2016
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -32,7 +32,7 @@ public class LeskEvaluatorTest {
   @Test
   public static void main(String[] args) {
     WSDHelper.print("Evaluation Started");
-    String modelsDir = "src\\test\\resources\\models\\";
+    String modelsDir = "src/test/resources/models/";
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
 Tue Jun  7 09:23:03 2016
@@ -41,7 +41,7 @@ import org.junit.Test;
 public class LeskTester {
   // TODO write more tests
 
-  static String modelsDir = "src\\test\\resources\\models\\";
+  static String modelsDir = "src/test/resources/models/";
 
   static Lesk lesk;
 
@@ -131,7 +131,7 @@ public class LeskTester {
     List<String> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
 
     assertEquals("Check number of returned words", 5, senses.size());
-    assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 4.8",
+    assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01:: 3.8",
         senses.get(0));
     assertEquals("Check 'radioactive' sense ID",
         "WORDNET radioactive%3:00:00:: 6.0", senses.get(1));

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
 Tue Jun  7 09:23:03 2016
@@ -33,7 +33,7 @@ public class MFSEvaluatorTest {
   @Test
   public static void main(String[] args) {
     WSDHelper.print("Evaluation Started");
-    String modelsDir = "src\\test\\resources\\models\\";
+    String modelsDir = "src/test/resources/models/";
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java?rev=1747175&r1=1747174&r2=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
 Tue Jun  7 09:23:03 2016
@@ -41,7 +41,7 @@ public class MFSTester {
   // TODO write more tests
   // TODO modify when we fix the parameter model
 
-  static String modelsDir = "src\\test\\resources\\models\\";
+  static String modelsDir = "src/test/resources/models/";
 
   static MFS mfs;
 

Added: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java?rev=1747175&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDEvaluatorTest.java
 Tue Jun  7 09:23:03 2016
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
+import opennlp.tools.disambiguator.datareader.SensevalReader;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+// TODO improve the tests improve parameters
+public class WSDEvaluatorTest {
+
+  static SensevalReader seReader;
+
+  static String modelsDir = "src/test/resources/models/";
+  static String trainingDataDirectory = "src/test/resources/supervised/models/";
+
+  static WSDDefaultParameters params = new WSDDefaultParameters("");
+  static WSDisambiguatorME wsdME;
+  static WSDModel model;
+
+  static ArrayList<String> testWords;
+
+  /*
+   * Setup the testing variables
+   */
+  public static void setUpAndTraining() {
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
+    seReader = new SensevalReader();
+    testWords = seReader.getSensevalWords();
+    params = new WSDDefaultParameters("");
+    params.setTrainingDataDirectory(trainingDataDirectory);
+
+    TrainingParameters trainingParams = new TrainingParameters();
+    SemcorReaderExtended sr = new SemcorReaderExtended();
+
+    WSDHelper.print("Training Started");
+    for (String word : testWords) {
+      // don't take verbs because they are not from WordNet
+      if (!word.split("\\.")[1].equals("v")) {
+
+        ArrayList<WSDSample> instances = seReader.getSensevalData(word);
+        if (instances != null && instances.size() > 1) {
+          WSDHelper.print("------------------" + word + "------------------");
+          ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(word);
+
+          WSDModel writeModel = null;
+    /*
+     * Tests training the disambiguator We test both writing and reading a model
+     * file trained by semcor
+     */
+          File outFile;
+          try {
+            writeModel = WSDisambiguatorME
+              .train("en", sampleStream, trainingParams, params);
+            assertNotNull("Checking the model to be written", writeModel);
+            writeModel.writeModel(params.getTrainingDataDirectory() + word);
+            outFile = new File(
+              params.getTrainingDataDirectory() + word + ".wsd.model");
+            model = new WSDModel(outFile);
+            assertNotNull("Checking the read model", model);
+            wsdME = new WSDisambiguatorME(model, params);
+            assertNotNull("Checking the disambiguator", wsdME);
+          } catch (IOException e1) {
+            e1.printStackTrace();
+            fail("Exception in training");
+          }
+        }
+      }
+    }
+  }
+
+  public static void disambiguationEval() {
+
+    WSDHelper.print("Evaluation Started");
+
+    for (String word : testWords) {
+      WSDEvaluator evaluator = new WSDEvaluator(wsdME);
+
+      // don't take verbs because they are not from WordNet
+      if (!word.split("\\.")[1].equals("v")) {
+
+        ArrayList<WSDSample> instances = seReader.getSensevalData(word);
+        if (instances != null && instances.size() > 1) {
+          WSDHelper.print("------------------" + word + "------------------");
+          for (WSDSample instance : instances) {
+            if (instance.getSenseIDs() != null && !instance.getSenseIDs()[0]
+              .equals("null")) {
+              evaluator.evaluateSample(instance);
+            }
+          }
+          WSDHelper.print(evaluator.toString());
+        } else {
+          WSDHelper.print("null instances");
+        }
+      }
+
+    }
+  }
+
+  public static void main(String[] args) {
+    setUpAndTraining();
+    disambiguationEval();
+  }
+}

Copied: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
 (from r1746846, 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java)
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java?p2=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java&p1=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java&r1=1746846&r2=1747175&rev=1747175&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/WSDTester.java
 Tue Jun  7 09:23:03 2016
@@ -1,40 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
 package opennlp.tools.disambiguator;
 
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-import opennlp.tools.disambiguator.ims.IMSME;
-import opennlp.tools.disambiguator.ims.IMSParameters;
-
-public class Tester {
-
-  public static void main(String[] args) {
-
-    String modelsDir = "src\\test\\resources\\models\\";
+import opennlp.tools.util.ObjectStream;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.TrainingParameters;
+
+/**
+ * This is the test class for {@link WSDisambiguatorME}.
+ * <p/>
+ * The scope of this test is to make sure that the WSDisambiguatorME code can be
+ * executed. This test can not detect mistakes which lead to incorrect feature
+ * generation or other mistakes which decrease the disambiguation performance of
+ * the disambiguator.
+ * <p/>
+ * In this test the {@link WSDisambiguatorME} is trained with Semcor
+ * and then the computed model is used to predict sentences
+ * from the training sentences.
+ */
+
+public class WSDTester {
+  // TODO write more tests
+  // TODO modify when we fix the parameter model
+
+  static String modelsDir = "src/test/resources/models/";
+  static String trainingDataDirectory = "src/test/resources/supervised/models/";
+
+  static WSDDefaultParameters params;
+  static WSDisambiguatorME wsdME;
+  static WSDModel model;
+
+  static String test = "please.v";
+  static File outFile;
+
+  static String test1 = "We need to discuss an important topic, please write to me soon.";
+  static String test2 = "The component was highly radioactive to the point that"
+    + " it has been activated the second it touched water";
+  static String test3 = "The summer is almost over and I did not go to the beach even once";
+
+  static String[] sentence1;
+  static String[] sentence2;
+  static String[] sentence3;
+
+  static String[] tags1;
+  static String[] tags2;
+  static String[] tags3;
+
+  static String[] lemmas1;
+  static String[] lemmas2;
+  static String[] lemmas3;
+
+  /*
+   * Setup the testing variables
+   */
+  @BeforeClass public static void setUpAndTraining() {
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
 
-    IMSME ims = new IMSME(new IMSParameters("\\"));
+    sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+
+    tags1 = WSDHelper.getTagger().tag(sentence1);
+    tags2 = WSDHelper.getTagger().tag(sentence2);
+    tags3 = WSDHelper.getTagger().tag(sentence3);
+
+    List<String> tempLemmas1 = new ArrayList<String>();
+    for (int i = 0; i < sentence1.length; i++) {
+      tempLemmas1
+        .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
+    }
+    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+
+    List<String> tempLemmas2 = new ArrayList<String>();
+    for (int i = 0; i < sentence2.length; i++) {
+      tempLemmas2
+        .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
+    }
+    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
 
-    String test3 = "The summer is almost over and I haven't been to the beach even once";
-    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
     List<String> tempLemmas3 = new ArrayList<String>();
     for (int i = 0; i < sentence3.length; i++) {
-      String lemma = WSDHelper.getLemmatizer().lemmatize(sentence3[i],
-          tags3[i]);
-      tempLemmas3.add(lemma);
+      tempLemmas3
+        .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
     }
-    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
 
-    // output
-    List<String> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
-    for (int i = 0; i < sentence3.length; i++) {
-      System.out.print(sentence3[i] + " : ");
-      WSDHelper.printResults(ims, senses3.get(i));
-      WSDHelper.print("----------");
+    params = new WSDDefaultParameters("");
+    params.setTrainingDataDirectory(trainingDataDirectory);
+    TrainingParameters trainingParams = new TrainingParameters();
+    SemcorReaderExtended sr = new SemcorReaderExtended();
+    ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(test);
+
+    WSDModel writeModel = null;
+    /*
+     * Tests training the disambiguator We test both writing and reading a model
+     * file trained by semcor
+     */
+
+    try {
+      writeModel = WSDisambiguatorME
+        .train("en", sampleStream, trainingParams, params);
+      assertNotNull("Checking the model to be written", writeModel);
+      writeModel.writeModel(params.getTrainingDataDirectory() + test);
+      outFile = new File(
+        params.getTrainingDataDirectory() + test + ".wsd.model");
+      model = new WSDModel(outFile);
+      assertNotNull("Checking the read model", model);
+      wsdME = new WSDisambiguatorME(model, params);
+      assertNotNull("Checking the disambiguator", wsdME);
+    } catch (IOException e1) {
+      e1.printStackTrace();
+      fail("Exception in training");
     }
+  }
+
+  /*
+   * Tests disambiguating only one word : The ambiguous word "please"
+   */
+  @Test public void testOneWordDisambiguation() {
+    String sense = wsdME.disambiguate(sentence1, tags1, lemmas1, 8);
+    assertEquals("Check 'please' sense ID", "WORDNET please%2:37:00::", sense);
+  }
 
+  /*
+   * Tests disambiguating a word Span In this case we test a mix of monosemous
+   * and polysemous words as well as words that do not need disambiguation such
+   * as determiners
+   */
+  @Test public void testWordSpanDisambiguation() {
+    Span span = new Span(3, 7);
+    List<String> senses = wsdME.disambiguate(sentence2, tags2, lemmas2, span);
+
+    assertEquals("Check number of returned words", 5, senses.size());
+    assertEquals("Check 'highly' sense ID", "WORDNET highly%4:02:01::",
+      senses.get(0));
+    assertEquals("Check 'radioactive' sense ID",
+      "WORDNET radioactive%3:00:00::", senses.get(1));
+    assertEquals("Check preposition", "WSDHELPER to", senses.get(2));
+    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3));
   }
-}
\ No newline at end of file
+
+  /*
+   * Tests disambiguating all the words
+   */
+  @Test public void testAllWordsDisambiguation() {
+    List<String> senses = wsdME.disambiguate(sentence3, tags3, lemmas3);
+
+    assertEquals("Check number of returned words", 15, senses.size());
+    assertEquals("Check preposition", "WSDHELPER personal pronoun",
+      senses.get(6));
+  }
+
+}



Reply via email to