im...

joern Thu, 20 Aug 2015 15:02:38 -0700

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
 Thu Aug 20 22:01:59 2015
@@ -1,191 +1 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import net.sf.extjwnl.data.POS;
-
-public class WordToDisambiguate {
-
-  // TODO Check if it is necessary to add an attribute [word] since the word in
-  // the sentence is not necessarily in the base form ??
-
-  protected String[] sentence;
-  protected String[] posTags;
-
-  protected int wordIndex;
-
-  protected int sense;
-
-  protected ArrayList<String> senseIDs;
-
-  public WordToDisambiguate(String[] sentence, int wordIndex)
-      throws IllegalArgumentException {
-    super();
-
-    if (wordIndex > sentence.length) {
-      throw new IllegalArgumentException("The index is out of bounds !");
-    }
-
-    this.sentence = sentence;
-    this.posTags = WSDHelper.getTagger().tag(sentence);
-
-    this.wordIndex = wordIndex;
-
-    this.sense = -1;
-  }
-
-  public WordToDisambiguate(String[] sentence, int wordIndex, int sense)
-      throws IllegalArgumentException {
-    super();
-
-    if (wordIndex > sentence.length) {
-      throw new IllegalArgumentException("The index is out of bounds !");
-    }
-
-    this.sentence = sentence;
-    this.posTags = WSDHelper.getTagger().tag(sentence);
-
-    this.wordIndex = wordIndex;
-
-    this.sense = sense;
-  }
-
-  public WordToDisambiguate(String[] sentence, int wordIndex,
-      ArrayList<String> senseIDs) throws IllegalArgumentException {
-    super();
-
-    if (wordIndex > sentence.length) {
-      throw new IllegalArgumentException("The index is out of bounds !");
-    }
-
-    this.sentence = sentence;
-    this.posTags = WSDHelper.getTagger().tag(sentence);
-
-    this.wordIndex = wordIndex;
-
-    this.senseIDs = senseIDs;
-  }
-
-  public WordToDisambiguate(String[] sentence, String[] tokenTags, int wordIndex) {
-    this(sentence, wordIndex, -1);
-  }
-
-  public WordToDisambiguate() {
-    String[] emptyString = {};
-    int emptyInteger = 0;
-
-    this.sentence = emptyString;
-    this.wordIndex = emptyInteger;
-    this.sense = -1;
-
-  }
-
-  // Sentence
-  public String[] getSentence() {
-    return sentence;
-  }
-
-  public void setSentence(String[] sentence) {
-    this.sentence = sentence;
-  }
-
-  // Sentence Pos-Tags
-  public String[] getPosTags() {
-    return posTags;
-  }
-
-  public void setPosTags(String[] posTags) {
-    this.posTags = posTags;
-  }
-
-  // Word to disambiguate
-  public int getWordIndex() {
-    return wordIndex;
-  }
-
-  public String getRawWord() {
-
-    String wordBaseForm = WSDHelper.getLemmatizer().lemmatize(
-        this.sentence[wordIndex], this.posTags[wordIndex]);
-
-    String ref = "";
-
-    if ((WSDHelper.getPOS(this.posTags[wordIndex]) != null)) {
-      if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.VERB)) {
-        ref = wordBaseForm + ".v";
-      } else if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.NOUN)) {
-        ref = wordBaseForm + ".n";
-      } else if (WSDHelper.getPOS(this.posTags[wordIndex])
-          .equals(POS.ADJECTIVE)) {
-        ref = wordBaseForm + ".a";
-      } else if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.ADVERB)) {
-        ref = wordBaseForm + ".r";
-      }
-
-    }
-
-    return ref;
-  }
-
-  public String getWord() {
-    return this.sentence[this.wordIndex];
-  }
-
-  public String getPosTag() {
-    return this.posTags[this.wordIndex];
-  }
-
-  public void setWordIndex(int wordIndex) {
-    this.wordIndex = wordIndex;
-  }
-
-  // Word to disambiguate sense
-  public int getSense() {
-    return sense;
-  }
-
-  public void setSense(int sense) {
-    this.sense = sense;
-  }
-
-  // Sense as in the source
-  // TODO fix the conflict between this ID of the sense and that in the
-  // attribute [sense]
-  public ArrayList<String> getSenseIDs() {
-    return senseIDs;
-  }
-
-  public void setSenseIDs(ArrayList<String> senseIDs) {
-    this.senseIDs = senseIDs;
-  }
-
-  public String toString() {
-    return (wordIndex + "\t" + getWord() + "\n" + sentence);
-  }
-
-  public void print() {
-    WSDHelper.print("Sentence:  " + Arrays.asList(sentence) + "\n" + "Index: "
-        + wordIndex + "\n" + "Word: " + getWord() + "\n" + "Sense ID: "
-        + senseIDs.get(0));
-  }
-}
+// TODO to be removed
\ No newline at end of file


Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/contextclustering/ContextClusterer.java
 Thu Aug 20 22:01:59 2015
@@ -20,6 +20,7 @@
 package opennlp.tools.disambiguator.contextclustering;
 
 import java.security.InvalidParameterException;
+import java.util.List;
 
 import opennlp.tools.disambiguator.WSDParameters;
 import opennlp.tools.disambiguator.WSDSample;
@@ -32,7 +33,7 @@ import opennlp.tools.util.Span;
  * 
  * This implementation is based on {@link http://nlp.cs.rpi.edu/paper/wsd.pdf}
  */
-public class ContextClusterer implements WSDisambiguator {
+public class ContextClusterer extends WSDisambiguator {
 
   protected ContextClustererParameters params;
 
@@ -56,14 +57,7 @@ public class ContextClusterer implements
 
   @Override
   public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      int ambiguousTokenIndex, String ambiguousTokenLemma) {
-    // TODO Auto-generated method stub
-    return null;
-  }
-
-  @Override
-  public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      Span ambiguousTokenIndexSpan, String ambiguousTokenLemma) {
+      String[] lemmas, int ambiguousTokenIndex) {
     // TODO Auto-generated method stub
     return null;
   }
@@ -74,10 +68,6 @@ public class ContextClusterer implements
     return null;
   }
 
-  @Override
-  public String[] disambiguate(String[] inputText, int inputWordIndex) {
-    // TODO Auto-generated method stub
-    return null;
-  }
+
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SemcorReaderExtended.java
 Thu Aug 20 22:01:59 2015
@@ -25,7 +25,8 @@ import java.util.ArrayList;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
-import opennlp.tools.disambiguator.WordToDisambiguate;
+import opennlp.tools.disambiguator.WSDHelper;
+import opennlp.tools.disambiguator.WSDSample;
 
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
@@ -175,10 +176,10 @@ public class SemcorReaderExtended {
     return result;
   }
 
-  public ArrayList<WordToDisambiguate> getSemcorOneFileData(String file,
+  public ArrayList<WSDSample> getSemcorOneFileData(String file,
       String wordTag) {
 
-    ArrayList<WordToDisambiguate> setInstances = new ArrayList<WordToDisambiguate>();
+    ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();
 
     try {
 
@@ -223,8 +224,15 @@ public class SemcorReaderExtended {
             }
 
             if (!senses.isEmpty()) {
-              WordToDisambiguate wtd = new WordToDisambiguate(
-                  sentence.split("\\s"), index, senses);
+              String[] words = sentence.split("\\s");
+              String[] tags = WSDHelper.getTagger().tag(words);
+              String[] lemmas = new String[words.length];
+              
+              for (int i = 0; i < words.length; i++) {
+                lemmas[i] = WSDHelper.getLemmatizer().lemmatize(words[i], tags[i]);
+              }
+              
+              WSDSample wtd = new WSDSample(words, tags, lemmas, index, senses);
               setInstances.add(wtd);
             }
 
@@ -253,10 +261,9 @@ public class SemcorReaderExtended {
    *          The word, of which we are looking for the instances
    * @return the list of the {@link WordToDisambiguate} instances
    */
-  public ArrayList<WordToDisambiguate> getSemcorFolderData(String folder,
-      String wordTag) {
+  public ArrayList<WSDSample> getSemcorFolderData(String folder, String wordTag) {
 
-    ArrayList<WordToDisambiguate> result = new ArrayList<WordToDisambiguate>();
+    ArrayList<WSDSample> result = new ArrayList<WSDSample>();
 
     String directory = path + folder + tagfiles;
     File tempFolder = new File(directory);
@@ -266,7 +273,7 @@ public class SemcorReaderExtended {
       listOfFiles = tempFolder.listFiles();
       for (File file : listOfFiles) {
 
-        ArrayList<WordToDisambiguate> list = getSemcorOneFileData(directory
+        ArrayList<WSDSample> list = getSemcorOneFileData(directory
             + file.getName(), wordTag);
         result.addAll(list);
       }
@@ -285,12 +292,12 @@ public class SemcorReaderExtended {
   * @return the list of the {@link WordToDisambiguate} instances of the word to
    *         disambiguate
    */
-  public ArrayList<WordToDisambiguate> getSemcorData(String wordTag) {
+  public ArrayList<WSDSample> getSemcorData(String wordTag) {
 
-    ArrayList<WordToDisambiguate> result = new ArrayList<WordToDisambiguate>();
+    ArrayList<WSDSample> result = new ArrayList<WSDSample>();
 
     for (String folder : folders) {
-      ArrayList<WordToDisambiguate> list = getSemcorFolderData(folder, wordTag);
+      ArrayList<WSDSample> list = getSemcorFolderData(folder, wordTag);
       result.addAll(list);
     }
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/SensevalReader.java
 Thu Aug 20 22:01:59 2015
@@ -36,8 +36,8 @@ import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
-import opennlp.tools.disambiguator.WordToDisambiguate;
-import opennlp.tools.disambiguator.ims.WTDIMS;
+import opennlp.tools.disambiguator.WSDHelper;
+import opennlp.tools.disambiguator.WSDSample;
 
 /**
  * This class handles the extraction of Senseval-3 data from the different files
@@ -52,19 +52,6 @@ public class SensevalReader {
   protected String sensemapFile = sensevalDirectory + "EnglishLS.sensemap";
   protected String wordList = sensevalDirectory + "EnglishLS.train.key";
 
-  // protected String dict = sensevalDirectory + "EnglishLS.dictionary.xml";
-  // protected String map = sensevalDirectory + "EnglishLS.sensemap";
-
-  /**
-   * The XML file of Senseval presents some issues that need to be fixed first
-   */
-  private String fixXmlFile() {
-
-    // TODO fix this !
-
-    return null;
-  }
-
   public SensevalReader() {
     super();
   }
@@ -157,9 +144,9 @@ public class SensevalReader {
   * @return the list of the {@link WordToDisambiguate} instances of the word to
    *         disambiguate
    */
-  public ArrayList<WordToDisambiguate> getSensevalData(String wordTag) {
+  public ArrayList<WSDSample> getSensevalData(String wordTag) {
 
-    ArrayList<WordToDisambiguate> setInstances = new ArrayList<WordToDisambiguate>();
+    ArrayList<WSDSample> setInstances = new ArrayList<WSDSample>();
 
     try {
 
@@ -188,28 +175,7 @@ public class SensevalReader {
               Node nInstance = nInstances.item(j);
 
               if (nInstance.getNodeType() == Node.ELEMENT_NODE) {
-
-                Element eInstance = (Element) nInstance;
-
-                String[] wordPos = eLexelt.getAttribute("item").split("\\.");
-                String word = wordPos[0]; // Word
-                String tag; // Part of Speech
-
-                if (wordPos[1].equals("n")) {
-                  tag = "noun";
-                } else if (wordPos[1].equals("v")) {
-                  tag = "verb";
-                } else if (wordPos[1].equals("a")) {
-                  tag = "adjective";
-                } else {
-                  tag = "adverb";
-                }
-
-                String id = eInstance.getAttribute("id");
-                String source = eInstance.getAttribute("docsrc");
-
-                ArrayList<String> answers = new ArrayList<String>();
-                String sentence = "";
+                ArrayList<String> senseIDs = new ArrayList<String>();
                 String rawWord = "";
                 String[] finalText = null;
                 int index = 0;
@@ -227,11 +193,10 @@ public class SensevalReader {
 
                     String temp = senseid;
                     // String[] temp = { answer, senseid };
-                    answers.add(temp);
+                    senseIDs.add(temp);
                   }
 
                   if (nChild.getNodeName().equals("context")) {
-                    sentence = ((Element) nChild).getTextContent();
 
                     if (nChild.hasChildNodes()) {
                       String textBefore = nChild.getChildNodes().item(0)
@@ -272,9 +237,19 @@ public class SensevalReader {
 
                 }
 
-                WTDIMS wordToDisambiguate = new WTDIMS(finalText, index,
-                    answers);
-                setInstances.add(wordToDisambiguate);
+                String[] words = finalText;
+                String[] tags = WSDHelper.getTagger().tag(words);
+                String[] lemmas = new String[words.length];
+
+                for (int k = 0; k < words.length; k++) {
+                  lemmas[k] = WSDHelper.getLemmatizer().lemmatize(words[k],
+                      tags[k]);
+                }
+
+                WSDSample wtd = new WSDSample(words, tags, lemmas, index,
+                    senseIDs);
+                setInstances.add(wtd);
+
               }
             }
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
 Thu Aug 20 22:01:59 2015
@@ -46,13 +46,12 @@ import java.util.zip.GZIPInputStream;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.Span;
 import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.disambiguator.FeaturesExtractor;
+import opennlp.tools.disambiguator.WSDHelper;
 import opennlp.tools.disambiguator.WSDParameters;
 import opennlp.tools.disambiguator.WSDSample;
 import opennlp.tools.disambiguator.WSDisambiguator;
-import opennlp.tools.disambiguator.WordToDisambiguate;
 import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
 import opennlp.tools.disambiguator.datareader.SensevalReader;
 import opennlp.tools.disambiguator.mfs.MFS;
@@ -70,7 +69,7 @@ import opennlp.tools.disambiguator.mfs.M
  * check {@link https://www.comp.nus.edu.sg/~nght/pubs/ims.pdf} for details
  * about this approach
  */
-public class IMS implements WSDisambiguator {
+public class IMS extends WSDisambiguator {
 
   public IMSParameters parameters;
 
@@ -244,8 +243,6 @@ public class IMS implements WSDisambigua
       e.printStackTrace();
     }
 
-    System.out.println("Done");
-
   }
 
   private void extractFeature(WTDIMS word) {
@@ -344,15 +341,15 @@ public class IMS implements WSDisambigua
   }
 
   /**
-   * The disambiguation method for a single word
+   * The disambiguation method for a single word, it requires as input one
+   * object of type WTDIMS
    * 
    * @param inputText
    *          : the text containing the word to disambiguate
    * @param inputWordIndex
    *          : the index of the word to disambiguate
    */
-  @Override
-  public String[] disambiguate(String[] inputText, int inputWordIndex) {
+  public String[] disambiguate(WTDIMS wordToDisambiguate) {
 
     String trainingDataDirectory = IMSParameters.trainingDataDirectory;
 
@@ -362,11 +359,10 @@ public class IMS implements WSDisambigua
       file.mkdirs();
     }
 
-    WTDIMS word = new WTDIMS(inputText, inputWordIndex);
-    fExtractor.extractIMSFeatures(word, this.parameters.getWindowSize(),
-        this.parameters.getNgram());
+    fExtractor.extractIMSFeatures(wordToDisambiguate,
+        this.parameters.getWindowSize(), this.parameters.getNgram());
 
-    String wordTag = word.getWordTag();
+    String wordTag = wordToDisambiguate.getWordTag();
 
     String wordTrainingbinFile = trainingDataDirectory + wordTag + ".gz";
 
@@ -378,10 +374,10 @@ public class IMS implements WSDisambigua
     if (bf.exists() && !bf.isDirectory()) {
       // If the trained model exists
       ArrayList<String> surrWords = getAllSurroundingWords(wordTag);
-      fExtractor.serializeIMSFeatures(word, surrWords);
+      fExtractor.serializeIMSFeatures(wordToDisambiguate, surrWords);
 
       loadedMaxentModel = load(wordTrainingbinFile);
-      String[] context = cg.getContext(word);
+      String[] context = cg.getContext(wordToDisambiguate);
 
       double[] outcomeProbs = loadedMaxentModel.eval(context);
       outcome = loadedMaxentModel.getBestOutcome(outcomeProbs);
@@ -389,10 +385,10 @@ public class IMS implements WSDisambigua
     } else {
       // Depending on the source, go fetch the training data
       ArrayList<WTDIMS> trainingInstances = new ArrayList<WTDIMS>();
-      switch (this.parameters.getSource().code) {
-      case 1: {
+      switch (this.parameters.getTrainingSource()) {
+      case SEMCOR: {
         SemcorReaderExtended sReader = new SemcorReaderExtended();
-        for (WordToDisambiguate ti : sReader.getSemcorData(wordTag)) {
+        for (WSDSample ti : sReader.getSemcorData(wordTag)) {
           WTDIMS imsIT = new WTDIMS(ti);
           extractFeature(imsIT);
           trainingInstances.add(imsIT);
@@ -400,17 +396,17 @@ public class IMS implements WSDisambigua
         break;
       }
 
-      case 2: {
+      case SEMEVAL: {
         SensevalReader sReader = new SensevalReader();
-        for (WordToDisambiguate ti : sReader.getSensevalData(wordTag)) {
-          WTDIMS imsIT = (WTDIMS) ti;
+        for (WSDSample ti : sReader.getSensevalData(wordTag)) {
+          WTDIMS imsIT = new WTDIMS(ti);
           extractFeature(imsIT);
           trainingInstances.add(imsIT);
         }
         break;
       }
 
-      case 3: {
+      case OTHER: {
         // TODO check the case when the user selects his own data set (make an
         // interface to collect training data)
         break;
@@ -423,11 +419,11 @@ public class IMS implements WSDisambigua
 
         ArrayList<String> surrWords = getAllSurroundingWords(wordTag);
 
-        fExtractor.serializeIMSFeatures(word, surrWords);
+        fExtractor.serializeIMSFeatures(wordToDisambiguate, surrWords);
 
         bf = new File(wordTrainingbinFile);
         loadedMaxentModel = load(wordTrainingbinFile);
-        String[] context = cg.getContext(word);
+        String[] context = cg.getContext(wordToDisambiguate);
 
         double[] outcomeProbs = loadedMaxentModel.eval(context);
         outcome = loadedMaxentModel.getBestOutcome(outcomeProbs);
@@ -437,11 +433,8 @@ public class IMS implements WSDisambigua
 
     if (!outcome.equals("")) {
 
-      // System.out.println("The sense is [" + outcome + "] : " /*+
-      // Loader.getDictionary().getWordBySenseKey(outcome.split("%")[1]).getSynset().getGloss()*/);
-
-      outcome = parameters.source.name() + " " + wordTag.split("\\.")[0] + "%"
-          + outcome;
+      outcome = parameters.getSenseSource().name() + " "
+          + wordTag.split("\\.")[0] + "%" + outcome;
 
       String[] s = { outcome };
 
@@ -449,29 +442,63 @@ public class IMS implements WSDisambigua
 
     } else {
       // if no training data exist
-      return MFS.getMostFrequentSense(word);
+      MFS mfs = new MFS();
+      return mfs.disambiguate(wordTag);
     }
 
   }
 
   @Override
-  public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      int ambiguousTokenIndex, String ambiguousTokenLemma) {
-    // TODO Update
-    return null;
-  }
+  public String[] disambiguate(WSDSample sample) {
+    if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+      WTDIMS wordToDisambiguate = new WTDIMS(sample);
+      return disambiguate(wordToDisambiguate);
+
+    } else {
+      if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+        String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
+            + sample.getTargetTag();
+        String[] sense = { s };
+        return sense;
+      } else {
+        return null;
+      }
+    }
 
-  @Override
-  public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      Span ambiguousTokenIndexSpan, String ambiguousTokenLemma) {
-    // TODO Update
-    return null;
   }
 
-  @Override
-  public String[] disambiguate(WSDSample sample) {
-    // TODO Update
-    return null;
+  /**
+   * The IMS disambiguation method for a single word
+   * 
+   * @param tokenizedContext
+   *          : the text containing the word to disambiguate
+   * @param tokenTags
+   *          : the tags corresponding to the context
+   * @param lemmas
+   *          : the lemmas of ALL the words in the context
+   * @param index
+   *          : the index of the word to disambiguate
+   * @return an array of the senses of the word to disambiguate
+   */
+  public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+      String[] lemmas, int index) {
+
+    if (WSDHelper.isRelevantPOSTag(tokenTags[index])) {
+      WTDIMS wordToDisambiguate = new WTDIMS(tokenizedContext, tokenTags,
+          lemmas, index);
+      return disambiguate(wordToDisambiguate);
+
+    } else {
+      if (WSDHelper.getNonRelevWordsDef(tokenTags[index]) != null) {
+        String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
+            + tokenTags[index];
+        String[] sense = { s };
+        return sense;
+      } else {
+        return null;
+      }
+    }
+
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
 Thu Aug 20 22:01:59 2015
@@ -0,0 +1 @@
+// TODO To be removed
\ No newline at end of file

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
 Thu Aug 20 22:01:59 2015
@@ -29,22 +29,9 @@ import opennlp.tools.disambiguator.WSDPa
  */
 public class IMSParameters extends WSDParameters {
 
-  public static enum Source {
-    SEMCOR(1, "semcor"), SEMEVAL(2, "semeval"), OTHER(3, "other");
-
-    public int code;
-    public String src;
-
-    private Source(int code, String src) {
-      this.code = code;
-      this.src = src;
-    }
-  }
-
   protected String languageCode;
   protected int windowSize;
   protected int ngram;
-  protected Source source;
 
   public static final String resourcesFolder = "src\\test\\resources\\";
   public static final String trainingDataDirectory = resourcesFolder
@@ -63,12 +50,13 @@ public class IMSParameters extends WSDPa
    * @param source
    *          the source of the training data
    */
-  public IMSParameters(int windowSize, int ngram, Source source) {
-    super();
+  public IMSParameters(int windowSize, int ngram,
+      TrainingSource trainingSource, SenseSource senseSource) {
     this.languageCode = "En";
     this.windowSize = windowSize;
     this.ngram = ngram;
-    this.source = source;
+    this.trainingSource = trainingSource;
+    this.senseSource = senseSource;
     this.isCoarseSense = false;
 
     File folder = new File(trainingDataDirectory);
@@ -77,15 +65,15 @@ public class IMSParameters extends WSDPa
   }
 
   public IMSParameters() {
-    this(3, 2, Source.SEMCOR);
+    this(3, 2, TrainingSource.SEMCOR, SenseSource.WORDNET);
   }
 
-  public IMSParameters(Source source) {
-    this(3, 2, source);
+  public IMSParameters(TrainingSource source) {
+    this(3, 2, source, SenseSource.WORDNET);
   }
 
   public IMSParameters(int windowSize, int ngram) {
-    this(windowSize, ngram, Source.SEMCOR);
+    this(windowSize, ngram, TrainingSource.SEMCOR, SenseSource.WORDNET);
   }
 
   public String getLanguageCode() {
@@ -112,14 +100,6 @@ public class IMSParameters extends WSDPa
     this.ngram = ngram;
   }
 
-  public Source getSource() {
-    return source;
-  }
-
-  public void setSource(Source source) {
-    this.source = source;
-  }
-
   void init() {
   }
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 Thu Aug 20 22:01:59 2015
@@ -20,39 +20,56 @@
 package opennlp.tools.disambiguator.ims;
 
 import java.util.ArrayList;
+import java.util.List;
 
 import net.sf.extjwnl.data.POS;
 import opennlp.tools.disambiguator.WSDHelper;
-import opennlp.tools.disambiguator.WordToDisambiguate;
+import opennlp.tools.disambiguator.WSDSample;
 
-public class WTDIMS extends WordToDisambiguate {
+public class WTDIMS {
 
+  // Attributes related to the context
+  protected String[] sentence;
+  protected String[] posTags;
+  protected String[] lemmas;
+  protected int wordIndex;
+  protected int sense;
+  protected List<String> senseIDs;
+
+  // Attributes related to IMS features
   protected String[] posOfSurroundingWords;
   protected String[] surroundingWords;
   protected String[] localCollocations;
-
   protected String[] features;
 
-  public WTDIMS(String[] sentence, int word, int sense) {
-    super(sentence, word, sense);
-
+  public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
+      int wordIndex) {
+    this.sentence = sentence;
+    this.posTags = posTags;
+    this.wordIndex = wordIndex;
+    this.lemmas = lemmas;
   }
 
-  public WTDIMS(String[] sentence, int word) {
-    super(sentence, word);
+  public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
+      int wordIndex, List<String> senseIDs) {
+    this.sentence = sentence;
+    this.posTags = posTags;
+    this.wordIndex = wordIndex;
+    this.lemmas = lemmas;
+    this.senseIDs = senseIDs;
+
   }
 
-  public WTDIMS(String xmlWord, ArrayList<String> senseIDs, String xmlSentence,
-      String xmlrawWord) {
+  public WTDIMS(String[] sentence, String[] posTags, String[] lemmas,
+      String word, List<String> senseIDs) {
     super();
 
-    // this.word = xmlWord;
-
-    this.sentence = WSDHelper.getTokenizer().tokenize(xmlSentence);
-    this.posTags = WSDHelper.getTagger().tag(this.sentence);
+    this.sentence = sentence;
+    this.posTags = posTags;
+    this.lemmas = lemmas;
 
     for (int i = 0; i < sentence.length; i++) {
-      if (xmlrawWord.equals(sentence[i])) {
+      if (word.equals(sentence[i])) {
         this.wordIndex = i;
         break;
       }
@@ -62,16 +79,93 @@ public class WTDIMS extends WordToDisamb
 
   }
 
-  public WTDIMS(WordToDisambiguate wtd) {
-    super(wtd.getSentence(), wtd.getWordIndex(), wtd.getSense());
-    this.senseIDs = wtd.getSenseIDs();
+  public WTDIMS(WSDSample sample) {
+    this.sentence = sample.getSentence();
+    this.posTags = sample.getTags();
+    this.lemmas = sample.getLemmas();
+    this.wordIndex = sample.getTargetPosition();
+    this.senseIDs = sample.getSenseIDs();
+
+  }
+  
+  public String[] getSentence() {
+    return sentence;
+  }
+
+  public void setSentence(String[] sentence) {
+    this.sentence = sentence;
+  }
+
+  public String[] getPosTags() {
+    return posTags;
+  }
+
+  public void setPosTags(String[] posTags) {
+    this.posTags = posTags;
+  }
+
+  public int getWordIndex() {
+    return wordIndex;
+  }
+
+  public void setWordIndex(int wordIndex) {
+    this.wordIndex = wordIndex;
+  }
+
+  public String[] getLemmas() {
+    return lemmas;
+  }
+
+  public void setLemmas(String[] lemmas) {
+    this.lemmas = lemmas;
   }
 
-  public WTDIMS(String[] sentence, int wordIndex, ArrayList<String> senseIDs) {
-    super(sentence, wordIndex);
+  public int getSense() {
+    return sense;
+  }
+
+  public void setSense(int sense) {
+    this.sense = sense;
+  }
+
+  public List<String> getSenseIDs() {
+    return senseIDs;
+  }
+
+  public void setSenseIDs(ArrayList<String> senseIDs) {
     this.senseIDs = senseIDs;
   }
 
+  public String getWord() {
+    return this.getSentence()[this.getWordIndex()];
+  }
+
+  public String getWordTag() {
+
+    String wordBaseForm = this.getLemmas()[this.getWordIndex()];
+
+    String ref = "";
+
+    if ((WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]) != null)) {
+      if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()]).equals(
+          POS.VERB)) {
+        ref = wordBaseForm + ".v";
+      } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+          .equals(POS.NOUN)) {
+        ref = wordBaseForm + ".n";
+      } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+          .equals(POS.ADJECTIVE)) {
+        ref = wordBaseForm + ".a";
+      } else if (WSDHelper.getPOS(this.getPosTags()[this.getWordIndex()])
+          .equals(POS.ADVERB)) {
+        ref = wordBaseForm + ".r";
+      }
+    }
+
+    return ref;
+  }
+
+  
   public String[] getPosOfSurroundingWords() {
     return posOfSurroundingWords;
   }
@@ -104,25 +198,4 @@ public class WTDIMS extends WordToDisamb
     this.features = features;
   }
 
-  public String getWordTag() {
-
-    String wordBaseForm = WSDHelper.getLemmatizer().lemmatize(this.getWord(),
-        this.getPosTag());
-
-    String ref = "";
-
-    if ((WSDHelper.getPOS(this.getPosTag()) != null)) {
-      if (WSDHelper.getPOS(this.getPosTag()).equals(POS.VERB)) {
-        ref = wordBaseForm + ".v";
-      } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.NOUN)) {
-        ref = wordBaseForm + ".n";
-      } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.ADJECTIVE)) {
-        ref = wordBaseForm + ".a";
-      } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.ADVERB)) {
-        ref = wordBaseForm + ".r";
-      }
-    }
-
-    return ref;
-  }
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
 Thu Aug 20 22:01:59 2015
@@ -31,7 +31,6 @@ import opennlp.tools.disambiguator.WSDis
 import opennlp.tools.disambiguator.WordPOS;
 import opennlp.tools.disambiguator.WordSense;
 import opennlp.tools.disambiguator.mfs.MFS;
-import opennlp.tools.util.Span;
 import net.sf.extjwnl.JWNLException;
 import net.sf.extjwnl.data.Synset;
 import net.sf.extjwnl.data.Word;
@@ -44,7 +43,7 @@ import net.sf.extjwnl.data.Word;
  * the approach are included in this class.
  * 
  */
-public class Lesk implements WSDisambiguator {
+public class Lesk extends WSDisambiguator {
 
   /**
    * The lesk specific parameters
@@ -113,8 +112,12 @@ public class Lesk implements WSDisambigu
     ArrayList<SynNode> nodes = new ArrayList<SynNode>();
 
     for (int i = 0; i < sample.getSentence().length; i++) {
-      contextWords
-          .add(new WordPOS(sample.getSentence()[i], sample.getTags()[i]));
+      if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
+        if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
+          contextWords.add(new WordPOS(sample.getSentence()[i], sample
+              .getTags()[i]));
+        }
+      }
     }
     for (Synset synset : synsets) {
       SynNode node = new SynNode(synset, contextWords);
@@ -158,8 +161,12 @@ public class Lesk implements WSDisambigu
     for (int i = index - getParams().win_b_size; i <= index
         + getParams().win_f_size; i++) {
       if (i >= 0 && i < sample.getSentence().length && i != index) {
-        contextWords.add(new WordPOS(sample.getSentence()[i],
-            sample.getTags()[i]));
+        if (!WSDHelper.getStopCache().containsKey(sample.getSentence()[i])) {
+          if (WSDHelper.getRelvCache().containsKey(sample.getTags()[i])) {
+            contextWords.add(new WordPOS(sample.getSentence()[i], sample
+                .getTags()[i]));
+          }
+        }
       }
     }
 
@@ -944,44 +951,18 @@ public class Lesk implements WSDisambigu
     return count;
   }
 
-  /**
-   * Disambiguates an ambiguous word in its context
-   * 
-   * @param tokenizedContext
-   * @param ambiguousTokenIndex
-   * @return array of sense indexes from WordNet ordered by their score. The
-   *         result format is <b>Source</b> <b>SenseID</b> If the input token 
is
-   *         non relevant a null is returned.
-   */
-  @Override
-  public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      int ambiguousTokenIndex, String ambiguousTokenLemma) {
-    return disambiguate(new WSDSample(tokenizedContext, tokenTags,
-        ambiguousTokenIndex, ambiguousTokenLemma));
-  }
-
-  /**
-   * Disambiguates an ambiguous word in its context The user can set a span of
-   * inputWords from the tokenized input
-   * 
-   * @param inputText
-   * @param inputWordSpans
-   * @return array of array of sense indexes from WordNet ordered by their
-   *         score. The result format is <b>Source</b> <b>SenseID</b> If the
-   *         input token is non relevant a null is returned.
-   */
-  @Override
-  public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      Span ambiguousTokenSpan, String ambiguousTokenLemma) {
-    // TODO need to work on spans
-    return null;
-  }
-
   @Override
   public String[] disambiguate(WSDSample sample) {
-    // if the word is not relevant return null
-    if (!WSDHelper.isRelevant(sample.getTargetTag())) {
-      return null;
+    // if not relevant POS tag
+    if (!WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+      if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+        String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+            + sample.getTargetTag();
+        String[] sense = { s };
+        return sense;
+      } else {
+        return null;
+      }
     }
 
     ArrayList<WordSense> wsenses = null;
@@ -1020,7 +1001,8 @@ public class Lesk implements WSDisambigu
       for (int i = 0; i < wsenses.size(); i++) {
         synsetWords = wsenses.get(i).getNode().synset.getWords();
         for (Word synWord : synsetWords) {
-          if (synWord.getLemma().equals(sample.getTargetLemma())) {
+          if (synWord.getLemma().equals(
+              sample.getLemmas()[sample.getTargetPosition()])) {
             try {
               senseKey = synWord.getSenseKey();
             } catch (JWNLException e) {
@@ -1041,9 +1023,10 @@ public class Lesk implements WSDisambigu
   }
 
   @Override
-  public String[] disambiguate(String[] inputText, int inputWordIndex) {
-    // TODO Deprecate
-    return null;
+  public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
+      String[] lemmas, int ambiguousTokenIndex) {
+    return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
+        ambiguousTokenIndex));
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
 Thu Aug 20 22:01:59 2015
@@ -37,7 +37,7 @@ public class LeskParameters extends WSDP
 
   // DEFAULTS
   protected static final LESK_TYPE DFLT_LESK_TYPE = 
LESK_TYPE.LESK_EXT_EXP_CTXT;
-  protected static final Source DFLT_SOURCE = Source.WORDNET;
+  protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
   protected static final int DFLT_WIN_SIZE = 10;
   protected static final int DFLT_DEPTH = 1;
   protected static final double DFLT_DEPTH_WEIGHT = 0.8;
@@ -46,7 +46,7 @@ public class LeskParameters extends WSDP
 
   protected LESK_TYPE leskType;
 
-  protected Source source;
+  protected SenseSource source;
   protected int win_f_size;
   protected int win_b_size;
   protected int depth;

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
 Thu Aug 20 22:01:59 2015
@@ -32,15 +32,13 @@ import opennlp.tools.disambiguator.WSDPa
 import opennlp.tools.disambiguator.WSDSample;
 import opennlp.tools.disambiguator.WSDisambiguator;
 import opennlp.tools.disambiguator.WordPOS;
-import opennlp.tools.disambiguator.WordToDisambiguate;
-import opennlp.tools.util.Span;
 
 /**
  * Implementation of the <b>Most Frequent Sense</b> baseline approach. This
  * approach returns the senses in order of frequency in WordNet. The first 
sense
  * is the most frequent.
  */
-public class MFS implements WSDisambiguator {
+public class MFS extends WSDisambiguator {
 
   public MFSParameters parameters;
 
@@ -52,47 +50,7 @@ public class MFS implements WSDisambigua
     this.parameters = new MFSParameters();
   }
 
-  @Deprecated
-  public static String[] getMostFrequentSense(
-      WordToDisambiguate wordToDisambiguate) {
-
-    String word = wordToDisambiguate.getRawWord().toLowerCase();
-    POS pos = WSDHelper.getPOS(wordToDisambiguate.getPosTag());
-
-    if (pos != null) {
-
-      WordPOS wordPOS = new WordPOS(word, pos);
-
-      ArrayList<Synset> synsets = wordPOS.getSynsets();
-
-      int size = synsets.size();
-
-      String[] senses = new String[size];
-
-      for (int i = 0; i < size; i++) {
-        String senseKey = null;
-        for (Word wd : synsets.get(i).getWords()) {
-          if (wd.getLemma().equals(
-              wordToDisambiguate.getRawWord().split("\\.")[0])) {
-            try {
-              senseKey = wd.getSenseKey();
-            } catch (JWNLException e) {
-              e.printStackTrace();
-            }
-            senses[i] = "WordNet " + senseKey;
-            break;
-          }
-        }
-
-      }
-      return senses;
-    } else {
-      System.out.println("The word has no definitions in WordNet !");
-      return null;
-    }
-
-  }
-
+ 
   /*
    * @return the most frequent senses from wordnet
    */
@@ -102,19 +60,23 @@ public class MFS implements WSDisambigua
     for (Word wd : synsets.get(0).getWords()) {
       if (WSDParameters.isStemCompare) {
         WordPOS wdPOS = new WordPOS(wd.getLemma(), wd.getPOS());
-        WordPOS samplePOS = new WordPOS(sample.getTargetLemma(),
+        WordPOS samplePOS = new WordPOS(
+            sample.getLemmas()[sample.getTargetPosition()],
             WSDHelper.getPOS(sample.getTargetTag()));
         if (wdPOS.isStemEquivalent(samplePOS)) {
           try {
-            return WSDParameters.Source.WORDNET.name() + " " + 
wd.getSenseKey();
+            return WSDParameters.SenseSource.WORDNET.name() + " "
+                + wd.getSenseKey();
           } catch (JWNLException e) {
             e.printStackTrace();
           }
         }
       } else {
-        if (wd.getLemma().equalsIgnoreCase((sample.getTargetLemma()))) {
+        if (wd.getLemma().equalsIgnoreCase(
+            (sample.getLemmas()[sample.getTargetPosition()]))) {
           try {
-            return WSDParameters.Source.WORDNET.name() + " " + 
wd.getSenseKey();
+            return WSDParameters.SenseSource.WORDNET.name() + " "
+                + wd.getSenseKey();
           } catch (JWNLException e) {
             e.printStackTrace();
           }
@@ -134,11 +96,12 @@ public class MFS implements WSDisambigua
       for (Word wd : synsets.get(i).getWords()) {
         if (WSDParameters.isStemCompare) {
           WordPOS wdPOS = new WordPOS(wd.getLemma(), wd.getPOS());
-          WordPOS samplePOS = new WordPOS(sample.getTargetLemma(),
+          WordPOS samplePOS = new WordPOS(
+              sample.getLemmas()[sample.getTargetPosition()],
               WSDHelper.getPOS(sample.getTargetTag()));
           if (wdPOS.isStemEquivalent(samplePOS)) {
             try {
-              senseKeys[i] = WSDParameters.Source.WORDNET.name() + " "
+              senseKeys[i] = WSDParameters.SenseSource.WORDNET.name() + " "
                   + wd.getSenseKey();
               break;
             } catch (JWNLException e) {
@@ -147,9 +110,10 @@ public class MFS implements WSDisambigua
             break;
           }
         } else {
-          if (wd.getLemma().equalsIgnoreCase((sample.getTargetLemma()))) {
+          if (wd.getLemma().equalsIgnoreCase(
+              (sample.getLemmas()[sample.getTargetPosition()]))) {
             try {
-              senseKeys[i] = WSDParameters.Source.WORDNET.name() + " "
+              senseKeys[i] = WSDParameters.SenseSource.WORDNET.name() + " "
                   + wd.getSenseKey();
               break;
             } catch (JWNLException e) {
@@ -185,27 +149,77 @@ public class MFS implements WSDisambigua
 
   @Override
   public String[] disambiguate(WSDSample sample) {
-    return getMostFrequentSenses(sample);
+
+    if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
+      return getMostFrequentSenses(sample);
+
+    } else {
+      if (WSDHelper.getNonRelevWordsDef(sample.getTargetTag()) != null) {
+        String s = WSDParameters.SenseSource.WSDHELPER.name() + " "
+            + sample.getTargetTag();
+        String[] sense = { s };
+        return sense;
+      } else {
+        return null;
+      }
+    }
   }
 
   @Override
   public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      int ambiguousTokenIndex, String lemma) {
-    return disambiguate(new WSDSample(tokenizedContext, tokenTags,
-        ambiguousTokenIndex, lemma));
-  }
+      String[] lemmas, int ambiguousTokenIndex) {
+    return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
+        ambiguousTokenIndex));
+  }
+
+  public String[] disambiguate(String wordTag) { // wordTag: "lemma.pos", e.g. "activate.v"
+
+    String word = wordTag.split("\\.")[0];
+    String tag = wordTag.split("\\.")[1];
+
+    POS pos;
+
+    if (tag.equalsIgnoreCase("a")) {
+      pos = POS.ADJECTIVE;
+    } else if (tag.equalsIgnoreCase("r")) {
+      pos = POS.ADVERB;
+    } else if (tag.equalsIgnoreCase("n")) {
+      pos = POS.NOUN;
+    } else if (tag.equalsIgnoreCase("v")) { // was "a": duplicate test made VERB unreachable
+      pos = POS.VERB;
+    } else
+      pos = null;
 
-  @Override
-  public String[][] disambiguate(String[] tokenizedContext, String[] tokenTags,
-      Span ambiguousTokenIndexSpan, String ambiguousTokenLemma) {
-    // TODO A iterate over span
-    return null;
-  }
+    if (pos != null) {
 
-  @Override
-  public String[] disambiguate(String[] inputText, int inputWordIndex) {
-    // TODO Deprecate
-    return null;
-  }
+      WordPOS wordPOS = new WordPOS(word, pos);
+
+      ArrayList<Synset> synsets = wordPOS.getSynsets();
+
+      int size = synsets.size();
+
+      String[] senses = new String[size];
+
+      for (int i = 0; i < size; i++) {
+        String senseKey = null;
+        for (Word wd : synsets.get(i).getWords()) {
+          if (wd.getLemma().equals(word)) {
+            try {
+              senseKey = wd.getSenseKey();
+            } catch (JWNLException e) {
+              e.printStackTrace();
+            }
+            senses[i] = senseKey;
+            break;
+          }
+        }
 
+      }
+      return senses;
+    } else {
+      System.out.println("The word has no definitions in WordNet !");
+      return null;
+    }
+
+  }
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFSParameters.java
 Thu Aug 20 22:01:59 2015
@@ -27,22 +27,22 @@ public class MFSParameters extends WSDPa
 
   public MFSParameters() {
     this.isCoarseSense = false;
-    this.source = Source.WORDNET;
+    this.source = SenseSource.WORDNET;
   }
 
-  protected Source source;
+  protected SenseSource source;
 
-  public Source getSource() {
+  public SenseSource getSource() {
     return source;
   }
 
-  public void setSource(Source source) {
+  public void setSource(SenseSource source) {
     this.source = source;
   }
 
   @Override
   public boolean isValid() {
-    return EnumUtils.isValidEnum(Source.class, source.name());
+    return EnumUtils.isValidEnum(SenseSource.class, source.name());
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
 Thu Aug 20 22:01:59 2015
@@ -39,11 +39,10 @@ public class IMSEvaluatorTest {
     WSDHelper.print("Evaluation Started");
 
     String modelsDir = "src\\test\\resources\\models\\";
-    WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-    
-    
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
     IMS ims = new IMS();
     IMSParameters imsParams = new IMSParameters();
     ims.setParams(imsParams);
@@ -56,7 +55,7 @@ public class IMSEvaluatorTest {
       // don't take verbs because they are not from WordNet
       if (!word.split("\\.")[1].equals("v")) {
 
-        ArrayList<WSDSample> instances = getTestData(word);
+        ArrayList<WSDSample> instances = seReader.getSensevalData(word);
         if (instances != null) {
           WSDHelper.print("------------------" + word + "------------------");
           for (WSDSample instance : instances) {
@@ -74,59 +73,4 @@ public class IMSEvaluatorTest {
     }
 
   }
-
-  /**
-   * For a specific word, return the Semeval3 corresponding instances in form 
of
-   * {@link WSDIMS}
-   * 
-   * @param wordTag
-   *          the word of which the instances are to be collected. wordTag has
-   *          to be in the format "word.POS" (e.g., "activate.v", "smart.a",
-   *          etc.)
-   * @return list of {@link WSDIMS} instances of the wordTag
-   */
-  @Deprecated
-  protected static ArrayList<WTDIMS> getTestDataOld(String wordTag) {
-
-    ArrayList<WTDIMS> instances = new ArrayList<WTDIMS>();
-    for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
-      WTDIMS wtdims = new WTDIMS(wtd);
-      instances.add(wtdims);
-    }
-
-    return instances;
-  }
-  
-  protected static ArrayList<WSDSample> getTestData(String wordTag) {
-
-    ArrayList<WSDSample> instances = new ArrayList<WSDSample>();
-    for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
-      List<WordPOS> words = WSDHelper.getAllRelevantWords(wtd);
-      int targetWordIndex=0;
-      for (int i=0; i<words.size();i++){
-        if(words.get(i).isTarget){
-          targetWordIndex = i;
-        }   
-      }
-      String[] tags = new String[words.size()];
-      String[] tokens = new String[words.size()];
-      for (int i=0;i<words.size();i++){
-        tags[i] = words.get(i).getPosTag();
-        tokens[i] = words.get(i).getWord();
-      }
-      String targetLemma = WSDHelper.getLemmatizer().lemmatize(
-          tokens[targetWordIndex], tags[targetWordIndex]);
-      
-      WSDSample sample = new 
WSDSample(tokens,tags,targetWordIndex,targetLemma);
-      sample.setSenseIDs(wtd.getSenseIDs());
-      if (sample != null) {
-        if (sample.getSenseIDs().get(0) != null
-            && !sample.getSenseIDs().get(0).equalsIgnoreCase("U")) {
-          instances.add(sample);
-        }
-      }
-    }
-    return instances;
-  }
-
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
 Thu Aug 20 22:01:59 2015
@@ -19,7 +19,11 @@
 
 package opennlp.tools.disambiguator;
 
+import java.util.ArrayList;
+import java.util.List;
+
 import opennlp.tools.disambiguator.ims.IMS;
+import opennlp.tools.util.Span;
 
 /**
  * This is a typical example of how to call the disambiguation function in the
@@ -34,26 +38,83 @@ import opennlp.tools.disambiguator.ims.I
 public class IMSTester {
 
   public static void main(String[] args) {
-
+    
     String modelsDir = "src\\test\\resources\\models\\";
     WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-    
+
     IMS ims = new IMS();
 
-    String test1 = "Please write to me soon.";
+    
+    /**
+     * This is how to make the context for one-word-disambiguation using IMS
+     */
+    String test1 = "We need to discuss important topic, please write to me 
soon.";
     String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
-    WSDHelper.print(ims.disambiguate(sentence1, 1));
+    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+    List<String> tempLemmas1 = new ArrayList<String>();
+    for (int i = 0; i < sentence1.length; i++) {
+      String lemma = WSDHelper.getLemmatizer().lemmatize(sentence1[i], 
tags1[i]);
+      tempLemmas1.add(lemma);
+    }
+    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+
+    // output
+    String[] senses1 = ims.disambiguate(sentence1, tags1, lemmas1, 8);
+    System.out.print(lemmas1[8] + " :\t");
+    WSDHelper.print(senses1);
+    WSDHelper.print("*****************************");
 
-    String test2 = "it was a strong argument that his hypothesis was true";
+    
+    /**
+     * This is how to make the context for disambiguation of span of words
+     */
+    String test2 = "The component was highly radioactive to the point that"
+        + " it has been activated the second it touched water";
     String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
-    WSDHelper.print(ims.disambiguate(sentence2, 3));
+    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+    List<String> tempLemmas2 = new ArrayList<String>();
+    for (int i = 0; i < sentence2.length; i++) {
+      String lemma = WSDHelper.getLemmatizer().lemmatize(sentence2[i], 
tags2[i]);
+      tempLemmas2.add(lemma);
+    }
+    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+    Span span = new Span(3, 7);
+
+    // output
+    List<String[]> senses2 = ims.disambiguate(sentence2, tags2, lemmas2, span);
+    for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
+      String[] senses = senses2.get(i-span.getStart());
+      System.out.print(lemmas2[i] + " :\t");
+      WSDHelper.print(senses);
+      WSDHelper.print("----------");
+    }
 
-    String test3 = "the component was highly radioactive to the point that it 
has been activated the second it touched water";
-    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-    WSDHelper.print(ims.disambiguate(sentence3, 12));
+    WSDHelper.print("*****************************");
 
+    
+    /**
+     * This is how to make the context for all-words-disambiguation
+     */
+    String test3 = "The summer almost over and I not to the beach even once";
+    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+    List<String> tempLemmas3 = new ArrayList<String>();
+    for (int i = 0; i < sentence3.length; i++) {
+      String lemma = WSDHelper.getLemmatizer().lemmatize(sentence3[i], 
tags3[i]);
+      tempLemmas3.add(lemma);
+    }
+    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+    // output
+    List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+    for (int i = 0; i < sentence3.length; i++) {
+      String[] senses = senses3.get(i);
+      System.out.print(lemmas3[i] + " :\t");
+      WSDHelper.print(senses);
+      WSDHelper.print("----------");
+    }
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
 Thu Aug 20 22:01:59 2015
@@ -20,10 +20,8 @@
 package opennlp.tools.disambiguator;
 
 import java.util.ArrayList;
-import java.util.List;
 
 import opennlp.tools.disambiguator.datareader.SensevalReader;
-import opennlp.tools.disambiguator.ims.WTDIMS;
 import opennlp.tools.disambiguator.lesk.Lesk;
 import opennlp.tools.disambiguator.lesk.LeskParameters;
 
@@ -56,7 +54,7 @@ public class LeskEvaluatorTest {
       // don't take verbs because they are not from WordNet
       if (!word.split("\\.")[1].equals("v")) {
 
-        ArrayList<WSDSample> instances = getTestData(word);
+        ArrayList<WSDSample> instances = seReader.getSensevalData(word);
         if (instances != null) {
           WSDHelper.print("------------------" + word + "------------------");
           for (WSDSample instance : instances) {
@@ -73,37 +71,5 @@ public class LeskEvaluatorTest {
     }
   }
 
-  protected static ArrayList<WSDSample> getTestData(String wordTag) {
-
-    ArrayList<WSDSample> instances = new ArrayList<WSDSample>();
-    for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
-      List<WordPOS> words = WSDHelper.getAllRelevantWords(wtd);
-      int targetWordIndex = 0;
-      for (int i = 0; i < words.size(); i++) {
-        if (words.get(i).isTarget) {
-          targetWordIndex = i;
-        }
-      }
-      String[] tags = new String[words.size()];
-      String[] tokens = new String[words.size()];
-      for (int i = 0; i < words.size(); i++) {
-        tags[i] = words.get(i).getPosTag();
-        tokens[i] = words.get(i).getWord();
-      }
-      String targetLemma = WSDHelper.getLemmatizer().lemmatize(
-          tokens[targetWordIndex], tags[targetWordIndex]);
-
-      WSDSample sample = new WSDSample(tokens, tags, targetWordIndex,
-          targetLemma);
-      sample.setSenseIDs(wtd.getSenseIDs());
-      if (sample != null) {
-        if (sample.getSenseIDs().get(0) != null
-            && !sample.getSenseIDs().get(0).equalsIgnoreCase("U")) {
-          instances.add(sample);
-        }
-      }
-    }
-    return instances;
-  }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
 Thu Aug 20 22:01:59 2015
@@ -19,7 +19,7 @@
 
 package opennlp.tools.disambiguator;
 
-
+import java.util.ArrayList;
 import java.util.List;
 
 import opennlp.tools.disambiguator.lesk.Lesk;
@@ -32,7 +32,6 @@ public class LeskTester {
   @Test
   public static void main(String[] args) {
 
-    
     Lesk lesk = new Lesk();
     LeskParameters params = new LeskParameters();
     params.setLeskType(LESK_TYPE.LESK_EXT);
@@ -40,72 +39,60 @@ public class LeskTester {
     params.setFeatures(a);
     lesk.setParams(params);
     String modelsDir = "src\\test\\resources\\models\\";
-    WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-    
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
     String test1 = "I went to the bank to deposit money.";
-    String[] sentence = WSDHelper.getTokenizer().tokenize(test1);
-    List<WordPOS> words = WSDHelper.getAllRelevantWords(sentence);
-    int targetWordIndex = 0;
-    String[] tags = new String[words.size()];
-    String[] tokens = new String[words.size()];
-    for (int i=0;i<words.size();i++){
-      tags[i] = words.get(i).getPosTag();
-      tokens[i] = words.get(i).getWord();
-      
-      WSDHelper.print("token : "+ tokens[i]  + "_" + tags[i]);
+    String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    int targetWordIndex1 = 5;
+    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+    List<String> tempLemmas1 = new ArrayList<String>();
+    for (int i = 0; i < sentence1.length; i++) {
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence1[i], tags1[i]);
+      tempLemmas1.add(lemma);
     }
-    String targetLemma = WSDHelper.getLemmatizer().lemmatize(
-        tokens[targetWordIndex], tags[targetWordIndex]);
-   // Constants.print("lemma  : "+ targetLemma);
-    WSDHelper.print(lesk.disambiguate(tokens, tags, 
targetWordIndex,targetLemma));
-    WSDHelper.printResults(lesk,
-        lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma));
-    
+    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+    String[] results1 = lesk.disambiguate(sentence1, tags1, lemmas1,
+        targetWordIndex1);
+    WSDHelper.print(results1);
+    WSDHelper.printResults(lesk, results1);
+
     WSDHelper.print("----------------------------------------");
-    
+
     String test2 = "it was a strong argument that his hypothesis was true";
-    sentence = WSDHelper.getTokenizer().tokenize(test2);
-    words = WSDHelper.getAllRelevantWords(sentence);
-    targetWordIndex = 1;
-    tags = new String[words.size()];
-    tokens = new String[words.size()];
-    for (int i=0;i<words.size();i++){
-      tags[i] = words.get(i).getPosTag();
-      tokens[i] = words.get(i).getWord();
-      
-      //Constants.print("token : "+ tokens[i]  + "_" + tags[i]);
+    String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    int targetWordIndex2 = 4;
+    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+    List<String> tempLemmas2 = new ArrayList<String>();
+    for (int i = 0; i < sentence2.length; i++) { // bound must match the arrays indexed below
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence2[i], tags2[i]);
+      tempLemmas2.add(lemma);
     }
-    targetLemma = WSDHelper.getLemmatizer().lemmatize(
-        tokens[targetWordIndex], tags[targetWordIndex]);
-    //Constants.print("lemma  : "+ targetLemma);
-    
-    WSDHelper.print(lesk.disambiguate(tokens, tags, 
targetWordIndex,targetLemma));
-    WSDHelper.printResults(lesk,
-        lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma));
+    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+    String[] results2 = lesk.disambiguate(sentence2, tags2, lemmas2,
+        targetWordIndex2);
+    WSDHelper.print(results2);
+    WSDHelper.printResults(lesk, results2);
     WSDHelper.print("----------------------------------------");
-    
+
     String test3 = "the component was highly radioactive to the point that it 
has been activated the second it touched water";
-    
-    sentence = WSDHelper.getTokenizer().tokenize(test3);
-    words = WSDHelper.getAllRelevantWords(sentence);
-    targetWordIndex = 4;
-    tags = new String[words.size()];
-    tokens = new String[words.size()];
-    for (int i=0;i<words.size();i++){
-      tags[i] = words.get(i).getPosTag();
-      tokens[i] = words.get(i).getWord();
-      
-      //Constants.print("token : "+ tokens[i]  + "_" + tags[i]);
+    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+    int targetWordIndex3 = 3;
+    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+    List<String> tempLemmas3 = new ArrayList<String>();
+    for (int i = 0; i < sentence3.length; i++) {
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence3[i], tags3[i]);
+      tempLemmas3.add(lemma);
     }
-    targetLemma = WSDHelper.getLemmatizer().lemmatize(
-        tokens[targetWordIndex], tags[targetWordIndex]);
-    //Constants.print("lemma  : "+ targetLemma);
-    
-    WSDHelper.print(lesk.disambiguate(tokens, tags, 
targetWordIndex,targetLemma));
-    WSDHelper.printResults(lesk,
-        lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma));
+    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+    String[] results3 = lesk.disambiguate(sentence3, tags3, lemmas3,
+        targetWordIndex3);
+    WSDHelper.print(results3);
+    WSDHelper.printResults(lesk, results3);
     WSDHelper.print("----------------------------------------");
   }
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
 Thu Aug 20 22:01:59 2015
@@ -36,9 +36,9 @@ public class MFSEvaluatorTest {
   public static void main(String[] args) {
     WSDHelper.print("Evaluation Started");
     String modelsDir = "src\\test\\resources\\models\\";
-    WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
     MFS mfs = new MFS();
     WSDParameters.isStemCompare = true;
 
@@ -50,7 +50,7 @@ public class MFSEvaluatorTest {
       // don't take verbs because they are not from WordNet
       if (!word.split("\\.")[1].equals("v")) {
 
-        ArrayList<WSDSample> instances = getTestData(word);
+        ArrayList<WSDSample> instances = seReader.getSensevalData(word);
 
         if (instances != null) {
           WSDHelper.print("------------------" + word + "------------------");
@@ -70,38 +70,4 @@ public class MFSEvaluatorTest {
 
   }
 
-  /**
-   * For a specific word, return the Semeval3 corresponding instances in form of
-   * {@link WSDSample}
-   * 
-   * @param wordTag
-   *          the word of which the instances are to be collected. wordTag has
-   *          to be in the format "word.POS" (e.g., "activate.v", "smart.a",
-   *          etc.)
-   * @return list of {@link WSDSample} instances of the wordTag
-   */
-  protected static ArrayList<WSDSample> getTestData(String wordTag) {
-
-    ArrayList<WSDSample> instances = new ArrayList<WSDSample>();
-    for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) {
-
-      String targetLemma = WSDHelper.getLemmatizer().lemmatize(wtd.getWord(),
-          wtd.getPosTag());
-
-      WSDSample sample = new WSDSample(wtd.getSentence(), wtd.getPosTags(),
-          wtd.getWordIndex(), targetLemma);
-      sample.setSenseIDs(wtd.getSenseIDs());
-      
-      if (sample != null) {
-        if (sample.getSenseIDs().get(0) != null
-            && !sample.getSenseIDs().get(0).equalsIgnoreCase("U")) {
-          instances.add(sample);
-        }
-      }
-
-    }
-
-    return instances;
-  }
-
 }

Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java Thu Aug 20 22:01:59 2015
@@ -19,9 +19,11 @@
 
 package opennlp.tools.disambiguator;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.util.Span;
 
 /**
  * This is a typical example of how to call the disambiguation function in the
@@ -30,78 +32,83 @@ import opennlp.tools.disambiguator.mfs.M
 public class MFSTester {
 
   public static void main(String[] args) {
-    
     String modelsDir = "src\\test\\resources\\models\\";
-    WSDHelper.loadTokenizer(modelsDir+"en-token.bin");
-    WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict");
-    WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin");
-    
-    
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
     MFS mfs = new MFS();
 
-    String test1 = "I went fishing for some sea bass.";
-    String[] sentence = WSDHelper.getTokenizer().tokenize(test1);
-    List<WordPOS> words = WSDHelper.getAllRelevantWords(sentence);
-    int targetWordIndex = 2;
-    String[] tags = new String[words.size()];
-    String[] tokens = new String[words.size()];
-    for (int i=0;i<words.size();i++){
-      tags[i] = words.get(i).getPosTag();
-      tokens[i] = words.get(i).getWord();
-      
-     // Constants.print("token : "+ tokens[i]  + "_" + tags[i]);
+    /**
+     * This is how to make the context for one-word-disambiguation using IMS
+     */
+    String test1 = "We need to discuss important topic, please write to me soon.";
+    String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+    List<String> tempLemmas1 = new ArrayList<String>();
+    for (int i = 0; i < sentence1.length; i++) {
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence1[i], tags1[i]);
+      tempLemmas1.add(lemma);
     }
-    String targetLemma = WSDHelper.getLemmatizer().lemmatize(
-        tokens[targetWordIndex], tags[targetWordIndex]);
-   // Constants.print("lemma  : "+ targetLemma);
-    
-    WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma));
-    WSDHelper.printResults(mfs,
-        mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma));
-    WSDHelper.print("----------------------------------------");
-    
-    String test2 = "it was a strong argument that his hypothesis was true";
-    sentence = WSDHelper.getTokenizer().tokenize(test2);
-    words = WSDHelper.getAllRelevantWords(sentence);
-    targetWordIndex = 1;
-    tags = new String[words.size()];
-    tokens = new String[words.size()];
-    for (int i=0;i<words.size();i++){
-      tags[i] = words.get(i).getPosTag();
-      tokens[i] = words.get(i).getWord();
-      
-      //Constants.print("token : "+ tokens[i]  + "_" + tags[i]);
+    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+
+    // output
+    String[] senses1 = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
+    System.out.print(lemmas1[8] + " :\t");
+    WSDHelper.print(senses1);
+    WSDHelper.print("*****************************");
+
+    /**
+     * This is how to make the context for disambiguation of span of words
+     */
+    String test2 = "The component was highly radioactive to the point that"
+        + " it has been activated the second it touched water";
+    String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+    List<String> tempLemmas2 = new ArrayList<String>();
+    for (int i = 0; i < sentence2.length; i++) {
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence2[i], tags2[i]);
+      tempLemmas2.add(lemma);
     }
-    targetLemma = WSDHelper.getLemmatizer().lemmatize(
-        tokens[targetWordIndex], tags[targetWordIndex]);
-    //Constants.print("lemma  : "+ targetLemma);
-    
-    WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma));
-    WSDHelper.printResults(mfs,
-        mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma));
-    WSDHelper.print("----------------------------------------");
-    
-    String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water";
-   
-    sentence = WSDHelper.getTokenizer().tokenize(test3);
-    words = WSDHelper.getAllRelevantWords(sentence);
-    targetWordIndex = 4;
-    tags = new String[words.size()];
-    tokens = new String[words.size()];
-    for (int i=0;i<words.size();i++){
-      tags[i] = words.get(i).getPosTag();
-      tokens[i] = words.get(i).getWord();
-      
-      //Constants.print("token : "+ tokens[i]  + "_" + tags[i]);
+    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+    Span span = new Span(3, 7);
+
+    // output
+    List<String[]> senses2 = mfs.disambiguate(sentence2, tags2, lemmas2, span);
+    for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
+      String[] senses = senses2.get(i - span.getStart());
+      System.out.print(lemmas2[i] + " :\t");
+      WSDHelper.print(senses);
+      WSDHelper.print("----------");
     }
-    targetLemma = WSDHelper.getLemmatizer().lemmatize(
-        tokens[targetWordIndex], tags[targetWordIndex]);
-    //Constants.print("lemma  : "+ targetLemma);
-    
-    WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma));
-    WSDHelper.printResults(mfs,
-        mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma));
-    WSDHelper.print("----------------------------------------");
+
+    WSDHelper.print("*****************************");
+
+    /**
+     * This is how to make the context for all-words-disambiguation
+     */
+    String test3 = "The summer is almost over and I have not been to the beach even once";
+    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+    List<String> tempLemmas3 = new ArrayList<String>();
+    for (int i = 0; i < sentence3.length; i++) {
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence3[i], tags3[i]);
+      tempLemmas3.add(lemma);
+    }
+    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+    // output
+    List<String[]> senses3 = mfs.disambiguate(sentence3, tags3, lemmas3);
+    for (int i = 0; i < sentence3.length; i++) {
+      String[] senses = senses3.get(i);
+      System.out.print(lemmas3[i] + " :\t");
+      WSDHelper.print(senses);
+      WSDHelper.print("----------");
+    }
+
   }
 
 }
\ No newline at end of file

Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java?rev=1696865&r1=1696864&r2=1696865&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java Thu Aug 20 22:01:59 2015
@@ -0,0 +1,39 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.disambiguator.ims.IMS;
+
+public class Tester {
+
+  public static void main(String[] args) {
+
+    String modelsDir = "src\\test\\resources\\models\\";
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
+    IMS ims = new IMS();
+
+    String test3 = "The summer is almost over and I haven't been to the beach even once";
+    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+    List<String> tempLemmas3 = new ArrayList<String>();
+    for (int i = 0; i < sentence3.length; i++) {
+      String lemma = WSDHelper.getLemmatizer()
+          .lemmatize(sentence3[i], tags3[i]);
+      tempLemmas3.add(lemma);
+    }
+    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+    // output
+    List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+    for (int i = 0; i < sentence3.length; i++) {
+      System.out.print(sentence3[i] + " : ");
+      WSDHelper.printResults(ims, senses3.get(i));
+      WSDHelper.print("----------");
+    }
+
+  }
+}
\ No newline at end of file

svn commit: r1696865 [2/2] - in /opennlp/sandbox/opennlp-wsd/src: main/java/opennlp/tools/disambiguator/ main/java/opennlp/tools/disambiguator/contextclustering/ main/java/opennlp/tools/disambiguator/datareader/ main/java/opennlp/tools/disambiguator/im...

Reply via email to