tools...

beylerian Fri, 11 Mar 2016 09:38:23 -0800

Author: beylerian
Date: Fri Mar 11 17:37:07 2016
New Revision: 1734600

URL: http://svn.apache.org/viewvc?rev=1734600&view=rev
Log:
added unit tests, corrected some mistakes, need more unit tests


Added:
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
      - copied, changed from r1733577, 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
      - copied, changed from r1733577, 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java
Removed:
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java
Modified:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
 Fri Mar 11 17:37:07 2016
@@ -27,6 +27,7 @@ public abstract class WSDParameters {
 
   protected boolean isCoarseSense;
   public static boolean isStemCompare;
+  protected boolean returnMultiple;
 
   public static enum SenseSource {
     WORDNET, WSDHELPER, OTHER;
@@ -61,8 +62,17 @@ public abstract class WSDParameters {
     this.senseSource = senseSource;
   }
 
+  public boolean isReturnMultiple() {
+    return returnMultiple;
+  }
+
+  public void setReturnMultiple(boolean returnMultiple) {
+    this.returnMultiple = returnMultiple;
+  }
+
   public WSDParameters() {
     this.isCoarseSense = false;
+    this.returnMultiple = false;
   }
 
   /**

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
 Fri Mar 11 17:37:07 2016
@@ -150,7 +150,7 @@ public abstract class WSDisambiguator {
 
         if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) {
           String s = IMSParameters.SenseSource.WSDHELPER.name() + " "
-              + tokenTags[i];
+              + WSDHelper.getNonRelevWordsDef(tokenTags[i]);
           String[] sense = { s };
 
           senses.add(sense);

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
 Fri Mar 11 17:37:07 2016
@@ -55,8 +55,9 @@ public class DefaultIMSContextGenerator
   }
 
   public String[] extractSurroundingWords(int index, String[] toks,
-      String[] lemmas) {
+      String[] lemmas, int windowSize) {
 
+    // TODO consider the windowSize 
     ArrayList<String> contextWords = new ArrayList<String>();
 
     for (int i = 0; i < toks.length; i++) {
@@ -123,7 +124,7 @@ public class DefaultIMSContextGenerator
 
     HashSet<String> surroundingWords = new HashSet<>();
     surroundingWords.addAll(Arrays.asList(extractSurroundingWords(index, toks,
-        lemmas)));
+        lemmas, windowSize)));
 
     String[] localCollocations = extractLocalCollocations(index, toks, ngram);
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java
 Fri Mar 11 17:37:07 2016
@@ -41,16 +41,17 @@ public class IMSME extends WSDisambiguat
 
   protected static IMSContextGenerator cg = new DefaultIMSContextGenerator();
 
-  public IMSME(IMSParameters params){
+  public IMSME(IMSParameters params) {
     this.params = params;
   }
-  
+
   public IMSME(IMSModel model, IMSParameters params) {
     this.imsModel = model;
     this.params = params;
-    
-//    Assert.assertEquals(model.getWindowSize(),params.getWindowSize());
-//    Assert.assertEquals(model.getNgram(),params.getNgram());
+  }
+
+  public IMSModel getModel() {
+    return imsModel;
   }
 
   public void setModel(IMSModel model) {
@@ -65,7 +66,7 @@ public class IMSME extends WSDisambiguat
       TrainingParameters mlParams, IMSParameters imsParams,
       IMSFactory imsfactory) throws IOException {
 
-    ArrayList<String> surroundingWordModel = buildSurroundingWords(samples);
+    ArrayList<String> surroundingWordModel = buildSurroundingWords(samples, 
imsParams.getWindowSize());
 
     HashMap<String, String> manifestInfoEntries = new HashMap<String, 
String>();
 
@@ -88,13 +89,13 @@ public class IMSME extends WSDisambiguat
 
         events.add(ev);
 
-        es = ObjectStreamUtils.createObjectStream(events);
-
       } while ((sample = samples.read()) != null);
     }
 
-    EventTrainer trainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings(), manifestInfoEntries);
+    es = ObjectStreamUtils.createObjectStream(events);
+
+    EventTrainer trainer = TrainerFactory
+        .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);
     imsModel = trainer.train(es);
 
     return new IMSModel(lang, wordTag, imsParams.windowSize, imsParams.ngram,
@@ -102,13 +103,13 @@ public class IMSME extends WSDisambiguat
   }
 
   public static ArrayList<String> buildSurroundingWords(
-      ObjectStream<WSDSample> samples) throws IOException {
+      ObjectStream<WSDSample> samples, int windowSize) throws IOException {
     DefaultIMSContextGenerator imsCG = new DefaultIMSContextGenerator();
     ArrayList<String> surroundingWordsModel = new ArrayList<String>();
     WSDSample sample;
     while ((sample = samples.read()) != null) {
-      String[] words = imsCG.extractSurroundingWords(
-          sample.getTargetPosition(), sample.getSentence(), 
sample.getLemmas());
+      String[] words = 
imsCG.extractSurroundingWords(sample.getTargetPosition(),
+          sample.getSentence(), sample.getLemmas(), windowSize);
 
       if (words.length > 0) {
         for (String word : words) {
@@ -125,10 +126,11 @@ public class IMSME extends WSDisambiguat
     if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
       String wordTag = sample.getTargetWordTag();
 
-      String trainingFile = ((IMSParameters) this.getParams())
-          .getTrainingDataDirectory() + sample.getTargetWordTag();
+      if (imsModel == null
+          || !imsModel.getWordTag().equals(sample.getTargetWordTag())) {
 
-      if (imsModel==null || 
!imsModel.getWordTag().equals(sample.getTargetWordTag())) {
+        String trainingFile = ((IMSParameters) this.getParams())
+            .getTrainingDataDirectory() + sample.getTargetWordTag();
 
         File file = new File(trainingFile + ".ims.model");
         if (file.exists() && !file.isDirectory()) {
@@ -167,11 +169,11 @@ public class IMSME extends WSDisambiguat
           }
 
         } else {
-
           MFS mfs = new MFS();
           return mfs.disambiguate(wordTag);
         }
       } else {
+
         String outcome = "";
 
         String[] context = cg.getContext(sample,
@@ -226,8 +228,8 @@ public class IMSME extends WSDisambiguat
    */
   public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
       String[] lemmas, int index) {
-    return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
-        index));
+    return disambiguate(
+        new WSDSample(tokenizedContext, tokenTags, lemmas, index));
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java
 Fri Mar 11 17:37:07 2016
@@ -20,6 +20,7 @@
 package opennlp.tools.disambiguator.ims;
 
 import java.io.File;
+import java.security.InvalidParameterException;
 
 import opennlp.tools.disambiguator.WSDParameters;
 
@@ -34,6 +35,11 @@ public class IMSParameters extends WSDPa
   protected int ngram;
 
   protected String trainingDataDirectory;
+  
+  protected static final int DFLT_WIN_SIZE = 3;
+  protected static final int DFLT_NGRAM = 2;
+  protected static final String DFLT_LANG_CODE = "En";
+  protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
 
   /**
    * This constructor takes only two parameters. The default language used is
@@ -49,8 +55,9 @@ public class IMSParameters extends WSDPa
    *          the source of the training data
    */
   public IMSParameters(int windowSize, int ngram, SenseSource senseSource,
-      String trainingDataDirectory) {
-    this.languageCode = "En";
+      String trainingDataDirectory){
+   
+    this.languageCode = DFLT_LANG_CODE;
     this.windowSize = windowSize;
     this.ngram = ngram;
     this.senseSource = senseSource;
@@ -63,19 +70,7 @@ public class IMSParameters extends WSDPa
   }
 
   public IMSParameters(String trainingDataDirectory) {
-    this(3, 2, SenseSource.WORDNET, trainingDataDirectory);
-
-    File folder = new File(trainingDataDirectory);
-    if (!folder.exists())
-      folder.mkdirs();
-  }
-
-  public IMSParameters() {
-    this(3, 2, SenseSource.WORDNET, null);
-  }
-
-  public IMSParameters(int windowSize, int ngram) {
-    this(windowSize, ngram, SenseSource.WORDNET, null);
+    this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, trainingDataDirectory);
   }
 
   public String getLanguageCode() {
@@ -109,7 +104,6 @@ public class IMSParameters extends WSDPa
    * Creates the context generator of IMS
    */
   public IMSContextGenerator createContextGenerator() {
-
     return new DefaultIMSContextGenerator();
   }
 
@@ -123,7 +117,7 @@ public class IMSParameters extends WSDPa
 
   @Override
   public boolean isValid() {
-    // TODO Auto-generated method stub
+    // TODO recheck this pattern switch to maps
     return true;
   }
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
 Fri Mar 11 17:37:07 2016
@@ -194,9 +194,12 @@ public class MFS extends WSDisambiguator
       WordPOS wordPOS = new WordPOS(word, pos);
 
       ArrayList<Synset> synsets = wordPOS.getSynsets();
-
-      int size = synsets.size();
-
+      int size;
+      if (this.parameters.isReturnMultiple()) {
+        size = synsets.size();
+      } else {
+        size = 1;
+      }
       String[] senses = new String[size];
 
       for (int i = 0; i < size; i++) {

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.java
 Fri Mar 11 17:37:07 2016
@@ -39,6 +39,7 @@ public class DefaultOSCCContextGenerator
   public String[] extractSurroundingContextClusters(int index, String[] toks,
       String[] tags, String[] lemmas, int windowSize) {
 
+    // TODO consider windowSize
     ArrayList<String> contextClusters = new ArrayList<String>();
 
     for (int i = 0; i < toks.length; i++) {
@@ -49,19 +50,19 @@ public class DefaultOSCCContextGenerator
 
           String lemma = lemmas[i].toLowerCase().replaceAll("[^a-z_]", "")
               .trim();
-          
-          WordPOS word = new WordPOS(lemma, tags[i]);
 
-          // TODO check fix for "_" and null pointers
-          if (lemma.length() > 1 && !lemma.contains("_")) {
-            try{
-            ArrayList<Synset> synsets = word.getSynsets();
-            if (synsets!=null && synsets.size() > 0 ){
-              contextClusters.add(synsets.get(0).getOffset() + "");
-            }
-            }catch(NullPointerException ex)
-            {
-              //TODO tagger mistake add proper exception
+          WordPOS word = new WordPOS(lemma, tags[i]);
+      
+          if (lemma.length() > 1) {
+            try {
+              ArrayList<Synset> synsets = word.getSynsets();
+              if (synsets != null && synsets.size() > 0) {
+                for (Synset syn : synsets){
+                  contextClusters.add(syn.getOffset() + "");
+                }
+              }
+            } catch (NullPointerException ex) {
+              // TODO tagger mistake add proper exception
             }
           }
 
@@ -80,30 +81,32 @@ public class DefaultOSCCContextGenerator
    */
   @Override
   public String[] getContext(int index, String[] toks, String[] tags,
-      String[] lemmas, int windowSize) {
+      String[] lemmas, int windowSize, ArrayList<String> model) {
 
     HashSet<String> surroundingContextClusters = new HashSet<>();
-    surroundingContextClusters.addAll(Arrays
-        .asList(extractSurroundingContextClusters(index, toks, tags, lemmas,
-            windowSize)));
+    surroundingContextClusters
+        .addAll(Arrays.asList(extractSurroundingContextClusters(index, toks,
+            tags, lemmas, windowSize)));
 
-    String[] serializedFeatures = new 
String[surroundingContextClusters.size()];
+    String[] serializedFeatures = new String[model.size()];
 
     int i = 0;
-
-    for (String feature : surroundingContextClusters) {
-      serializedFeatures[i] = "F" + i + "=" + feature;
+    for (String word : model) {
+      if (surroundingContextClusters.contains(word.toString())) {
+        serializedFeatures[i] = "F" + i + "=1";
+      } else {
+        serializedFeatures[i] = "F" + i + "=0";
+      }
       i++;
     }
 
     return serializedFeatures;
-
   }
 
-  public String[] getContext(WSDSample sample, int windowSize) {
+  public String[] getContext(WSDSample sample, int windowSize, 
ArrayList<String> model) {
 
     return getContext(sample.getTargetPosition(), sample.getSentence(),
-        sample.getTags(), sample.getLemmas(), windowSize);
+        sample.getTags(), sample.getLemmas(), windowSize, model);
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java
 Fri Mar 11 17:37:07 2016
@@ -19,6 +19,8 @@
 
 package opennlp.tools.disambiguator.oscc;
 
+import java.util.ArrayList;
+
 import opennlp.tools.disambiguator.WSDSample;
 
 /**
@@ -27,7 +29,7 @@ import opennlp.tools.disambiguator.WSDSa
 public interface OSCCContextGenerator {
 
   String[] getContext(int index, String[] toks, String[] tags, String[] lemmas,
-    int windowSize);
+    int windowSize, ArrayList<String> model);
 
-  String[] getContext(WSDSample sample, int windowSize);
+  String[] getContext(WSDSample sample, int windowSize, ArrayList<String> 
model);
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java
 Fri Mar 11 17:37:07 2016
@@ -22,7 +22,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 
-import junit.framework.Assert;
 import opennlp.tools.disambiguator.WSDHelper;
 import opennlp.tools.disambiguator.WSDSample;
 import opennlp.tools.disambiguator.WSDisambiguator;
@@ -53,11 +52,11 @@ import opennlp.tools.util.TrainingParame
  * Please see {@link DefaultOSCCContextGenerator}
  * 
  * The approach finds the context clusters surrounding the target and uses a
- * classifier to judge on the best case. 
+ * classifier to judge on the best case.
  * 
  * Here an ME classifier is used.
  * 
-*/
+ */
 public class OSCCME extends WSDisambiguator {
 
   protected OSCCModel osccModel;
@@ -69,10 +68,12 @@ public class OSCCME extends WSDisambigua
   }
 
   public OSCCME(OSCCModel model, OSCCParameters params) {
-    this.osccModel = osccModel;
+    this.osccModel = model;
     this.params = params;
+  }
 
-    Assert.assertEquals(model.getWindowSize(), params.getWindowSize());
+  public OSCCModel getModel() {
+    return osccModel;
   }
 
   public void setModel(OSCCModel model) {
@@ -85,7 +86,10 @@ public class OSCCME extends WSDisambigua
 
   public static OSCCModel train(String lang, ObjectStream<WSDSample> samples,
       TrainingParameters mlParams, OSCCParameters osccParams,
-      OSCCFactory imsfactory) throws IOException {
+      OSCCFactory osccFactory) throws IOException {
+
+    ArrayList<String> surroundingClusterModel = buildSurroundingClusters(
+        samples, osccParams.getWindowSize());
 
     HashMap<String, String> manifestInfoEntries = new HashMap<String, 
String>();
 
@@ -99,39 +103,57 @@ public class OSCCME extends WSDisambigua
     if (sample != null) {
       wordTag = sample.getTargetWordTag();
       do {
-
         String sense = sample.getSenseIDs().get(0);
-
-        String[] context = cg.getContext(sample, osccParams.windowSize);
+        String[] context = cg.getContext(sample, osccParams.windowSize,
+            surroundingClusterModel);
         Event ev = new Event(sense + "", context);
-
         events.add(ev);
-
-        es = ObjectStreamUtils.createObjectStream(events);
-
       } while ((sample = samples.read()) != null);
     }
 
-    EventTrainer trainer = TrainerFactory.getEventTrainer(
-        mlParams.getSettings(), manifestInfoEntries);
+    es = ObjectStreamUtils.createObjectStream(events);
+    EventTrainer trainer = TrainerFactory
+        .getEventTrainer(mlParams.getSettings(), manifestInfoEntries);
+
     osccModel = trainer.train(es);
 
-    return new OSCCModel(lang, wordTag, osccParams.windowSize, osccModel, 
manifestInfoEntries, imsfactory);
+    return new OSCCModel(lang, wordTag, osccParams.windowSize, osccModel,
+        surroundingClusterModel, manifestInfoEntries, osccFactory);
   }
 
+  public static ArrayList<String> buildSurroundingClusters(
+      ObjectStream<WSDSample> samples, int windowSize) throws IOException {
+    // TODO modify to clusters
+    DefaultOSCCContextGenerator osccCG = new DefaultOSCCContextGenerator();
+    ArrayList<String> surroundingWordsModel = new ArrayList<String>();
+    WSDSample sample;
+    while ((sample = samples.read()) != null) {
+      String[] words = osccCG.extractSurroundingContextClusters(
+          sample.getTargetPosition(), sample.getSentence(), sample.getTags(),
+          sample.getLemmas(), windowSize);
+
+      if (words.length > 0) {
+        for (String word : words) {
+          surroundingWordsModel.add(word);
+        }
+      }
+    }
+    samples.reset();
+    return surroundingWordsModel;
+  }
 
   @Override
   public String[] disambiguate(WSDSample sample) {
     if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) {
       String wordTag = sample.getTargetWordTag();
 
-      String trainingFile = ((OSCCParameters) this.getParams())
-          .getTrainingDataDirectory() + sample.getTargetWordTag();
-
       if (osccModel == null
           || !osccModel.getWordTag().equals(sample.getTargetWordTag())) {
 
-        File file = new File(trainingFile + ".ims.model");
+        String trainingFile = ((OSCCParameters) this.getParams())
+            .getTrainingDataDirectory() + sample.getTargetWordTag();
+
+        File file = new File(trainingFile + ".oscc.model");
         if (file.exists() && !file.isDirectory()) {
           try {
             setModel(new OSCCModel(file));
@@ -147,7 +169,8 @@ public class OSCCME extends WSDisambigua
           String outcome = "";
 
           String[] context = cg.getContext(sample,
-              ((OSCCParameters) this.params).windowSize);
+              ((OSCCParameters) this.params).windowSize,
+              osccModel.getContextClusters());
 
           double[] outcomeProbs = osccModel.getOSCCMaxentModel().eval(context);
           outcome = 
osccModel.getOSCCMaxentModel().getBestOutcome(outcomeProbs);
@@ -174,7 +197,8 @@ public class OSCCME extends WSDisambigua
         String outcome = "";
 
         String[] context = cg.getContext(sample,
-            ((OSCCParameters) this.params).windowSize);
+            ((OSCCParameters) this.params).windowSize,
+            osccModel.getContextClusters());
 
         double[] outcomeProbs = osccModel.getOSCCMaxentModel().eval(context);
         outcome = osccModel.getOSCCMaxentModel().getBestOutcome(outcomeProbs);
@@ -223,8 +247,8 @@ public class OSCCME extends WSDisambigua
    */
   public String[] disambiguate(String[] tokenizedContext, String[] tokenTags,
       String[] lemmas, int index) {
-    return disambiguate(new WSDSample(tokenizedContext, tokenTags, lemmas,
-        index));
+    return disambiguate(
+        new WSDSample(tokenizedContext, tokenTags, lemmas, index));
   }
 
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java
 Fri Mar 11 17:37:07 2016
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Map;
 import java.util.Properties;
 import java.net.URL;
@@ -43,13 +44,13 @@ public class OSCCModel extends BaseModel
   private static final String WINSIZE = "winsize";
   private static final String CONTEXTCLUSTERS = "contextclusters";
 
-  //private ArrayList<String> contextClusters = new ArrayList<String>();
+  private ArrayList<String> contextClusters = new ArrayList<String>();
   private String wordTag;
   private int windowSize;
 
-  /*public ArrayList<String> getContextClusters() {
+  public ArrayList<String> getContextClusters() {
     return contextClusters;
-  }*/
+  }
 
   public int getWindowSize() {
     return windowSize;
@@ -59,9 +60,9 @@ public class OSCCModel extends BaseModel
     this.windowSize = windowSize;
   }
 
- /* public void setContextClusters(ArrayList<String> contextClusters) {
+  public void setContextClusters(ArrayList<String> contextClusters) {
     this.contextClusters = contextClusters;
-  }*/
+  }
 
   public String getWordTag() {
     return wordTag;
@@ -72,7 +73,7 @@ public class OSCCModel extends BaseModel
   }
 
    public OSCCModel(String languageCode, String wordTag, int windowSize,
-   MaxentModel osccModel,
+   MaxentModel osccModel, ArrayList<String> contextClusters,
       Map<String, String> manifestInfoEntries, OSCCFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
 
@@ -80,17 +81,17 @@ public class OSCCModel extends BaseModel
     this.setManifestProperty(WORDTAG, wordTag);
     this.setManifestProperty(WINSIZE, windowSize + "");
     
-//    this.setManifestProperty(CONTEXTCLUSTERS,
-//        StringUtils.join(contextClusters, ","));
+    this.setManifestProperty(CONTEXTCLUSTERS,
+        StringUtils.join(contextClusters, ","));
 
-    //this.contextClusters = contextClusters;
+    this.contextClusters = contextClusters;
     checkArtifactMap();
   }
 
   public OSCCModel(String languageCode, String wordTag, int windowSize,
-      int ngram, MaxentModel osccModel, 
+      int ngram, MaxentModel osccModel, ArrayList<String> contextClusters,
       OSCCFactory factory) {
-    this(languageCode, wordTag, windowSize, osccModel,
+    this(languageCode, wordTag, windowSize, osccModel, contextClusters,
         null, factory);
   }
 
@@ -135,10 +136,10 @@ public class OSCCModel extends BaseModel
 
   public void updateAttributes() {
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
-    //String contextClusters = (String) manifest.get(CONTEXTCLUSTERS);
+    String contextClusters = (String) manifest.get(CONTEXTCLUSTERS);
 
-   /* this.contextClusters = new ArrayList(
-        Arrays.asList(contextClusters.split(",")));*/
+    this.contextClusters = new ArrayList(
+        Arrays.asList(contextClusters.split(",")));
     this.wordTag = (String) manifest.get(WORDTAG);
     this.windowSize = Integer.parseInt((String) manifest.get(WINSIZE));
   }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java
 Fri Mar 11 17:37:07 2016
@@ -70,13 +70,11 @@ public class OSCCParameters extends WSDP
   }
 
   public OSCCParameters() {
-    // TODO change the "" into null ??
-    this(DFLT_WIN_SIZE, DFLT_SOURCE, "");
+    this(DFLT_WIN_SIZE, DFLT_SOURCE, null);
   }
 
   public OSCCParameters(int windowSize) {
-    // TODO change the "" into null ??
-    this(windowSize, DFLT_SOURCE, "");
+    this(windowSize, DFLT_SOURCE, null);
   }
 
   public String getLanguageCode() {

Copied: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
 (from r1733577, 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java)
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java?p2=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java&p1=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java&r1=1733577&r2=1734600&rev=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java
 Fri Mar 11 17:37:07 2016
@@ -19,107 +19,173 @@
 
 package opennlp.tools.disambiguator;
 
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
+import opennlp.tools.disambiguator.ims.IMSFactory;
 import opennlp.tools.disambiguator.ims.IMSME;
+import opennlp.tools.disambiguator.ims.IMSModel;
 import opennlp.tools.disambiguator.ims.IMSParameters;
+import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Span;
+import opennlp.tools.util.TrainingParameters;
 
 /**
- * This is a typical example of how to call the disambiguation function in the
- * IMS class.
- * <ul>
- * <li>In the 2 first examples, the training data exist, therefore the IMS
- * approach is used.</li>
- * <li>In the 3rd example, the training data for the word to disambiguate are
- * absent, therefore the Most Frequent Sents (MFS) is returend</li>
- * </ul>
+ * This is the test class for {@link IMSME}.
+ * 
+ * The scope of this test is to make sure that the IMS disambiguator code can 
be
+ * executed. This test can not detect mistakes which lead to incorrect feature
+ * generation or other mistakes which decrease the disambiguation performance 
of the
+ * disambiguator.
+ * 
+ * In this test the {@link IMSME} is trained with Semcor and then the computed
+ * model is used to predict sentences from the training sentences.
  */
-public class IMSTester {
-
-  public static void main(String[] args) {
-
-    // TODO write unit test
-    
-    String modelsDir = "src\\test\\resources\\models\\";
+public class IMSMETester {
+  // TODO write more tests
+  // TODO modify when we fix the parameter model
+
+  static String modelsDir = "src\\test\\resources\\models\\";
+  static String trainingDataDirectory = 
"src\\test\\resources\\supervised\\models\\";
+
+  static IMSParameters IMSParams;
+  static IMSME ims;
+  static IMSFactory IMSFactory;
+  static IMSModel model;
+
+  static String test = "please.v";
+  static File outFile;
+
+  static String test1 = "We need to discuss an important topic, please write 
to me soon.";
+  static String test2 = "The component was highly radioactive to the point 
that"
+      + " it has been activated the second it touched water";
+  static String test3 = "The summer is almost over and I did not go to the 
beach even once";
+
+  static String[] sentence1;
+  static String[] sentence2;
+  static String[] sentence3;
+
+  static String[] tags1;
+  static String[] tags2;
+  static String[] tags3;
+
+  static String[] lemmas1;
+  static String[] lemmas2;
+  static String[] lemmas3;
+
+  /*
+   * Setup the testing variables
+   */
+  @BeforeClass
+  public static void setUpAndTraining() {
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
 
-    IMSParameters params = new IMSParameters("");
-
-    WSDHelper.print(params.getTrainingDataDirectory());
+    sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+
+    tags1 = WSDHelper.getTagger().tag(sentence1);
+    tags2 = WSDHelper.getTagger().tag(sentence2);
+    tags3 = WSDHelper.getTagger().tag(sentence3);
 
-    IMSME ims = new IMSME(params);
-
-  
-    // This is how to make the context for one-word-disambiguation using IMS
-     
-    String test1 = "We need to discuss important topic, please write to me 
soon.";
-    String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
-    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
     List<String> tempLemmas1 = new ArrayList<String>();
     for (int i = 0; i < sentence1.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence1[i], tags1[i]);
-      tempLemmas1.add(lemma);
+      tempLemmas1
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
     }
-    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
 
-    // output
-    String[] senses1 = ims.disambiguate(sentence1, tags1, lemmas1, 8);
-    System.out.print(lemmas1[8] + " :\t");
-    WSDHelper.print(senses1);
-    WSDHelper.print("*****************************");
-
-    // This is how to make the context for disambiguation of span of words
-    
-    String test2 = "The component was highly radioactive to the point that"
-        + " it has been activated the second it touched water";
-    String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
-    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
     List<String> tempLemmas2 = new ArrayList<String>();
     for (int i = 0; i < sentence2.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence2[i], tags2[i]);
-      tempLemmas2.add(lemma);
+      tempLemmas2
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
     }
-    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
-    Span span = new Span(3, 7);
-
-    // output
-    List<String[]> senses2 = ims.disambiguate(sentence2, tags2, lemmas2, span);
-    for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
-      String[] senses = senses2.get(i - span.getStart());
-      System.out.print(lemmas2[i] + " :\t");
-      WSDHelper.print(senses);
-      WSDHelper.print("----------");
-    }
-
-    WSDHelper.print("*****************************");
+    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
 
-    // This is how to make the context for all-words-disambiguation
-    
-    String test3 = "The summer almost over and I not to the beach even once";
-    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
     List<String> tempLemmas3 = new ArrayList<String>();
     for (int i = 0; i < sentence3.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence3[i], tags3[i]);
-      tempLemmas3.add(lemma);
+      tempLemmas3
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
     }
-    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
 
-    // output
-    List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
-    for (int i = 0; i < sentence3.length; i++) {
-      String[] senses = senses3.get(i);
-      System.out.print(lemmas3[i] + " :\t");
-      WSDHelper.print(senses);
-      WSDHelper.print("----------");
+    IMSParams = new IMSParameters("");
+    IMSParams.setTrainingDataDirectory(trainingDataDirectory);
+    IMSFactory = new IMSFactory();
+    TrainingParameters trainingParams = new TrainingParameters();
+    SemcorReaderExtended sr = new SemcorReaderExtended();
+    ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(test);
+
+    IMSModel writeModel = null;
+    /*
+     * Tests training the disambiguator. We test both writing and reading a 
model
+     * file trained by semcor
+     */
+
+    try {
+      writeModel = IMSME.train("en", sampleStream, trainingParams, IMSParams,
+          IMSFactory);
+      assertNotNull("Checking the model to be written", writeModel);
+      writeModel.writeModel(IMSParams.getTrainingDataDirectory() + test);
+      outFile = new File(
+          IMSParams.getTrainingDataDirectory() + test + ".ims.model");
+      model = new IMSModel(outFile);
+      assertNotNull("Checking the read model", model);
+      ims = new IMSME(model, IMSParams);
+      assertNotNull("Checking the disambiguator", ims);
+    } catch (IOException e1) {
+      e1.printStackTrace();
+      fail("Exception in training");
     }
   }
 
+  /*
+   * Tests disambiguating only one word : The ambiguous word "please"
+   */
+  @Test
+  public void testOneWordDisambiguation() {
+    String[] senses = ims.disambiguate(sentence1, tags1, lemmas1, 8);
+
+    assertEquals("Check number of senses", 1, senses.length);
+  }
+
+  /*
+   * Tests disambiguating a word span. In this case we test a mix of monosemous
+   * and polysemous words as well as words that do not need disambiguation such
+   * as determiners
+   */
+  @Test
+  public void testWordSpanDisambiguation() {
+    Span span = new Span(3, 7);
+    List<String[]> senses = ims.disambiguate(sentence2, tags2, lemmas2, span);
+
+    assertEquals("Check number of returned words", 5, senses.size());
+    assertEquals("Check number of senses", 1, senses.get(0).length);
+    assertEquals("Check monosemous word", 1, senses.get(1).length);
+    assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
+    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+  }
+
+  /*
+   * Tests disambiguating all the words
+   */
+  @Test
+  public void testAllWordsDisambiguation() {
+    List<String[]> senses = ims.disambiguate(sentence3, tags3, lemmas3);
+
+    assertEquals("Check number of returned words", 15, senses.size());
+    assertEquals("Check preposition", "WSDHELPER personal pronoun",
+        senses.get(6)[0]);
+  }
+
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
 Fri Mar 11 17:37:07 2016
@@ -19,81 +19,137 @@
 
 package opennlp.tools.disambiguator;
 
+import static org.junit.Assert.assertEquals;
+
 import java.util.ArrayList;
 import java.util.List;
 
 import opennlp.tools.disambiguator.lesk.Lesk;
 import opennlp.tools.disambiguator.lesk.LeskParameters;
 import opennlp.tools.disambiguator.lesk.LeskParameters.LESK_TYPE;
+import opennlp.tools.util.Span;
 
+import org.junit.BeforeClass;
 import org.junit.Test;
 
+/**
+ * This is the test class for {@link Lesk}.
+ * 
+ * The scope of this test is to make sure that the Lesk disambiguator code can 
be
+ * executed. This test can not detect mistakes which lead to incorrect feature
+ * generation or other mistakes which decrease the disambiguation performance 
of the
+ * disambiguator.
+ */
 public class LeskTester {
-  @Test
-  public static void main(String[] args) {
+  // TODO write more tests
+
+  static String modelsDir = "src\\test\\resources\\models\\";
+
+  static Lesk lesk;
+
+  static String test1 = "We need to discuss an important topic, please write 
to me soon.";
+  static String test2 = "The component was highly radioactive to the point 
that"
+      + " it has been activated the second it touched water";
+  static String test3 = "The summer is almost over and I did not go to the 
beach even once";
+
+  static String[] sentence1;
+  static String[] sentence2;
+  static String[] sentence3;
+
+  static String[] tags1;
+  static String[] tags2;
+  static String[] tags3;
+
+  static String[] lemmas1;
+  static String[] lemmas2;
+  static String[] lemmas3;
+
+  /*
+   * Setup the testing variables
+   */
+  @BeforeClass
+  public static void setUp() {
 
-    Lesk lesk = new Lesk();
-    LeskParameters params = new LeskParameters();
-    params.setLeskType(LESK_TYPE.LESK_EXT);
-    boolean a[] = { true, true, true, true, true, true, true, true, true, true 
};
-    params.setFeatures(a);
-    lesk.setParams(params);
-    String modelsDir = "src\\test\\resources\\models\\";
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
 
-    String test1 = "I went to the bank to deposit money.";
-    String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
-    int targetWordIndex1 = 5;
-    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
+    sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+
+    tags1 = WSDHelper.getTagger().tag(sentence1);
+    tags2 = WSDHelper.getTagger().tag(sentence2);
+    tags3 = WSDHelper.getTagger().tag(sentence3);
+
     List<String> tempLemmas1 = new ArrayList<String>();
     for (int i = 0; i < sentence1.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence1[i], tags1[i]);
-      tempLemmas1.add(lemma);
+      tempLemmas1
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
     }
-    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
-    String[] results1 = lesk.disambiguate(sentence1, tags1, lemmas1,
-        targetWordIndex1);
-    WSDHelper.print(results1);
-    WSDHelper.printResults(lesk, results1);
-
-    WSDHelper.print("----------------------------------------");
-
-    String test2 = "it was a strong argument that his hypothesis was true";
-    String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
-    int targetWordIndex2 = 4;
-    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
+    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+
     List<String> tempLemmas2 = new ArrayList<String>();
-    for (int i = 0; i < sentence1.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence2[i], tags2[i]);
-      tempLemmas2.add(lemma);
+    for (int i = 0; i < sentence2.length; i++) {
+      tempLemmas2
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
     }
-    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
-    String[] results2 = lesk.disambiguate(sentence2, tags2, lemmas2,
-        targetWordIndex2);
-    WSDHelper.print(results2);
-    WSDHelper.printResults(lesk, results2);
-    WSDHelper.print("----------------------------------------");
-
-    String test3 = "the component was highly radioactive to the point that it 
has been activated the second it touched water";
-    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-    int targetWordIndex3 = 3;
-    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+
     List<String> tempLemmas3 = new ArrayList<String>();
     for (int i = 0; i < sentence3.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence3[i], tags3[i]);
-      tempLemmas3.add(lemma);
+      tempLemmas3
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
     }
-    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
-    String[] results3 = lesk.disambiguate(sentence3, tags3, lemmas3,
-        targetWordIndex3);
-    WSDHelper.print(results3);
-    WSDHelper.printResults(lesk, results3);
-    WSDHelper.print("----------------------------------------");
+    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+    lesk = new Lesk();
+
+    LeskParameters params = new LeskParameters();
+    params.setLeskType(LESK_TYPE.LESK_EXT);
+    boolean a[] = { true, true, true, true, true, true, true, true, true,
+        true };
+    params.setFeatures(a);
+    lesk.setParams(params);
+  }
+
+  /*
+   * Tests disambiguating only one word : The ambiguous word "please"
+   */
+  @Test
+  public void testOneWordDisambiguation() {
+    String[] senses = lesk.disambiguate(sentence1, tags1, lemmas1, 8);
+
+    assertEquals("Check number of senses", 1, senses.length);
+  }
+
+  /*
+   * Tests disambiguating a word span. In this case we test a mix of monosemous
+   * and polysemous words as well as words that do not need disambiguation such
+   * as determiners
+   */
+  @Test
+  public void testWordSpanDisambiguation() {
+    Span span = new Span(3, 7);
+    List<String[]> senses = lesk.disambiguate(sentence2, tags2, lemmas2, span);
+
+    assertEquals("Check number of returned words", 5, senses.size());
+    assertEquals("Check number of senses", 3, senses.get(0).length);
+    assertEquals("Check monosemous word", 1, senses.get(1).length);
+    assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
+    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+  }
+
+  /*
+   * Tests disambiguating all the words
+   */
+  @Test
+  public void testAllWordsDisambiguation() {
+    List<String[]> senses = lesk.disambiguate(sentence3, tags3, lemmas3);
+
+    assertEquals("Check number of returned words", 15, senses.size());
+    assertEquals("Check preposition", "WSDHELPER personal pronoun",
+        senses.get(6)[0]);
   }
 
 }
\ No newline at end of file

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java
 Fri Mar 11 17:37:07 2016
@@ -19,96 +19,128 @@
 
 package opennlp.tools.disambiguator;
 
+import static org.junit.Assert.assertEquals;
+
 import java.util.ArrayList;
 import java.util.List;
+import org.junit.BeforeClass;
+import org.junit.Test;
 
 import opennlp.tools.disambiguator.mfs.MFS;
 import opennlp.tools.util.Span;
 
 /**
- * This is a typical example of how to call the disambiguation function in the
- * MFS class.
+ * This is the test class for {@link MFS}.
+ * 
+ * The scope of this test is to make sure that the MFS disambiguator code can 
be
+ * executed. This test can not detect mistakes which lead to incorrect feature
+ * generation or other mistakes which decrease the disambiguation performance 
of the
+ * disambiguator.
  */
 public class MFSTester {
+  // TODO write more tests
+  // TODO modify when we fix the parameter model
+
+  static String modelsDir = "src\\test\\resources\\models\\";
+
+  static MFS mfs;
+
+  static String test1 = "We need to discuss an important topic, please write 
to me soon.";
+  static String test2 = "The component was highly radioactive to the point 
that"
+      + " it has been activated the second it touched water";
+  static String test3 = "The summer is almost over and I did not go to the 
beach even once";
+
+  static String[] sentence1;
+  static String[] sentence2;
+  static String[] sentence3;
+
+  static String[] tags1;
+  static String[] tags2;
+  static String[] tags3;
+
+  static String[] lemmas1;
+  static String[] lemmas2;
+  static String[] lemmas3;
+
+  /*
+   * Setup the testing variables and the training files
+   */
+  @BeforeClass
+  public static void setUpAndTraining() {
 
-  public static void main(String[] args) {
-    String modelsDir = "src\\test\\resources\\models\\";
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
 
-    MFS mfs = new MFS();
+    sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+
+    tags1 = WSDHelper.getTagger().tag(sentence1);
+    tags2 = WSDHelper.getTagger().tag(sentence2);
+    tags3 = WSDHelper.getTagger().tag(sentence3);
 
-    /**
-     * This is how to make the context for one-word-disambiguation using IMS
-     */
-    String test1 = "We need to discuss important topic, please write to me 
soon.";
-    String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
-    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
     List<String> tempLemmas1 = new ArrayList<String>();
     for (int i = 0; i < sentence1.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence1[i], tags1[i]);
-      tempLemmas1.add(lemma);
+      tempLemmas1
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
     }
-    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
 
-    // output
-    String[] senses1 = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
-    System.out.print(lemmas1[8] + " :\t");
-    WSDHelper.print(senses1);
-    WSDHelper.print("*****************************");
-
-    /**
-     * This is how to make the context for disambiguation of span of words
-     */
-    String test2 = "The component was highly radioactive to the point that"
-        + " it has been activated the second it touched water";
-    String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
-    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
     List<String> tempLemmas2 = new ArrayList<String>();
     for (int i = 0; i < sentence2.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence2[i], tags2[i]);
-      tempLemmas2.add(lemma);
+      tempLemmas2
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
     }
-    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
-    Span span = new Span(3, 7);
+    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
 
-    // output
-    List<String[]> senses2 = mfs.disambiguate(sentence2, tags2, lemmas2, span);
-    for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
-      String[] senses = senses2.get(i - span.getStart());
-      System.out.print(lemmas2[i] + " :\t");
-      WSDHelper.print(senses);
-      WSDHelper.print("----------");
-    }
-
-    WSDHelper.print("*****************************");
-
-    /**
-     * This is how to make the context for all-words-disambiguation
-     */
-    String test3 = "The summer is almost over and I have not been to the beach 
even once";
-    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
     List<String> tempLemmas3 = new ArrayList<String>();
     for (int i = 0; i < sentence3.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence3[i], tags3[i]);
-      tempLemmas3.add(lemma);
+      tempLemmas3
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
     }
-    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
 
-    // output
-    List<String[]> senses3 = mfs.disambiguate(sentence3, tags3, lemmas3);
-    for (int i = 0; i < sentence3.length; i++) {
-      String[] senses = senses3.get(i);
-      System.out.print(lemmas3[i] + " :\t");
-      WSDHelper.print(senses);
-      WSDHelper.print("----------");
-    }
+    mfs = new MFS();
 
   }
 
+  /*
+   * Tests disambiguating only one word : The ambiguous word "please"
+   */
+  @Test
+  public void testOneWordDisambiguation() {
+    String[] senses = mfs.disambiguate(sentence1, tags1, lemmas1, 8);
+
+    assertEquals("Check number of senses", 1, senses.length);
+  }
+
+  /*
+   * Tests disambiguating a word span. In this case we test a mix of monosemous
+   * and polysemous words as well as words that do not need disambiguation such
+   * as determiners
+   */
+  @Test
+  public void testWordSpanDisambiguation() {
+    Span span = new Span(3, 7);
+    List<String[]> senses = mfs.disambiguate(sentence2, tags2, lemmas2, span);
+
+    assertEquals("Check number of returned words", 5, senses.size());
+    assertEquals("Check number of senses", 1, senses.get(0).length);
+    assertEquals("Check monosemous word", 1, senses.get(1).length);
+    assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
+    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+  }
+
+  /*
+   * Tests disambiguating all the words
+   */
+  @Test
+  public void testAllWordsDisambiguation() {
+    List<String[]> senses = mfs.disambiguate(sentence3, tags3, lemmas3);
+
+    assertEquals("Check number of returned words", 15, senses.size());
+    assertEquals("Check preposition", "WSDHELPER personal pronoun",
+        senses.get(6)[0]);
+  }
 }
\ No newline at end of file

Copied: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
 (from r1733577, 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java)
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java?p2=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java&p1=opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java&r1=1733577&r2=1734600&rev=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java
 Fri Mar 11 17:37:07 2016
@@ -19,11 +19,18 @@
 
 package opennlp.tools.disambiguator;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.junit.BeforeClass;
+import org.junit.Test;
+
 import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
 import opennlp.tools.disambiguator.oscc.OSCCFactory;
 import opennlp.tools.disambiguator.oscc.OSCCME;
@@ -33,84 +40,154 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Span;
 import opennlp.tools.util.TrainingParameters;
 
-public class OSCCTester {
-
-  public static void main(String[] args) {
-
-    SemcorReaderExtended sr = new SemcorReaderExtended();
-
-    String modelsDir = "src\\test\\resources\\models\\";
+/**
+ * This is the test class for {@link OSCCME}.
+ * 
+ * The scope of this test is to make sure that the OSCC disambiguator code can
+ * be executed. This test can not detect mistakes which lead to incorrect
+ * feature generation or other mistakes which decrease the disambiguation
+ * performance of the disambiguator.
+ * 
+ * In this test the {@link OSCCME} is trained with Semcor and then the computed
+ * model is used to predict sentences from the training sentences.
+ */
+public class OSCCMETester {
+  // TODO write more tests
+  // TODO modify when we fix the parameter model
+
+  static String modelsDir = "src\\test\\resources\\models\\";
+  static String trainingDataDirectory = 
"src\\test\\resources\\supervised\\models\\";
+
+  static OSCCParameters OSCCParams;
+  static OSCCME oscc;
+  static OSCCFactory osccFactory;
+  static OSCCModel model;
+
+  static String test = "please.v";
+  static File outFile;
+
+  static String test1 = "We need to discuss an important topic, please write 
to me soon.";
+  static String test2 = "The component was highly radioactive to the point 
that"
+      + " it has been activated the second it touched water";
+  static String test3 = "The summer is almost over and I did not go to the 
beach even once";
+
+  static String[] sentence1;
+  static String[] sentence2;
+  static String[] sentence3;
+
+  static String[] tags1;
+  static String[] tags2;
+  static String[] tags3;
+
+  static String[] lemmas1;
+  static String[] lemmas2;
+  static String[] lemmas3;
+
+  /*
+   * Setup the testing variables
+   */
+  @BeforeClass
+  public static void setUpAndTraining() {
     WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
     WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
     WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
 
-    String test = "write.v";
-    TrainingParameters trainingParams = new TrainingParameters();
-    OSCCParameters OSCCParams = new OSCCParameters("");
-    OSCCFactory OSCCFactory = new OSCCFactory();
+    sentence1 = WSDHelper.getTokenizer().tokenize(test1);
+    sentence2 = WSDHelper.getTokenizer().tokenize(test2);
+    sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+
+    tags1 = WSDHelper.getTagger().tag(sentence1);
+    tags2 = WSDHelper.getTagger().tag(sentence2);
+    tags3 = WSDHelper.getTagger().tag(sentence3);
+
+    List<String> tempLemmas1 = new ArrayList<String>();
+    for (int i = 0; i < sentence1.length; i++) {
+      tempLemmas1
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], tags1[i]));
+    }
+    lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
 
+    List<String> tempLemmas2 = new ArrayList<String>();
+    for (int i = 0; i < sentence2.length; i++) {
+      tempLemmas2
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], tags2[i]));
+    }
+    lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+
+    List<String> tempLemmas3 = new ArrayList<String>();
+    for (int i = 0; i < sentence3.length; i++) {
+      tempLemmas3
+          .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], tags3[i]));
+    }
+    lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+    OSCCParams = new OSCCParameters("");
+    OSCCParams.setTrainingDataDirectory(trainingDataDirectory);
+    osccFactory = new OSCCFactory();
+    TrainingParameters trainingParams = new TrainingParameters();
+    SemcorReaderExtended sr = new SemcorReaderExtended();
     ObjectStream<WSDSample> sampleStream = sr.getSemcorDataStream(test);
 
-    OSCCModel model = null;
-    OSCCModel readModel = null;
-    try {
-      model = OSCCME.train("en", sampleStream, trainingParams, OSCCParams,
-          OSCCFactory);
-      model.writeModel(test);
-      File outFile = new File(test + ".OSCC.model");
-      readModel = new OSCCModel(outFile);
+    OSCCModel writeModel = null;
+    /*
+     * Tests training the disambiguator. We test both writing and reading a 
model
+     * file trained by semcor
+     */
 
+    try {
+      writeModel = OSCCME.train("en", sampleStream, trainingParams, OSCCParams,
+          osccFactory);
+      assertNotNull("Checking the model to be written", writeModel);
+      writeModel.writeModel(OSCCParams.getTrainingDataDirectory() + test);
+      outFile = new File(
+          OSCCParams.getTrainingDataDirectory() + test + ".oscc.model");
+      model = new OSCCModel(outFile);
+      assertNotNull("Checking the read model", model);
+      oscc = new OSCCME(model, OSCCParams);
+      assertNotNull("Checking the disambiguator", oscc);
     } catch (IOException e1) {
-      // TODO Auto-generated catch block
       e1.printStackTrace();
+      fail("Exception in training");
     }
-    OSCCME OSCC = new OSCCME(readModel, OSCCParams);
+  }
 
-    /**
-     * This is how to make the context for one-word-disambiguation using OSCC
-     */
-    String test1 = "We need to discuss important topic, please write to me 
soon.";
-    String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1);
-    String[] tags1 = WSDHelper.getTagger().tag(sentence1);
-    List<String> tempLemmas1 = new ArrayList<String>();
-    for (int i = 0; i < sentence1.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence1[i], tags1[i]);
-      tempLemmas1.add(lemma);
-    }
-    String[] lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]);
+  /*
+   * Tests disambiguating only one word : The ambiguous word "please"
+   */
+  @Test
+  public void testOneWordDisambiguation() {
+    String[] senses = oscc.disambiguate(sentence1, tags1, lemmas1, 8);
 
-    // output
-    String[] senses1 = OSCC.disambiguate(sentence1, tags1, lemmas1, 8);
-    System.out.print(lemmas1[8] + " :\t");
-    WSDHelper.print(senses1);
-    WSDHelper.print("*****************************");
+    assertEquals("Check number of senses", 1, senses.length);
+  }
 
-    /**
-     * This is how to make the context for disambiguation of span of words
-     */
-    String test2 = "The component was highly radioactive to the point that"
-        + " it has been activated the second it touched water";
-    String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2);
-    String[] tags2 = WSDHelper.getTagger().tag(sentence2);
-    List<String> tempLemmas2 = new ArrayList<String>();
-    for (int i = 0; i < sentence2.length; i++) {
-      String lemma = WSDHelper.getLemmatizer()
-          .lemmatize(sentence2[i], tags2[i]);
-      tempLemmas2.add(lemma);
-    }
-    String[] lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]);
+  /*
+   * Tests disambiguating a word span. In this case we test a mix of monosemous
+   * and polysemous words as well as words that do not need disambiguation such
+   * as determiners
+   */
+  @Test
+  public void testWordSpanDisambiguation() {
     Span span = new Span(3, 7);
+    List<String[]> senses = oscc.disambiguate(sentence2, tags2, lemmas2, span);
 
-    // output
-    List<String[]> senses2 = OSCC.disambiguate(sentence2, tags2, lemmas2, 
span);
-    for (int i = span.getStart(); i < span.getEnd() + 1; i++) {
-      String[] senses = senses2.get(i - span.getStart());
-      System.out.print(lemmas2[i] + " :\t");
-      WSDHelper.print(senses);
-      WSDHelper.print("----------");
-    }
+    assertEquals("Check number of returned words", 5, senses.size());
+    assertEquals("Check number of senses", 1, senses.get(0).length);
+    assertEquals("Check monosemous word", 1, senses.get(1).length);
+    assertEquals("Check preposition", "WSDHELPER to", senses.get(2)[0]);
+    assertEquals("Check determiner", "WSDHELPER determiner", senses.get(3)[0]);
+  }
 
-    WSDHelper.print("*****************************");
+  /*
+   * Tests disambiguating all the words
+   */
+  @Test
+  public void testAllWordsDisambiguation() {
+    List<String[]> senses = oscc.disambiguate(sentence3, tags3, lemmas3);
+
+    assertEquals("Check number of returned words", 15, senses.size());
+    assertEquals("Check preposition", "WSDHELPER personal pronoun",
+        senses.get(6)[0]);
   }
+
 }
\ No newline at end of file

Modified: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java?rev=1734600&r1=1734599&r2=1734600&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
 Fri Mar 11 17:37:07 2016
@@ -1,36 +1,40 @@
 package opennlp.tools.disambiguator;
 
+import java.util.ArrayList;
+import java.util.List;
 
+import opennlp.tools.disambiguator.ims.IMSME;
+import opennlp.tools.disambiguator.ims.IMSParameters;
 
 public class Tester {
 
   public static void main(String[] args) {
-//
-//    String modelsDir = "src\\test\\resources\\models\\";
-//    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
-//    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
-//    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
-//
-//    IMSME ims = new IMSME();
-//
-//    String test3 = "The summer is almost over and I haven't been to the beach even once";
-//    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
-//    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
-//    List<String> tempLemmas3 = new ArrayList<String>();
-//    for (int i = 0; i < sentence3.length; i++) {
-//      String lemma = WSDHelper.getLemmatizer()
-//          .lemmatize(sentence3[i], tags3[i]);
-//      tempLemmas3.add(lemma);
-//    }
-//    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
-//
-//    // output
-//    List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
-//    for (int i = 0; i < sentence3.length; i++) {
-//      System.out.print(sentence3[i] + " : ");
-//      WSDHelper.printResults(ims, senses3.get(i));
-//      WSDHelper.print("----------");
-//    }
+
+    String modelsDir = "src\\test\\resources\\models\\";
+    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
+    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
+    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");
+
+    IMSME ims = new IMSME(new IMSParameters("\\"));
+
+    String test3 = "The summer is almost over and I haven't been to the beach even once";
+    String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3);
+    String[] tags3 = WSDHelper.getTagger().tag(sentence3);
+    List<String> tempLemmas3 = new ArrayList<String>();
+    for (int i = 0; i < sentence3.length; i++) {
+      String lemma = WSDHelper.getLemmatizer().lemmatize(sentence3[i],
+          tags3[i]);
+      tempLemmas3.add(lemma);
+    }
+    String[] lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]);
+
+    // output
+    List<String[]> senses3 = ims.disambiguate(sentence3, tags3, lemmas3);
+    for (int i = 0; i < sentence3.length; i++) {
+      System.out.print(sentence3[i] + " : ");
+      WSDHelper.printResults(ims, senses3.get(i));
+      WSDHelper.print("----------");
+    }
 
   }
 }
\ No newline at end of file

svn commit: r1734600 - in /opennlp/sandbox/opennlp-wsd/src: main/java/opennlp/tools/disambiguator/ main/java/opennlp/tools/disambiguator/ims/ main/java/opennlp/tools/disambiguator/mfs/ main/java/opennlp/tools/disambiguator/oscc/ test/java/opennlp/tools...

Reply via email to