Author: joern
Date: Tue Aug 18 22:44:32 2015
New Revision: 1696509

URL: http://svn.apache.org/r1696509
Log:
OPENNLP-801 Also includes some more cleanups. Thanks to Anthony Beylerian for 
providing a patch!

Added:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
   (with props)
Modified:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
 Tue Aug 18 22:44:32 2015
@@ -31,10 +31,12 @@ import opennlp.tools.cmdline.CmdLineTool
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.PerformanceMonitor;
 import opennlp.tools.cmdline.TerminateToolException;
-import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.WSDHelper;
 import opennlp.tools.disambiguator.WSDSample;
 import opennlp.tools.disambiguator.WSDSampleStream;
 import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.disambiguator.ims.IMS;
+import opennlp.tools.disambiguator.lesk.Lesk;
 import opennlp.tools.disambiguator.mfs.MFS;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -85,7 +87,7 @@ public class DisambiguatorTool extends C
 
         WSDSample sample = WSDSample.parse(line);
 
-        Constants.printResults(disambiguator,
+        WSDHelper.printResults(disambiguator,
             disambiguator.disambiguate(sample));
 
         perfMon.incrementCounter();
@@ -105,7 +107,9 @@ public class DisambiguatorTool extends C
     if (params.getType().equalsIgnoreCase("mfs")) {
       wsd = new MFS();
     } else if (params.getType().equalsIgnoreCase("lesk")) {
+      wsd = new Lesk();
     } else if (params.getType().equalsIgnoreCase("ims")) {
+      wsd = new IMS();
     }
     return wsd;
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
 Tue Aug 18 22:44:32 2015
@@ -55,7 +55,7 @@ public class FeaturesExtractor {
   private String[] extractPosOfSurroundingWords(String[] sentence,
       int wordIndex, int windowSize) {
 
-    String[] taggedSentence = Loader.getTagger().tag(sentence);
+    String[] taggedSentence = WSDHelper.getTagger().tag(sentence);
 
     String[] tags = new String[2 * windowSize + 1];
 
@@ -75,20 +75,20 @@ public class FeaturesExtractor {
 
   private String[] extractSurroundingWords(String[] sentence, int wordIndex) {
 
-    String[] posTags = Loader.getTagger().tag(sentence);
+    String[] posTags = WSDHelper.getTagger().tag(sentence);
 
     ArrayList<String> contextWords = new ArrayList<String>();
 
     for (int i = 0; i < sentence.length; i++) {
 
-      if (!Constants.stopWords.contains(sentence[i].toLowerCase())
+      if (!WSDHelper.stopWords.contains(sentence[i].toLowerCase())
           && (wordIndex != i)) {
 
         String word = sentence[i].toLowerCase().replaceAll("[^a-z]", 
"").trim();
 
         // if (!word.equals("") /*&& Constants.isRelevant(posTags[i])*/) {
-        if (Loader.getEnglishWords().containsKey(word)) {
-          String lemma = Loader.getLemmatizer().lemmatize(word, posTags[i]);
+        if (WSDHelper.getEnglishWords().containsKey(word)) {
+          String lemma = WSDHelper.getLemmatizer().lemmatize(word, posTags[i]);
           contextWords.add(lemma);
         }
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
 Tue Aug 18 22:44:32 2015
@@ -42,6 +42,11 @@ public class SynNode {
   public ArrayList<Synset> hyponyms = new ArrayList<Synset>();
   public ArrayList<Synset> meronyms = new ArrayList<Synset>();
   public ArrayList<Synset> holonyms = new ArrayList<Synset>();
+  public ArrayList<Synset> entailments = new ArrayList<Synset>();
+  public ArrayList<Synset> coordinateTerms = new ArrayList<Synset>();
+  public ArrayList<Synset> causes = new ArrayList<Synset>();
+  public ArrayList<Synset> attributes = new ArrayList<Synset>();
+  public ArrayList<Synset> pertainyms = new ArrayList<Synset>();
 
   public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
 
@@ -139,6 +144,101 @@ public class SynNode {
     }
   }
 
+  public void setEntailements() {
+    // PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList pentailments = new PointerTargetNodeList();
+    try {
+      pentailments = PointerUtils.getEntailments(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hypernyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pentailments.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pentailments.get(i);
+      this.entailments.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setCoordinateTerms() {
+    // PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList pcoordinateTerms = new PointerTargetNodeList();
+    try {
+      pcoordinateTerms = PointerUtils.getCoordinateTerms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  coordinate terms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pcoordinateTerms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pcoordinateTerms.get(i);
+      this.coordinateTerms.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setCauses() {
+    // PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList pcauses = new PointerTargetNodeList();
+    try {
+      pcauses = PointerUtils.getCauses(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the cause terms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pcauses.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pcauses.get(i);
+      this.causes.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setAttributes() {
+    // PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList pattributes = new PointerTargetNodeList();
+    try {
+      pattributes = PointerUtils.getAttributes(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the attributes");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pattributes.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pattributes.get(i);
+      this.attributes.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setPertainyms() {
+    // PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList ppertainyms = new PointerTargetNodeList();
+    try {
+      ppertainyms = PointerUtils.getPertainyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the pertainyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < ppertainyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) ppertainyms.get(i);
+      this.pertainyms.add(ptn.getSynset());
+    }
+
+  }
+
   public void setSynonyms() {
     for (Word word : synset.getWords())
       synonyms.add(new WordPOS(word.toString(), word.getPOS()));
@@ -160,18 +260,38 @@ public class SynNode {
     return holonyms;
   }
 
+  public ArrayList<Synset> getEntailments() {
+    return entailments;
+  }
+
+  public ArrayList<Synset> getCoordinateTerms() {
+    return coordinateTerms;
+  }
+
+  public ArrayList<Synset> getCauses() {
+    return causes;
+  }
+
+  public ArrayList<Synset> getAttributes() {
+    return attributes;
+  }
+
+  public ArrayList<Synset> getPertainyms() {
+    return pertainyms;
+  }
+
   public ArrayList<WordPOS> getSynonyms() {
     return synonyms;
   }
-  
+
   public String getGloss() {
     return this.synset.getGloss().toString();
   }
-  
+
   public long getSynsetID() {
     return this.synset.getOffset();
   }
-  
+
   /**
    * Gets the senses of the nodes
    * 
@@ -182,8 +302,9 @@ public class SynNode {
     ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
 
     for (int i = 0; i < nodes.size(); i++) {
-      ArrayList<WordPOS> sensesComponents = PreProcessor
-          .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getGloss()));
+      ArrayList<WordPOS> sensesComponents = WSDHelper
+          .getAllRelevantWords(WSDHelper.getTokenizer().tokenize(
+              nodes.get(i).getGloss()));
       WordSense wordSense = new WordSense();
       nodes.get(i).setSenseRelevantWords(sensesComponents);
       wordSense.setNode(nodes.get(i));

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java?rev=1696509&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
 Tue Aug 18 22:44:32 2015
@@ -0,0 +1,664 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.dictionary.Dictionary;
+import net.sf.extjwnl.dictionary.MorphologicalProcessor;
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.disambiguator.lesk.Lesk;
+import opennlp.tools.lemmatizer.SimpleLemmatizer;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+
+public class WSDHelper {
+
+  protected static TokenizerME tokenizer;
+  protected static POSTaggerME tagger;
+  protected static SimpleLemmatizer lemmatizer;
+  protected static Dictionary dictionary;
+  protected static MorphologicalProcessor morph;
+
+  protected static String tokenizerModelPath;
+  protected static String taggerModelPath;
+  protected static String lemmatizerDictionaryPath;
+
+  // local caches for faster lookup
+  private static HashMap<String, Object> stemCache;
+  private static HashMap<String, Object> stopCache;
+  private static HashMap<String, Object> relvCache;
+
+  private static HashMap<String, Object> englishWords;
+
+  // List of all the PoS tags
+  public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
+      "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
+      "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD",
+      "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
+
+  // List of the PoS tags of which the senses are to be extracted
+  public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB",
+      "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
+
+  // List of Negation Words
+  public static ArrayList<String> negationWords = new ArrayList<String>(
+      Arrays.asList("not", "no", "never", "none", "nor", "non"));
+
+  // List of Stop Words
+  public static ArrayList<String> stopWords = new ArrayList<String>(
+      Arrays.asList("a", "able", "about", "above", "according", "accordingly",
+          "across", "actually", "after", "afterwards", "again", "against",
+          "ain't", "all", "allow", "allows", "almost", "alone", "along",
+          "already", "also", "although", "always", "am", "among", "amongst",
+          "an", "and", "another", "any", "anybody", "anyhow", "anyone",
+          "anything", "anyway", "anyways", "anywhere", "apart", "appear",
+          "appreciate", "appropriate", "are", "aren't", "around", "as",
+          "aside", "ask", "asking", "associated", "at", "available", "away",
+          "awfully", "be", "became", "because", "become", "becomes",
+          "becoming", "been", "before", "beforehand", "behind", "being",
+          "believe", "below", "beside", "besides", "best", "better", "between",
+          "beyond", "both", "brief", "but", "by", "came", "can", "cannot",
+          "cant", "can't", "cause", "causes", "certain", "certainly",
+          "changes", "clearly", "c'mon", "co", "com", "come", "comes",
+          "concerning", "consequently", "consider", "considering", "contain",
+          "containing", "contains", "corresponding", "could", "couldn't",
+          "course", "c's", "currently", "definitely", "described", "despite",
+          "did", "didn't", "different", "do", "does", "doesn't", "doing",
+          "done", "don't", "down", "downwards", "during", "each", "edu", "eg",
+          "eight", "either", "else", "elsewhere", "enough", "entirely",
+          "especially", "et", "etc", "even", "ever", "every", "everybody",
+          "everyone", "everything", "everywhere", "ex", "exactly", "example",
+          "except", "far", "few", "fifth", "first", "five", "followed",
+          "following", "follows", "for", "former", "formerly", "forth", "four",
+          "from", "further", "furthermore", "get", "gets", "getting", "given",
+          "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings",
+          "had", "hadn't", "happens", "hardly", "has", "hasn't", "have",
+          "haven't", "having", "he", "hello", "help", "hence", "her", "here",
+          "hereafter", "hereby", "herein", "here's", "hereupon", "hers",
+          "herself", "he's", "hi", "him", "himself", "his", "hither",
+          "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
+          "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc",
+          "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
+          "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll",
+          "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
+          "know", "known", "knows", "last", "lately", "later", "latter",
+          "latterly", "least", "less", "lest", "let", "let's", "like", "liked",
+          "likely", "little", "look", "looking", "looks", "ltd", "mainly",
+          "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might",
+          "more", "moreover", "most", "mostly", "much", "must", "my", "myself",
+          "name", "namely", "nd", "near", "nearly", "necessary", "need",
+          "needs", "neither", "never", "nevertheless", "new", "next", "nine",
+          "no", "nobody", "non", "none", "noone", "nor", "normally", "not",
+          "nothing", "novel", "now", "nowhere", "obviously", "of", "off",
+          "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones",
+          "only", "onto", "or", "other", "others", "otherwise", "ought", "our",
+          "ours", "ourselves", "out", "outside", "over", "overall", "own",
+          "particular", "particularly", "per", "perhaps", "placed", "please",
+          "plus", "possible", "presumably", "probably", "provides", "que",
+          "quite", "qv", "rather", "rd", "re", "really", "reasonably",
+          "regarding", "regardless", "regards", "relatively", "respectively",
+          "right", "said", "same", "saw", "say", "saying", "says", "second",
+          "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems",
+          "seen", "self", "selves", "sensible", "sent", "serious", "seriously",
+          "seven", "several", "shall", "she", "should", "shouldn't", "since",
+          "six", "so", "some", "somebody", "somehow", "someone", "something",
+          "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry",
+          "specified", "specify", "specifying", "still", "sub", "such", "sup",
+          "sure", "take", "taken", "tell", "tends", "th", "than", "thank",
+          "thanks", "thanx", "that", "thats", "that's", "the", "their",
+          "theirs", "them", "themselves", "then", "thence", "there",
+          "thereafter", "thereby", "therefore", "therein", "theres", "there's",
+          "thereupon", "these", "they", "they'd", "they'll", "they're",
+          "they've", "think", "third", "this", "thorough", "thoroughly",
+          "those", "though", "three", "through", "throughout", "thru", "thus",
+          "to", "together", "too", "took", "toward", "towards", "tried",
+          "tries", "truly", "try", "trying", "t's", "twice", "two", "un",
+          "under", "unfortunately", "unless", "unlikely", "until", "unto",
+          "up", "upon", "us", "use", "used", "useful", "uses", "using",
+          "usually", "value", "various", "very", "via", "viz", "vs", "want",
+          "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well",
+          "we'll", "went", "were", "we're", "weren't", "we've", "what",
+          "whatever", "what's", "when", "whence", "whenever", "where",
+          "whereafter", "whereas", "whereby", "wherein", "where's",
+          "whereupon", "wherever", "whether", "which", "while", "whither",
+          "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
+          "willing", "wish", "with", "within", "without", "wonder", "won't",
+          "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your",
+          "you're", "yours", "yourself", "yourselves", "you've", "zero"));
+
+  public static HashMap<String, Object> getRelvCache() {
+    if (relvCache == null || relvCache.keySet().isEmpty()) {
+      relvCache = new HashMap<String, Object>();
+      for (String t : relevantPOS) {
+        relvCache.put(t, null);
+      }
+    }
+    return relvCache;
+  }
+
+  public static HashMap<String, Object> getStopCache() {
+    if (stopCache == null || stopCache.keySet().isEmpty()) {
+      stopCache = new HashMap<String, Object>();
+      for (String s : stopWords) {
+        stopCache.put(s, null);
+      }
+    }
+    return stopCache;
+  }
+
+  public static HashMap<String, Object> getStemCache() {
+    if (stemCache == null || stemCache.keySet().isEmpty()) {
+      stemCache = new HashMap<String, Object>();
+      for (Object pos : POS.getAllPOS()) {
+        stemCache.put(((POS) pos).getKey(), new HashMap());
+      }
+    }
+    return stemCache;
+  }
+
+  public static HashMap<String, Object> getEnglishWords() {
+    if (englishWords == null || englishWords.keySet().isEmpty()) {
+      englishWords = getEnglishWords(lemmatizerDictionaryPath);
+    }
+    return englishWords;
+  }
+
+  public static MorphologicalProcessor getMorph() {
+    if (morph == null) {
+      getDictionary();
+      morph = dictionary.getMorphologicalProcessor();
+    }
+    return morph;
+  }
+
+  public static Dictionary getDictionary() {
+    if (dictionary == null) {
+      try {
+        dictionary = Dictionary.getDefaultResourceInstance();
+      } catch (JWNLException e) {
+        e.printStackTrace();
+      }
+    }
+    return dictionary;
+  }
+
+  public static SimpleLemmatizer getLemmatizer() {
+    if (lemmatizer == null) {
+      try {
+        lemmatizer = new SimpleLemmatizer(new FileInputStream(
+            lemmatizerDictionaryPath));
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+
+    return lemmatizer;
+  }
+
+  public static POSTaggerME getTagger() {
+    if (tagger == null) {
+      tagger = new POSTaggerME(new POSModelLoader().load(new File(
+          taggerModelPath)));
+    }
+    return tagger;
+  }
+
+  public static TokenizerME getTokenizer() {
+    if (tokenizer == null) {
+      try {
+        tokenizer = new TokenizerME(new TokenizerModel(new FileInputStream(
+            tokenizerModelPath)));
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+
+    }
+    return tokenizer;
+  }
+
+  public static TokenizerME loadTokenizer(String path) {
+    tokenizerModelPath = path;
+    return getTokenizer();
+  }
+
+  public static POSTaggerME loadTagger(String path) {
+    taggerModelPath = path;
+    return getTagger();
+  }
+
+  public static SimpleLemmatizer loadLemmatizer(String path) {
+    lemmatizerDictionaryPath = path;
+    return getLemmatizer();
+  }
+
+  /*
+   * checks if the word is or contains a number
+   */
+  public static boolean containsNumbers(String word) {
+    return word.matches(".*[0-9].*");
+  }
+
+  // Print a text in the console
+  public static void printResults(WSDisambiguator disambiguator,
+      String[] results) {
+
+    if (results != null) {
+
+      String[] parts;
+      String sensekey;
+      if (disambiguator instanceof Lesk) {
+
+        Double score;
+
+        for (int i = 0; i < results.length; i++) {
+          parts = results[i].split(" ");
+          sensekey = parts[1];
+          score = Double.parseDouble(parts[2]);
+          try {
+            print("score : "
+                + score
+                + " for sense "
+                + i
+                + " : "
+                + sensekey
+                + " : "
+                + getDictionary().getWordBySenseKey(sensekey).getSynset()
+                    .getGloss());
+          } catch (JWNLException e) {
+            e.printStackTrace();
+          }
+        }
+      } else {
+        for (int i = 0; i < results.length; i++) {
+          parts = results[i].split(" ");
+          sensekey = parts[1];
+          try {
+            print("sense "
+                + i
+                + " : "
+                + sensekey
+                + " : "
+                + getDictionary().getWordBySenseKey(sensekey).getSynset()
+                    .getGloss());
+          } catch (JWNLException e) {
+            e.printStackTrace();
+          }
+        }
+      }
+    }
+
+  }
+
+  public static void print(Object in) {
+    if (in == null) {
+      System.out.println("object is null");
+    } else {
+      System.out.println(in);
+    }
+  }
+
+  public static void print(Object[] array) {
+    if (array == null) {
+      System.out.println("object is null");
+    } else {
+      System.out.println(Arrays.asList(array));
+    }
+  }
+
+  public static void print(Object[][] array) {
+    if (array == null) {
+      System.out.println("object is null");
+    } else {
+      System.out.print("[");
+      for (int i = 0; i < array.length; i++) {
+        print(array[i]);
+        if (i != array.length - 1) {
+          System.out.print("\n");
+        }
+        print("]");
+      }
+    }
+  }
+
+  /**
+   * Extract the list of ALL English words
+   * 
+   * @param dict
+   *          this file is the same as the one used in the simple Lemmatizer
+   *          (i.e., "en-lemmatizer.dict")
+   * 
+   * @return a list of all the English words
+   */
+  public static HashMap<String, Object> getEnglishWords(String dict) {
+
+    HashMap<String, Object> words = new HashMap<String, Object>();
+
+    BufferedReader br = null;
+
+    File file = new File(lemmatizerDictionaryPath);
+
+    if (file.exists()) {
+
+      try {
+        br = new BufferedReader(new FileReader(file));
+        String line = br.readLine();
+        while (line != null) {
+          line = br.readLine();
+          if (line != null) {
+            String word = line.split("\\t")[0];
+            words.put(word, null);
+          }
+        }
+      } catch (FileNotFoundException e) {
+        e.printStackTrace();
+      } catch (IOException e) {
+        e.printStackTrace();
+      } finally {
+        if (br != null) {
+          try {
+            br.close();
+          } catch (IOException e) {
+            e.printStackTrace();
+          }
+        }
+      }
+      return words;
+    } else {
+      return null;
+    }
+
+  }
+
+  /**
+   * return the PoS (Class POS) out of the PoS-tag
+   * 
+   * @param posTag
+   *          PoS tag (e.g., "JJS", "NNP", etc.)
+   * @return the Part of Speech (type {@link POS})
+   */
+  public static POS getPOS(String posTag) {
+
+    ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ",
+        "JJR", "JJS"));
+    ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR",
+        "RBS"));
+    ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS",
+        "NNP", "NNPS"));
+    ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD",
+        "VBG", "VBN", "VBP", "VBZ"));
+
+    if (adjective.contains(posTag))
+      return POS.ADJECTIVE;
+    else if (adverb.contains(posTag))
+      return POS.ADVERB;
+    else if (noun.contains(posTag))
+      return POS.NOUN;
+    else if (verb.contains(posTag))
+      return POS.VERB;
+    else
+      return null;
+
+  }
+
+  /**
+   * Check whether a PoS Tag is relevant or not. A PoS Tag is considered
+   * relevant when it corresponds to:
+   * <ul>
+   * <li>VERB</li>
+   * <li>ADJECTIVE</li>
+   * <li>ADVERB</li>
+   * <li>NOUN</li>
+   * </ul>
+   * 
+   * @param posTag
+   *          the PoS Tag to verify the relevance.
+   * @return whether a PoS Tag corresponds to a relevant Part of Speech (type
+   *         {@link POS}) or not ({@code true} if it is, {@code false} otherwise)
+   */
+  public static boolean isRelevant(String posTag) {
+    return getPOS(posTag) != null;
+  }
+
+  /**
+   * Check whether a PoS Tag is relevant or not. A PoS Tag is considered
+   * relevant when it is:
+   * <ul>
+   * <li>VERB</li>
+   * <li>ADJECTIVE</li>
+   * <li>ADVERB</li>
+   * <li>NOUN</li>
+   * </ul>
+   * 
+   * @param pos
+   *          The Part of Speech of Type {@link POS}
+   * @return whether a Part of Speech is relevant (true) or not (false)
+   */
+  public static boolean isRelevant(POS pos) {
+    return pos.equals(POS.ADJECTIVE) || pos.equals(POS.ADVERB)
+        || pos.equals(POS.NOUN) || pos.equals(POS.VERB);
+  }
+
+  public static String getPOSabbreviation(String posTag) {
+
+    if (posTag == null) {
+      return null;
+    }
+    if (posTag.startsWith("JJ")) {
+      return "a";
+    } else if (posTag.startsWith("RB")) {
+      return "r";
+    } else if (posTag.startsWith("VB") || posTag.equals("MD")) {
+      return "v";
+    } else if (posTag.startsWith("NN")) {
+      return "n";
+    }
+
+    return null;
+
+  }
+
+  /**
+   * Check whether a list of arrays contains an array
+   * 
+   * @param array
+   *          The array to check
+   * @param fullList
+   *          The full list of arrays
+   * @return whether the {@link ArrayList} of arrays contains the array (true)
+   *         or not (false)
+   */
+  public static boolean belongsTo(String[] array, ArrayList<String[]> 
fullList) {
+    for (String[] refArray : fullList) {
+      if (areStringArraysEqual(array, refArray))
+        return true;
+    }
+    return false;
+  }
+
+  /**
+   * Check whether two arrays of strings are equal
+   * 
+   * @param array1
+   *          first array
+   * @param array2
+   *          second array
+   * @return whether the two arrays are identical (true) or not (false)
+   */
+  public static boolean areStringArraysEqual(String[] array1, String[] array2) 
{
+
+    if (array1.equals(null) || array2.equals(null))
+      return false;
+
+    if (array1.length != array2.length) {
+      return false;
+    }
+    for (int i = 0; i < array1.length; i++) {
+      if (!array1[i].equals(array2[i])) {
+        return false;
+      }
+    }
+
+    return true;
+
+  }
+
+  public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
+
+    ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+    String[] tags = WSDHelper.getTagger().tag(sentence);
+
+    for (int i = 0; i < sentence.length; i++) {
+      if (!WSDHelper.getStopCache().containsKey(sentence[i])) {
+        if (WSDHelper.getRelvCache().containsKey(tags[i])) {
+          relevantWords.add(new WordPOS(sentence[i], tags[i]));
+        }
+
+      }
+    }
+    return relevantWords;
+  }
+
+  public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate 
word) {
+    ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+    String[] tags = WSDHelper.getTagger().tag(word.getSentence());
+
+    for (int i = 0; i < word.getSentence().length; i++) {
+      if (!WSDHelper.getStopCache().containsKey(word.getSentence()[i])) {
+        if (WSDHelper.getRelvCache().containsKey(tags[i])) {
+          WordPOS wordpos = new WordPOS(word.getSentence()[i], tags[i]);
+          if (i == word.getWordIndex()) {
+            wordpos.isTarget = true;
+          }
+          relevantWords.add(wordpos);
+        }
+
+      }
+    }
+    return relevantWords;
+  }
+
+  public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word,
+      int winBackward, int winForward) {
+
+    ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+    String[] sentence = word.getSentence();
+    String[] tags = WSDHelper.getTagger().tag(sentence);
+
+    int index = word.getWordIndex();
+
+    for (int i = index - winBackward; i <= index + winForward; i++) {
+
+      if (i >= 0 && i < sentence.length && i != index) {
+        if (!WSDHelper.getStopCache().containsKey(sentence[i])) {
+
+          if (WSDHelper.getRelvCache().containsKey(tags[i])) {
+            relevantWords.add(new WordPOS(sentence[i], tags[i]));
+          }
+
+        }
+      }
+    }
+    return relevantWords;
+  }
+
+  /**
+   * Stem a single word with WordNet dictionnary
+   * 
+   * @param wordToStem
+   *          word to be stemmed
+   * @return stemmed list of words
+   */
+  public static ArrayList<String> StemWordWithWordNet(WordPOS wordToStem) {
+    if (wordToStem == null)
+      return null;
+    ArrayList<String> stems = new ArrayList<String>();
+    try {
+      for (Object pos : POS.getAllPOS()) {
+        stems.addAll(WSDHelper.getMorph().lookupAllBaseForms((POS) pos,
+            wordToStem.getWord()));
+      }
+
+      if (stems.size() > 0)
+        return stems;
+      else {
+        return null;
+      }
+
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  /**
+   * Stem a single word tries to look up the word in the stemCache HashMap If
+   * the word is not found it is stemmed with WordNet and put into stemCache
+   * 
+   * @param wordToStem
+   *          word to be stemmed
+   * @return stemmed word list, null means the word is incorrect
+   */
+  public static ArrayList<String> Stem(WordPOS wordToStem) {
+
+    // check if we already cached the stem map
+    HashMap posMap = (HashMap) WSDHelper.getStemCache().get(
+        wordToStem.getPOS().getKey());
+
+    // don't check words with digits in them
+    if (WSDHelper.containsNumbers(wordToStem.getWord())) {
+      return null;
+    }
+
+    ArrayList<String> stemList = (ArrayList<String>) posMap.get(wordToStem
+        .getWord());
+    if (stemList != null) { // return it if we already cached it
+      return stemList;
+
+    } else { // unCached list try to stem it
+      stemList = StemWordWithWordNet(wordToStem);
+      if (stemList != null) {
+        // word was recognized and stemmed with wordnet:
+        // add it to cache and return the stemmed list
+        posMap.put(wordToStem.getWord(), stemList);
+        WSDHelper.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+        return stemList;
+      } else { // could not be stemmed add it anyway (as incorrect with null
+               // list)
+        posMap.put(wordToStem.getWord(), null);
+        WSDHelper.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+        return null;
+      }
+    }
+  }
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
 Tue Aug 18 22:44:32 2015
@@ -24,10 +24,10 @@ package opennlp.tools.disambiguator;
  *
  */
 public abstract class WSDParameters {
-  
+
   protected boolean isCoarseSense;
   public static boolean isStemCompare;
-  
+
   public static enum Source {
     WORDNET
   }
@@ -42,14 +42,14 @@ public abstract class WSDParameters {
   public void setCoarseSense(boolean isCoarseSense) {
     this.isCoarseSense = isCoarseSense;
   }
-  
-  public WSDParameters(){
+
+  public WSDParameters() {
     this.isCoarseSense = true;
   }
-  
+
   /**
    * @return checks if the parameters are valid or not
    */
   public abstract boolean isValid();
-  
+
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
 Tue Aug 18 22:44:32 2015
@@ -230,7 +230,7 @@ public class WSDSample {
     try {
       return Dictionary
           .getDefaultResourceInstance()
-          .lookupIndexWord(Constants.getPOS(this.getTargetTag()),
+          .lookupIndexWord(WSDHelper.getPOS(this.getTargetTag()),
               this.getTargetWord()).getSenses();
     } catch (JWNLException e) {
       e.printStackTrace();

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
 Tue Aug 18 22:44:32 2015
@@ -37,7 +37,7 @@ public class WordPOS {
   private POS pos;
   private String posTag;
   private int wordIndex;
-  public boolean isTarget=false;
+  public boolean isTarget = false;
 
   public WordPOS(String word, String tag) throws IllegalArgumentException {
     if (word == null || tag == null) {
@@ -45,9 +45,9 @@ public class WordPOS {
     }
     this.word = word;
     this.posTag = tag;
-    this.pos = Constants.getPOS(tag);
+    this.pos = WSDHelper.getPOS(tag);
   }
-  
+
   public WordPOS(String word, POS pos) throws IllegalArgumentException {
     if (word == null || pos == null) {
       throw new IllegalArgumentException("Args are null");
@@ -70,7 +70,7 @@ public class WordPOS {
 
   public List getStems() {
     if (stems == null) {
-      return PreProcessor.Stem(this);
+      return WSDHelper.Stem(this);
     } else {
       return stems;
     }
@@ -81,9 +81,9 @@ public class WordPOS {
 
     IndexWord indexWord;
     try {
-      indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
+      indexWord = WSDHelper.getDictionary().lookupIndexWord(pos, word);
       if (indexWord == null) {
-        Constants
+        WSDHelper
             .print("NULL synset probably a POS tagger mistake ! :: [POS] : "
                 + pos.getLabel() + " [word] : " + word);
         return null;
@@ -101,20 +101,17 @@ public class WordPOS {
     // check if there is intersection in the stems;
     List originalList = this.getStems();
     List listToCompare = wordToCompare.getStems();
-    
-    
-    if (originalList == null || listToCompare == null) { 
+
+    if (originalList == null || listToCompare == null) {
       return false;
     } else {
       ListIterator<String> iterator = originalList.listIterator();
-      while (iterator.hasNext())
-      {
-          iterator.set(iterator.next().toLowerCase());
+      while (iterator.hasNext()) {
+        iterator.set(iterator.next().toLowerCase());
       }
       iterator = listToCompare.listIterator();
-      while (iterator.hasNext())
-      {
-          iterator.set(iterator.next().toLowerCase());
+      while (iterator.hasNext()) {
+        iterator.set(iterator.next().toLowerCase());
       }
       return !Collections.disjoint(originalList, listToCompare);
     }
@@ -127,10 +124,10 @@ public class WordPOS {
     ArrayList<String> lemmas_word = new ArrayList();
     ArrayList<String> lemmas_wordToCompare = new ArrayList();
 
-    for (String pos : Constants.allPOS) {
-      Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
+    for (String pos : WSDHelper.allPOS) {
+      WSDHelper.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
     }
     return false;
   }
-  
+
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
 Tue Aug 18 22:44:32 2015
@@ -19,7 +19,8 @@
 
 package opennlp.tools.disambiguator;
 
-import opennlp.tools.disambiguator.WSDSample;;
+import opennlp.tools.disambiguator.WSDSample;
+import opennlp.tools.disambiguator.SynNode;
 
 public class WordSense implements Comparable {
 

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
 Tue Aug 18 22:44:32 2015
@@ -37,7 +37,7 @@ public class WordToDisambiguate {
   protected int sense;
 
   protected ArrayList<String> senseIDs;
-  
+
   public WordToDisambiguate(String[] sentence, int wordIndex)
       throws IllegalArgumentException {
     super();
@@ -47,7 +47,7 @@ public class WordToDisambiguate {
     }
 
     this.sentence = sentence;
-    this.posTags = PreProcessor.tag(sentence);
+    this.posTags = WSDHelper.getTagger().tag(sentence);
 
     this.wordIndex = wordIndex;
 
@@ -63,7 +63,7 @@ public class WordToDisambiguate {
     }
 
     this.sentence = sentence;
-    this.posTags = PreProcessor.tag(sentence);
+    this.posTags = WSDHelper.getTagger().tag(sentence);
 
     this.wordIndex = wordIndex;
 
@@ -79,14 +79,14 @@ public class WordToDisambiguate {
     }
 
     this.sentence = sentence;
-    this.posTags = PreProcessor.tag(sentence);
+    this.posTags = WSDHelper.getTagger().tag(sentence);
 
     this.wordIndex = wordIndex;
 
     this.senseIDs = senseIDs;
   }
 
-  public WordToDisambiguate(String[] sentence,  String[] tokenTags, int 
wordIndex) {
+  public WordToDisambiguate(String[] sentence, String[] tokenTags, int 
wordIndex) {
     this(sentence, wordIndex, -1);
   }
 
@@ -125,20 +125,20 @@ public class WordToDisambiguate {
 
   public String getRawWord() {
 
-    String wordBaseForm = Loader.getLemmatizer().lemmatize(
+    String wordBaseForm = WSDHelper.getLemmatizer().lemmatize(
         this.sentence[wordIndex], this.posTags[wordIndex]);
 
     String ref = "";
 
-    if ((Constants.getPOS(this.posTags[wordIndex]) != null)) {
-      if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.VERB)) {
+    if ((WSDHelper.getPOS(this.posTags[wordIndex]) != null)) {
+      if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.VERB)) {
         ref = wordBaseForm + ".v";
-      } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.NOUN)) {
+      } else if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.NOUN)) {
         ref = wordBaseForm + ".n";
-      } else if (Constants.getPOS(this.posTags[wordIndex])
+      } else if (WSDHelper.getPOS(this.posTags[wordIndex])
           .equals(POS.ADJECTIVE)) {
         ref = wordBaseForm + ".a";
-      } else if (Constants.getPOS(this.posTags[wordIndex]).equals(POS.ADVERB)) 
{
+      } else if (WSDHelper.getPOS(this.posTags[wordIndex]).equals(POS.ADVERB)) 
{
         ref = wordBaseForm + ".r";
       }
 
@@ -182,11 +182,10 @@ public class WordToDisambiguate {
   public String toString() {
     return (wordIndex + "\t" + getWord() + "\n" + sentence);
   }
-  
+
   public void print() {
-    Constants.print("Sentence:  " + Arrays.asList(sentence) + "\n" + 
-        "Index: " + wordIndex + "\n" + 
-        "Word: "+ getWord() + "\n" +
-        "Sense ID: " + senseIDs.get(0));
+    WSDHelper.print("Sentence:  " + Arrays.asList(sentence) + "\n" + "Index: "
+        + wordIndex + "\n" + "Word: " + getWord() + "\n" + "Sense ID: "
+        + senseIDs.get(0));
   }
 }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/datareader/Word.java
 Tue Aug 18 22:44:32 2015
@@ -19,7 +19,7 @@
 
 package opennlp.tools.disambiguator.datareader;
 
-import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.WSDHelper;
 
 public class Word {
 
@@ -245,14 +245,14 @@ public class Word {
 
     if (this.lemma != null && iword.getLemma() != null) {
       if (iword.getLemma().equals(this.getLemma())
-          && Constants.getPOS(iword.getPos()).equals(
-              Constants.getPOS(this.getPos()))) {
+          && WSDHelper.getPOS(iword.getPos()).equals(
+              WSDHelper.getPOS(this.getPos()))) {
         return true;
       }
     } else {
       if (this.word.equals(iword.getWord())
-          && Constants.getPOSabbreviation(this.getPos()).equals(
-              Constants.getPOSabbreviation(iword.getPos()))) {
+          && WSDHelper.getPOSabbreviation(this.getPos()).equals(
+              WSDHelper.getPOSabbreviation(iword.getPos()))) {
         return true;
       }
     }
@@ -261,7 +261,7 @@ public class Word {
 
   public boolean isInstanceOf(String wordTag) {
 
-    String tag = Constants.getPOSabbreviation(this.getPos());
+    String tag = WSDHelper.getPOSabbreviation(this.getPos());
 
     String oword = wordTag.split("\\.")[0];
     String otag = wordTag.split("\\.")[1];
@@ -286,8 +286,8 @@ public class Word {
     Word iword = (Word) oword;
 
     if (iword.getLemma().equals(this.getLemma())
-        && Constants.getPOS(iword.getPos()).equals(
-            Constants.getPOS(this.getPos()))
+        && WSDHelper.getPOS(iword.getPos()).equals(
+            WSDHelper.getPOS(this.getPos()))
         && iword.getLexsn().equals(this.getLexsn())) {
       return true;
     }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 Tue Aug 18 22:44:32 2015
@@ -22,8 +22,7 @@ package opennlp.tools.disambiguator.ims;
 import java.util.ArrayList;
 
 import net.sf.extjwnl.data.POS;
-import opennlp.tools.disambiguator.Constants;
-import opennlp.tools.disambiguator.PreProcessor;
+import opennlp.tools.disambiguator.WSDHelper;
 import opennlp.tools.disambiguator.WordToDisambiguate;
 
 public class WTDIMS extends WordToDisambiguate {
@@ -49,8 +48,8 @@ public class WTDIMS extends WordToDisamb
 
     // this.word = xmlWord;
 
-    this.sentence = PreProcessor.tokenize(xmlSentence);
-    this.posTags = PreProcessor.tag(this.sentence);
+    this.sentence = WSDHelper.getTokenizer().tokenize(xmlSentence);
+    this.posTags = WSDHelper.getTagger().tag(this.sentence);
 
     for (int i = 0; i < sentence.length; i++) {
       if (xmlrawWord.equals(sentence[i])) {
@@ -67,7 +66,7 @@ public class WTDIMS extends WordToDisamb
     super(wtd.getSentence(), wtd.getWordIndex(), wtd.getSense());
     this.senseIDs = wtd.getSenseIDs();
   }
-  
+
   public WTDIMS(String[] sentence, int wordIndex, ArrayList<String> senseIDs) {
     super(sentence, wordIndex);
     this.senseIDs = senseIDs;
@@ -107,19 +106,19 @@ public class WTDIMS extends WordToDisamb
 
   public String getWordTag() {
 
-    String wordBaseForm = PreProcessor.lemmatize(this.getWord(),
+    String wordBaseForm = WSDHelper.getLemmatizer().lemmatize(this.getWord(),
         this.getPosTag());
 
     String ref = "";
 
-    if ((Constants.getPOS(this.getPosTag()) != null)) {
-      if (Constants.getPOS(this.getPosTag()).equals(POS.VERB)) {
+    if ((WSDHelper.getPOS(this.getPosTag()) != null)) {
+      if (WSDHelper.getPOS(this.getPosTag()).equals(POS.VERB)) {
         ref = wordBaseForm + ".v";
-      } else if (Constants.getPOS(this.getPosTag()).equals(POS.NOUN)) {
+      } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.NOUN)) {
         ref = wordBaseForm + ".n";
-      } else if (Constants.getPOS(this.getPosTag()).equals(POS.ADJECTIVE)) {
+      } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.ADJECTIVE)) {
         ref = wordBaseForm + ".a";
-      } else if (Constants.getPOS(this.getPosTag()).equals(POS.ADVERB)) {
+      } else if (WSDHelper.getPOS(this.getPosTag()).equals(POS.ADVERB)) {
         ref = wordBaseForm + ".r";
       }
     }

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java?rev=1696509&r1=1696508&r2=1696509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
 Tue Aug 18 22:44:32 2015
@@ -23,10 +23,8 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
-import opennlp.tools.disambiguator.Constants;
-import opennlp.tools.disambiguator.Loader;
+import opennlp.tools.disambiguator.WSDHelper;
 import opennlp.tools.disambiguator.SynNode;
-import opennlp.tools.disambiguator.PreProcessor;
 import opennlp.tools.disambiguator.WSDParameters;
 import opennlp.tools.disambiguator.WSDSample;
 import opennlp.tools.disambiguator.WSDisambiguator;
@@ -62,7 +60,7 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * Initializes the loader object and sets the input parameters
+   * Initializes the WSDParameters object and sets the input parameters
    * 
    * @param Input
    *          Parameters
@@ -103,8 +101,8 @@ public class Lesk implements WSDisambigu
   /**
    * The basic Lesk method where the entire context is considered for overlaps
    * 
-   * @param The
-   *          word to disambiguate
+   * @param sample
+   *          the word sample to disambiguate
    * @return The array of WordSenses with their scores
    */
   public ArrayList<WordSense> basic(WSDSample sample) {
@@ -144,37 +142,11 @@ public class Lesk implements WSDisambigu
   /**
    * The basic Lesk method but applied to a default context windows
    * 
-   * @param The
-   *          word to disambiguate
+   * @param sample
+   *          the word sample to disambiguate
    * @return The array of WordSenses with their scores
    */
   public ArrayList<WordSense> basicContextual(WSDSample sample) {
-    return this.basicContextual(sample, LeskParameters.DFLT_WIN_SIZE);
-  }
-
-  /**
-   * The basic Lesk method but applied to a custom context windows
-   * 
-   * @param The
-   *          word to disambiguate
-   * @param windowSize
-   * @return The array of WordSenses with their scores
-   */
-  public ArrayList<WordSense> basicContextual(WSDSample sample, int 
windowSize) {
-    return this.basicContextual(sample, windowSize, windowSize);
-  }
-
-  /**
-   * The basic Lesk method but applied to a context windows set by custom
-   * backward and forward window lengths
-   * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param windowBackward
-   * @return the array of WordSenses with their scores
-   */
-  public ArrayList<WordSense> basicContextual(WSDSample sample,
-      int windowBackward, int windowForward) {
 
     WordPOS word = new WordPOS(sample.getTargetWord(), sample.getTargetTag());
 
@@ -183,7 +155,8 @@ public class Lesk implements WSDisambigu
 
     int index = sample.getTargetPosition();
 
-    for (int i = index - windowBackward; i <= index + windowForward; i++) {
+    for (int i = index - getParams().win_b_size; i <= index
+        + getParams().win_f_size; i++) {
       if (i >= 0 && i < sample.getSentence().length && i != index) {
         contextWords.add(new WordPOS(sample.getSentence()[i],
             sample.getTags()[i]));
@@ -222,27 +195,14 @@ public class Lesk implements WSDisambigu
    * semantically related feature overlaps across the entire context The 
scoring
    * function uses linear weights.
    * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param depthScoreWeight
-   *          the weighing per depth level
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
+   * @param sample
+   *          the word sample to disambiguate
    * @return the array of WordSenses with their scores
    */
-  public ArrayList<WordSense> extended(WSDSample sample, int depth,
-      double depthScoreWeight, boolean includeSynonyms,
-      boolean includeHypernyms, boolean includeHyponyms,
-      boolean includeMeronyms, boolean includeHolonyms) {
-
-    return extendedContextual(sample, 0, depth, depthScoreWeight,
-        includeSynonyms, includeHypernyms, includeHyponyms, includeMeronyms,
-        includeHolonyms);
+  public ArrayList<WordSense> extended(WSDSample sample) {
+    params.setWin_b_size(0);
+    params.setWin_f_size(0);
+    return extendedContextual(sample);
 
   }
 
@@ -251,117 +211,69 @@ public class Lesk implements WSDisambigu
    * semantically related feature overlaps in a default context window The
    * scoring function uses linear weights.
    * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param depthScoreWeight
-   *          the weighing per depth level
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
-   * @return the array of WordSenses with their scores
-   */
-  public ArrayList<WordSense> extendedContextual(WSDSample sample, int depth,
-      double depthScoreWeight, boolean includeSynonyms,
-      boolean includeHypernyms, boolean includeHyponyms,
-      boolean includeMeronyms, boolean includeHolonyms) {
-
-    return extendedContextual(sample, LeskParameters.DFLT_WIN_SIZE, depth,
-        depthScoreWeight, includeSynonyms, includeHypernyms, includeHyponyms,
-        includeMeronyms, includeHolonyms);
-
-  }
-
-  /**
-   * An extended version of the Lesk approach that takes into consideration
-   * semantically related feature overlaps in a custom context window The
-   * scoring function uses linear weights.
-   * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param windowSize
-   *          the custom context window size
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param depthScoreWeight
-   *          the weighing per depth level
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
-   * @return the array of WordSenses with their scores
-   */
-  public ArrayList<WordSense> extendedContextual(WSDSample sample,
-      int windowSize, int depth, double depthScoreWeight,
-      boolean includeSynonyms, boolean includeHypernyms,
-      boolean includeHyponyms, boolean includeMeronyms, boolean 
includeHolonyms) {
-
-    return extendedContextual(sample, windowSize, windowSize, depth,
-        depthScoreWeight, includeSynonyms, includeHypernyms, includeHyponyms,
-        includeMeronyms, includeHolonyms);
-  }
-
-  /**
-   * An extended version of the Lesk approach that takes into consideration
-   * semantically related feature overlaps in a custom context window The
-   * scoring function uses linear weights.
-   * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param windowBackward
-   *          the custom context backward window size
-   * @param windowForward
-   *          the custom context forward window size
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param depthScoreWeight
-   *          the weighing per depth level
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
+   * @param sample
+   *          the word sample to disambiguate
    * @return the array of WordSenses with their scores
    */
-  public ArrayList<WordSense> extendedContextual(WSDSample sample,
-      int windowBackward, int windowForward, int depth,
-      double depthScoreWeight, boolean includeSynonyms,
-      boolean includeHypernyms, boolean includeHyponyms,
-      boolean includeMeronyms, boolean includeHolonyms) {
-
-    ArrayList<WordSense> scoredSenses = basicContextual(sample, windowBackward,
-        windowForward);
-
+  public ArrayList<WordSense> extendedContextual(WSDSample sample) {
+    ArrayList<WordSense> scoredSenses;
+    if (params.getWin_b_size() == 0 && params.getWin_f_size() == 0) {
+      scoredSenses = basic(sample);
+    } else {
+      scoredSenses = basicContextual(sample);
+    }
     for (WordSense wordSense : scoredSenses) {
 
-      if (includeSynonyms) {
-        wordSense.setScore(wordSense.getScore() + depthScoreWeight
+      if (getParams().getFeatures()[0]) {
+        wordSense.setScore(wordSense.getScore() + getParams().depth_weight
             * assessSynonyms(wordSense.getNode().getSynonyms(), contextWords));
       }
 
-      if (includeHypernyms) {
+      if (getParams().getFeatures()[1]) {
         fathomHypernyms(wordSense, wordSense.getNode().synset, contextWords,
-            depth, depth, depthScoreWeight);
+            params.depth, params.depth, params.depth_weight);
       }
 
-      if (includeHyponyms) {
+      if (getParams().getFeatures()[2]) {
         fathomHyponyms(wordSense, wordSense.getNode().synset, contextWords,
-            depth, depth, depthScoreWeight);
+            params.depth, params.depth, params.depth_weight);
       }
 
-      if (includeMeronyms) {
+      if (getParams().getFeatures()[3]) {
         fathomMeronyms(wordSense, wordSense.getNode().synset, contextWords,
-            depth, depth, depthScoreWeight);
+            params.depth, params.depth, params.depth_weight);
 
       }
 
-      if (includeHolonyms) {
+      if (getParams().getFeatures()[4]) {
         fathomHolonyms(wordSense, wordSense.getNode().synset, contextWords,
-            depth, depth, depthScoreWeight);
+            params.depth, params.depth, params.depth_weight);
+
+      }
+
+      if (getParams().getFeatures()[5]) {
+        fathomEntailments(wordSense, wordSense.getNode().synset, contextWords,
+            params.depth, params.depth, params.depth_weight);
+
+      }
+      if (getParams().getFeatures()[6]) {
+        fathomCoordinateTerms(wordSense, wordSense.getNode().synset,
+            contextWords, params.depth, params.depth, params.depth_weight);
+
+      }
+      if (getParams().getFeatures()[7]) {
+        fathomCauses(wordSense, wordSense.getNode().synset, contextWords,
+            params.depth, params.depth, params.depth_weight);
+
+      }
+      if (getParams().getFeatures()[8]) {
+        fathomAttributes(wordSense, wordSense.getNode().synset, contextWords,
+            params.depth, params.depth, params.depth_weight);
+
+      }
+      if (getParams().getFeatures()[9]) {
+        fathomPertainyms(wordSense, wordSense.getNode().synset, contextWords,
+            params.depth, params.depth, params.depth_weight);
 
       }
 
@@ -371,90 +283,20 @@ public class Lesk implements WSDisambigu
 
   }
 
-  /**
+  /*
    * An extended version of the Lesk approach that takes into consideration
    * semantically related feature overlaps in all the context. The scoring
    * function uses exponential weights.
    * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param intersectionExponent
-   * @param depthExponent
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
-   * @return the array of WordSenses with their scores
-   */
-  public ArrayList<WordSense> extendedExponential(WSDSample sample, int depth,
-      double intersectionExponent, double depthExponent,
-      boolean includeSynonyms, boolean includeHypernyms,
-      boolean includeHyponyms, boolean includeMeronyms, boolean 
includeHolonyms) {
-
-    return extendedExponentialContextual(sample, 0, depth,
-        intersectionExponent, depthExponent, includeSynonyms, includeHypernyms,
-        includeHyponyms, includeMeronyms, includeHolonyms);
-
-  }
-
-  /**
-   * An extended version of the Lesk approach that takes into consideration
-   * semantically related feature overlaps in a default window in the context.
-   * The scoring function uses exponential weights.
+   * @param sample the word sample to disambiguate
    * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param intersectionExponent
-   * @param depthExponent
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
    * @return the array of WordSenses with their scores
    */
-  public ArrayList<WordSense> extendedExponentialContextual(WSDSample sample,
-      int depth, double intersectionExponent, double depthExponent,
-      boolean includeSynonyms, boolean includeHypernyms,
-      boolean includeHyponyms, boolean includeMeronyms, boolean 
includeHolonyms) {
-
-    return extendedExponentialContextual(sample, LeskParameters.DFLT_WIN_SIZE,
-        depth, intersectionExponent, depthExponent, includeSynonyms,
-        includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
-  }
+  public ArrayList<WordSense> extendedExponential(WSDSample sample) {
+    params.setWin_b_size(0);
+    params.setWin_f_size(0);
+    return extendedExponentialContextual(sample);
 
-  /**
-   * An extended version of the Lesk approach that takes into consideration
-   * semantically related feature overlaps in a custom window in the context.
-   * The scoring function uses exponential weights.
-   * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param windowSize
-   * @param depth
-   *          how deep to go into each feature tree
-   * @param intersectionExponent
-   * @param depthExponent
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
-   * @return the array of WordSenses with their scores
-   */
-  public ArrayList<WordSense> extendedExponentialContextual(WSDSample sample,
-      int windowSize, int depth, double intersectionExponent,
-      double depthExponent, boolean includeSynonyms, boolean includeHypernyms,
-      boolean includeHyponyms, boolean includeMeronyms, boolean 
includeHolonyms) {
-
-    return extendedExponentialContextual(sample, windowSize, windowSize, depth,
-        intersectionExponent, depthExponent, includeSynonyms, includeHypernyms,
-        includeHyponyms, includeMeronyms, includeHolonyms);
   }
 
   /**
@@ -462,58 +304,73 @@ public class Lesk implements WSDisambigu
    * semantically related feature overlaps in a custom window in the context.
    * The scoring function uses exponential weights.
    * 
-   * @param wtd
-   *          the word to disambiguate
-   * @param windowBackward
-   * @param windowForward
-   * @param depth
-   * @param intersectionExponent
-   * @param depthExponent
-   * @param includeSynonyms
-   * @param includeHypernyms
-   * @param includeHyponyms
-   * @param includeMeronyms
-   * @param includeHolonyms
+   * @param sample
+   *          the word sample to disambiguate
    * @return the array of WordSenses with their scores
    */
-  public ArrayList<WordSense> extendedExponentialContextual(WSDSample sample,
-      int windowBackward, int windowForward, int depth,
-      double intersectionExponent, double depthExponent,
-      boolean includeSynonyms, boolean includeHypernyms,
-      boolean includeHyponyms, boolean includeMeronyms, boolean 
includeHolonyms) {
-
-    ArrayList<WordSense> scoredSenses = basicContextual(sample, windowForward,
-        windowBackward);
+  public ArrayList<WordSense> extendedExponentialContextual(WSDSample sample) {
+    ArrayList<WordSense> scoredSenses;
+    if (params.getWin_b_size() == 0 && params.getWin_f_size() == 0) {
+      scoredSenses = basic(sample);
+    } else {
+      scoredSenses = basicContextual(sample);
+    }
 
     for (WordSense wordSense : scoredSenses) {
 
-      if (includeSynonyms) {
+      if (params.features[0]) {
         wordSense.setScore(wordSense.getScore()
             + Math
                 .pow(
                     assessSynonyms(wordSense.getNode().getSynonyms(),
-                        contextWords), intersectionExponent));
+                        contextWords), params.iexp));
       }
 
-      if (includeHypernyms) {
+      if (params.features[1]) {
         fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
-            contextWords, depth, depth, intersectionExponent, depthExponent);
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
       }
 
-      if (includeHyponyms) {
+      if (params.features[2]) {
         fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
-            contextWords, depth, depth, intersectionExponent, depthExponent);
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
       }
 
-      if (includeMeronyms) {
+      if (params.features[3]) {
         fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
-            contextWords, depth, depth, intersectionExponent, depthExponent);
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
 
       }
 
-      if (includeHolonyms) {
+      if (params.features[4]) {
         fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
-            contextWords, depth, depth, intersectionExponent, depthExponent);
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
+
+      }
+
+      if (params.features[5]) {
+        fathomEntailmentsExponential(wordSense, wordSense.getNode().synset,
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
+      }
+
+      if (params.features[6]) {
+        fathomCoordinateTermsExponential(wordSense, wordSense.getNode().synset,
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
+
+      }
+      if (params.features[7]) {
+        fathomCausesExponential(wordSense, wordSense.getNode().synset,
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
+
+      }
+      if (params.features[8]) {
+        fathomAttributesExponential(wordSense, wordSense.getNode().synset,
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
+
+      }
+      if (params.features[9]) {
+        fathomPertainymsExponential(wordSense, wordSense.getNode().synset,
+            contextWords, params.depth, params.depth, params.iexp, 
params.dexp);
 
       }
 
@@ -539,9 +396,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -573,9 +430,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -606,9 +463,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -641,9 +498,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -675,9 +532,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -710,9 +567,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -744,9 +601,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -779,9 +636,9 @@ public class Lesk implements WSDisambigu
     if (depth == 0)
       return;
 
-    String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
         child.getGloss().toString());
-    ArrayList<WordPOS> relvGlossWords = PreProcessor
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
         .getAllRelevantWords(tokenizedGloss);
 
     SynNode childNode = new SynNode(child, relvGlossWords);
@@ -797,6 +654,246 @@ public class Lesk implements WSDisambigu
     }
   }
 
+  private void fathomEntailments(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double depthScoreWeight) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setEntailements();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+        * assessFeature(childNode.getEntailments(), relvWords));
+    for (Synset entailment : childNode.getEntailments()) {
+      fathomEntailments(wordSense, entailment, relvGlossWords, depth - 1,
+          maxDepth, depthScoreWeight);
+    }
+
+  }
+
+  private void fathomEntailmentsExponential(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double intersectionExponent, double depthScoreExponent) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setEntailements();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(assessFeature(childNode.getEntailments(), relvWords),
+            intersectionExponent) / Math.pow(depth, depthScoreExponent));
+    for (Synset entailment : childNode.getEntailments()) {
+      fathomEntailmentsExponential(wordSense, entailment, relvGlossWords,
+          depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+    }
+
+  }
+
+  private void fathomCoordinateTerms(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double depthScoreWeight) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setCoordinateTerms();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+        * assessFeature(childNode.getCoordinateTerms(), relvWords));
+    for (Synset coordinate : childNode.getCoordinateTerms()) {
+      fathomCoordinateTerms(wordSense, coordinate, relvGlossWords, depth - 1,
+          maxDepth, depthScoreWeight);
+    }
+
+  }
+
+  private void fathomCoordinateTermsExponential(WordSense wordSense,
+      Synset child, ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double intersectionExponent, double depthScoreExponent) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setCoordinateTerms();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(assessFeature(childNode.getCoordinateTerms(), relvWords),
+            intersectionExponent) / Math.pow(depth, depthScoreExponent));
+    for (Synset coordinate : childNode.getCoordinateTerms()) {
+      fathomCoordinateTermsExponential(wordSense, coordinate, relvGlossWords,
+          depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+    }
+
+  }
+
+  private void fathomCauses(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double depthScoreWeight) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setCauses();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+        * assessFeature(childNode.getCauses(), relvWords));
+    for (Synset cause : childNode.getCauses()) {
+      fathomEntailments(wordSense, cause, relvGlossWords, depth - 1, maxDepth,
+          depthScoreWeight);
+    }
+
+  }
+
+  private void fathomCausesExponential(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double intersectionExponent, double depthScoreExponent) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setCauses();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(assessFeature(childNode.getCauses(), relvWords),
+            intersectionExponent) / Math.pow(depth, depthScoreExponent));
+    for (Synset cause : childNode.getCauses()) {
+      fathomCausesExponential(wordSense, cause, relvGlossWords, depth - 1,
+          maxDepth, intersectionExponent, depthScoreExponent);
+    }
+
+  }
+
+  private void fathomAttributes(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double depthScoreWeight) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setAttributes();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+        * assessFeature(childNode.getAttributes(), relvWords));
+    for (Synset attribute : childNode.getAttributes()) {
+      fathomAttributes(wordSense, attribute, relvGlossWords, depth - 1,
+          maxDepth, depthScoreWeight);
+    }
+
+  }
+
+  private void fathomAttributesExponential(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double intersectionExponent, double depthScoreExponent) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setAttributes();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(assessFeature(childNode.getAttributes(), relvWords),
+            intersectionExponent) / Math.pow(depth, depthScoreExponent));
+    for (Synset attribute : childNode.getAttributes()) {
+      fathomAttributesExponential(wordSense, attribute, relvGlossWords,
+          depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+    }
+
+  }
+
+  private void fathomPertainyms(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double depthScoreWeight) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setPertainyms();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+        * assessFeature(childNode.getPertainyms(), relvWords));
+    for (Synset pertainym : childNode.getPertainyms()) {
+      fathomPertainyms(wordSense, pertainym, relvGlossWords, depth - 1,
+          maxDepth, depthScoreWeight);
+    }
+
+  }
+
+  private void fathomPertainymsExponential(WordSense wordSense, Synset child,
+      ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+      double intersectionExponent, double depthScoreExponent) {
+    if (depth == 0)
+      return;
+
+    String[] tokenizedGloss = WSDHelper.getTokenizer().tokenize(
+        child.getGloss().toString());
+    ArrayList<WordPOS> relvGlossWords = WSDHelper
+        .getAllRelevantWords(tokenizedGloss);
+
+    SynNode childNode = new SynNode(child, relvGlossWords);
+
+    childNode.setPertainyms();
+    wordSense.setScore(wordSense.getScore()
+        + Math.pow(assessFeature(childNode.getPertainyms(), relvWords),
+            intersectionExponent) / Math.pow(depth, depthScoreExponent));
+    for (Synset pertainym : childNode.getPertainyms()) {
+      fathomPertainymsExponential(wordSense, pertainym, relvGlossWords,
+          depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+    }
+
+  }
+
   /**
    * Checks if the feature should be counted in the score
    * 
@@ -810,9 +907,9 @@ public class Lesk implements WSDisambigu
     for (Synset synset : featureSynsets) {
       SynNode subNode = new SynNode(synset, relevantWords);
 
-      String[] tokenizedSense = Loader.getTokenizer().tokenize(
+      String[] tokenizedSense = WSDHelper.getTokenizer().tokenize(
           subNode.getGloss());
-      ArrayList<WordPOS> relvSenseWords = PreProcessor
+      ArrayList<WordPOS> relvSenseWords = WSDHelper
           .getAllRelevantWords(tokenizedSense);
 
       for (WordPOS senseWord : relvSenseWords) {
@@ -883,7 +980,7 @@ public class Lesk implements WSDisambigu
   @Override
   public String[] disambiguate(WSDSample sample) {
     // if the word is not relevant return null
-    if (!Constants.isRelevant(sample.getTargetTag())) {
+    if (!WSDHelper.isRelevant(sample.getTargetTag())) {
       return null;
     }
 
@@ -896,70 +993,20 @@ public class Lesk implements WSDisambigu
     case LESK_BASIC_CTXT:
       wsenses = basicContextual(sample);
       break;
-    case LESK_BASIC_CTXT_WIN:
-      wsenses = basicContextual(sample, this.params.win_b_size);
-      break;
-    case LESK_BASIC_CTXT_WIN_BF:
-      wsenses = basicContextual(sample, this.params.win_b_size,
-          this.params.win_f_size);
-      break;
     case LESK_EXT:
-      wsenses = extended(sample, this.params.depth, this.params.depth_weight,
-          this.params.fathom_synonyms, this.params.fathom_hypernyms,
-          this.params.fathom_hyponyms, this.params.fathom_meronyms,
-          this.params.fathom_holonyms);
+      wsenses = extended(sample);
       break;
     case LESK_EXT_CTXT:
-      wsenses = extendedContextual(sample, this.params.depth,
-          this.params.depth_weight, this.params.fathom_synonyms,
-          this.params.fathom_hypernyms, this.params.fathom_hyponyms,
-          this.params.fathom_meronyms, this.params.fathom_holonyms);
-      break;
-    case LESK_EXT_CTXT_WIN:
-      wsenses = extendedContextual(sample, this.params.win_b_size,
-          this.params.depth, this.params.depth_weight,
-          this.params.fathom_synonyms, this.params.fathom_hypernyms,
-          this.params.fathom_hyponyms, this.params.fathom_meronyms,
-          this.params.fathom_holonyms);
-      break;
-    case LESK_EXT_CTXT_WIN_BF:
-      wsenses = extendedContextual(sample, this.params.win_b_size,
-          this.params.win_f_size, this.params.depth, this.params.depth_weight,
-          this.params.fathom_synonyms, this.params.fathom_hypernyms,
-          this.params.fathom_hyponyms, this.params.fathom_meronyms,
-          this.params.fathom_holonyms);
+      wsenses = extendedContextual(sample);
       break;
     case LESK_EXT_EXP:
-      wsenses = extendedExponential(sample, this.params.depth,
-          this.params.iexp, this.params.dexp, this.params.fathom_synonyms,
-          this.params.fathom_hypernyms, this.params.fathom_hyponyms,
-          this.params.fathom_meronyms, this.params.fathom_holonyms);
+      wsenses = extendedExponential(sample);
       break;
     case LESK_EXT_EXP_CTXT:
-      wsenses = extendedExponentialContextual(sample, this.params.depth,
-          this.params.iexp, this.params.dexp, this.params.fathom_synonyms,
-          this.params.fathom_hypernyms, this.params.fathom_hyponyms,
-          this.params.fathom_meronyms, this.params.fathom_holonyms);
-      break;
-    case LESK_EXT_EXP_CTXT_WIN:
-      wsenses = extendedExponentialContextual(sample, this.params.win_b_size,
-          this.params.depth, this.params.iexp, this.params.dexp,
-          this.params.fathom_synonyms, this.params.fathom_hypernyms,
-          this.params.fathom_hyponyms, this.params.fathom_meronyms,
-          this.params.fathom_holonyms);
-      break;
-    case LESK_EXT_EXP_CTXT_WIN_BF:
-      wsenses = extendedExponentialContextual(sample, this.params.win_b_size,
-          this.params.win_f_size, this.params.depth, this.params.iexp,
-          this.params.dexp, this.params.fathom_synonyms,
-          this.params.fathom_hypernyms, this.params.fathom_hyponyms,
-          this.params.fathom_meronyms, this.params.fathom_holonyms);
+      wsenses = extendedExponentialContextual(sample);
       break;
     default:
-      wsenses = extendedExponentialContextual(sample,
-          LeskParameters.DFLT_WIN_SIZE, LeskParameters.DFLT_DEPTH,
-          LeskParameters.DFLT_IEXP, LeskParameters.DFLT_DEXP, true, true, true,
-          true, true);
+      wsenses = extendedExponentialContextual(sample);
       break;
     }
 


Reply via email to