Author: joern
Date: Wed Jun 24 20:19:05 2015
New Revision: 1687358

URL: http://svn.apache.org/r1687358
Log:
Added initial version of the wsd component. Thanks to Anthony Beylerian and 
Mondher Bouazizi for the contribution.

Added:
    opennlp/sandbox/opennlp-wsd/
    opennlp/sandbox/opennlp-wsd/src/
    opennlp/sandbox/opennlp-wsd/src/main/
    opennlp/sandbox/opennlp-wsd/src/main/java/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
    opennlp/sandbox/opennlp-wsd/src/test/
    opennlp/sandbox/opennlp-wsd/src/test/java/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/
    
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,134 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import net.sf.extjwnl.data.POS;
+
+
+public class Constants {
+       
+       /**
+        * Separator placed between the components of a file-system path. It is
+        * taken from the running platform instead of being fixed to the Windows
+        * backslash, so that paths assembled with it also resolve elsewhere.
+        */
+       public static String osPathChar = java.io.File.separator;
+
+       /** List of all the PoS tags. */
+       public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
+                       "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
+                       "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB",
+                       "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
+
+       /** List of the PoS tags of which the senses are to be extracted. */
+       public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS",
+                       "RB", "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
+
+
+       /** List of negation words. */
+       public static ArrayList<String> negationWords = new ArrayList<String>(
+                       Arrays.asList("not", "no", "never", "none", "nor", "non"));
+               
+       // List of Stop Words
+       public static ArrayList<String> stopWords = new 
ArrayList<String>(Arrays.asList( "a", "able", "about", "above", "according", 
"accordingly", "across", "actually", "after",
+               "afterwards", "again", "against", "ain't", "all", "allow", 
"allows", "almost", "alone", "along", "already", "also",
+               "although", "always", "am", "among", "amongst", "an", "and", 
"another", "any", "anybody", "anyhow", "anyone", "anything",
+               "anyway", "anyways", "anywhere", "apart", "appear", 
"appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask",
+               "asking", "associated", "at", "available", "away", "awfully", 
"be", "became", "because", "become", "becomes", "becoming", "been",
+               "before", "beforehand", "behind", "being", "believe", "below", 
"beside", "besides", "best", "better", "between", "beyond", "both",
+               "brief", "but", "by", "came", "can", "cannot", "cant", "can't", 
"cause", "causes", "certain", "certainly", "changes", "clearly",
+               "c'mon", "co", "com", "come", "comes", "concerning", 
"consequently", "consider", "considering", "contain", "containing",
+               "contains", "corresponding", "could", "couldn't", "course", 
"c's", "currently", "definitely", "described", "despite", "did", "didn't",
+               "different", "do", "does", "doesn't", "doing", "done", "don't", 
"down", "downwards", "during", "each", "edu", "eg", "eight",
+               "either", "else", "elsewhere", "enough", "entirely", 
"especially", "et", "etc", "even", "ever", "every", "everybody", "everyone",
+               "everything", "everywhere", "ex", "exactly", "example", 
"except", "far", "few", "fifth", "first", "five", "followed", "following",
+               "follows", "for", "former", "formerly", "forth", "four", 
"from", "further", "furthermore", "get", "gets", "getting", "given",
+               "gives", "go", "goes", "going", "gone", "got", "gotten", 
"greetings", "had", "hadn't", "happens", "hardly", "has", "hasn't",
+               "have", "haven't", "having", "he", "hello", "help", "hence", 
"her", "here", "hereafter", "hereby", "herein", "here's", "hereupon",
+               "hers", "herself", "he's", "hi", "him", "himself", "his", 
"hither", "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
+               "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc", 
"indeed", "indicate", "indicated", "indicates", "inner", "insofar",
+               "instead", "into", "inward", "is", "isn't", "it", "it'd", 
"it'll", "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
+               "know", "known", "knows", "last", "lately", "later", "latter", 
"latterly", "least", "less", "lest", "let", "let's", "like",
+               "liked", "likely", "little", "look", "looking", "looks", "ltd", 
"mainly", "many", "may", "maybe", "me", "mean", "meanwhile",
+               "merely", "might", "more", "moreover", "most", "mostly", 
"much", "must", "my", "myself", "name", "namely", "nd", "near", "nearly",
+               "necessary", "need", "needs", "neither", "never", 
"nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone",
+               "nor", "normally", "not", "nothing", "novel", "now", "nowhere", 
"obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on",
+               "once", "one", "ones", "only", "onto", "or", "other", "others", 
"otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
+               "over", "overall", "own", "particular", "particularly", "per", 
"perhaps", "placed", "please", "plus", "possible", "presumably",
+               "probably", "provides", "que", "quite", "qv", "rather", "rd", 
"re", "really", "reasonably", "regarding", "regardless", "regards",
+               "relatively", "respectively", "right", "said", "same", "saw", 
"say", "saying", "says", "second", "secondly", "see", "seeing",
+               "seem", "seemed", "seeming", "seems", "seen", "self", "selves", 
"sensible", "sent", "serious", "seriously", "seven", "several",
+               "shall", "she", "should", "shouldn't", "since", "six", "so", 
"some", "somebody", "somehow", "someone", "something", "sometime",
+               "sometimes", "somewhat", "somewhere", "soon", "sorry", 
"specified",     "specify", "specifying", "still", "sub", "such", "sup", "sure",
+               "take", "taken", "tell", "tends", "th", "than", "thank", 
"thanks", "thanx", "that", "thats", "that's", "the", "their", "theirs",
+               "them", "themselves", "then", "thence", "there", "thereafter", 
"thereby", "therefore", "therein", "theres", "there's",
+               "thereupon", "these", "they", "they'd", "they'll", "they're", 
"they've", "think", "third", "this", "thorough", "thoroughly",
+               "those", "though", "three", "through", "throughout", "thru", 
"thus", "to", "together", "too", "took", "toward", "towards",
+               "tried", "tries", "truly", "try", "trying", "t's", "twice", 
"two", "un", "under", "unfortunately", "unless", "unlikely", "until",
+               "unto", "up", "upon", "us", "use", "used", "useful", "uses", 
"using", "usually", "value", "various", "very", "via", "viz", "vs",
+               "want", "wants", "was", "wasn't", "way", "we", "we'd", 
"welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've",
+               "what", "whatever", "what's", "when", "whence", "whenever",     
"where", "whereafter", "whereas", "whereby", "wherein", "where's",
+               "whereupon", "wherever", "whether", "which", "while", 
"whither", "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
+               "willing", "wish", "with", "within", "without", "wonder", 
"won't", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll",
+               "your", "you're", "yours", "yourself", "yourselves", "you've", 
"zero"));
+               
+       /**
+        * Prints the string form of an object to standard output, followed by a
+        * line break.
+        *
+        * @param in the object to print
+        */
+       public static void print(Object in) {
+               System.out.println(in);
+       }
+
+       /**
+        * Prints a one-dimensional array on one line, in list notation.
+        *
+        * @param array the array to print
+        */
+       public static void print(Object[] array) {
+               System.out.println(Arrays.asList(array));
+       }
+
+       /**
+        * Prints a two-dimensional array as a bracketed block with one row per
+        * line, e.g. {@code [[a, b]} / {@code [c]]}.
+        *
+        * @param array the rows to print
+        */
+       public static void print(Object[][] array) {
+               // The text is assembled first so that the closing bracket is emitted
+               // exactly once, after the last row, instead of once per row.
+               StringBuilder out = new StringBuilder("[");
+               for (int i = 0; i < array.length; i++) {
+                       if (i > 0) {
+                               out.append("\n");
+                       }
+                       out.append(Arrays.toString(array[i]));
+               }
+               out.append("]");
+               System.out.println(out);
+       }
+
+       /**
+        * Returns the PoS (class {@link POS}) that corresponds to a PoS-tag.
+        *
+        * @param posTag the PoS-tag to convert
+        * @return the adjective, adverb, noun or verb constant whose tag group
+        *         contains {@code posTag}, or {@code null} if no group does
+        */
+       public static POS getPOS(String posTag) {
+
+               if (Arrays.asList("JJ", "JJR", "JJS").contains(posTag)) {
+                       return POS.ADJECTIVE;
+               }
+               if (Arrays.asList("RB", "RBR", "RBS").contains(posTag)) {
+                       return POS.ADVERB;
+               }
+               if (Arrays.asList("NN", "NNS", "NNP", "NNPS").contains(posTag)) {
+                       return POS.NOUN;
+               }
+               if (Arrays.asList("VB", "VBD", "VBG", "VBN", "VBP", "VBZ").contains(posTag)) {
+                       return POS.VERB;
+               }
+               return null;
+
+       }
+       
+       /**
+        * Checks whether a list of arrays contains an array, comparing candidates
+        * with {@link #areStringArraysEqual(String[], String[])}.
+        *
+        * @param array    the array to look for
+        * @param fullList the arrays to search
+        * @return {@code true} as soon as one entry of {@code fullList} matches
+        */
+       public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
+               boolean found = false;
+               for (int i = 0; i < fullList.size() && !found; i++) {
+                       found = areStringArraysEqual(array, fullList.get(i));
+               }
+               return found;
+       }
+
+       /**
+        * Checks whether two arrays of strings are equal, i.e. have the same
+        * length and pairwise equal elements.
+        *
+        * @param array1 the first array, may be {@code null}
+        * @param array2 the second array, may be {@code null}
+        * @return {@code true} if both arrays are non-null and equal element by
+        *         element; {@code false} otherwise, including when either is null
+        */
+       public static boolean areStringArraysEqual(String[] array1, String[] array2) {
+
+               // A null reference must be detected with ==; calling equals(null) on
+               // it throws a NullPointerException instead of answering the question.
+               if (array1 == null || array2 == null) {
+                       return false;
+               }
+
+               // Arrays.equals compares lengths and elements and tolerates null
+               // elements, which a direct element.equals(...) call would not.
+               return Arrays.equals(array1, array2);
+
+       }
+       
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,227 @@
+package opennlp.tools.disambiguator;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.HashMap;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.dictionary.Dictionary;
+import net.sf.extjwnl.dictionary.MorphologicalProcessor;
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.lemmatizer.SimpleLemmatizer;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
+
+public class Loader {
+
+       /**
+        * Directory the model and dictionary files are read from. Forward slashes
+        * are used because Java file APIs resolve them on every platform, whereas
+        * backslashes only work as separators on Windows.
+        */
+       private static String modelsDir = "src/test/resources/opennlp/tools/disambiguator/";
+
+       // lazily created, shared tool instances
+       private static SentenceDetectorME sdetector;
+       private static Tokenizer tokenizer;
+       private static POSTaggerME tagger;
+       private static NameFinderME nameFinder;
+       private static SimpleLemmatizer lemmatizer;
+
+       // WordNet access
+       private static Dictionary dictionary;
+       private static MorphologicalProcessor morph;
+       private static boolean IsInitialized = false;
+
+       // local caches for faster lookup
+       private static HashMap<String,Object> stemCache;
+       private static HashMap<String,Object> stopCache;
+       private static HashMap<String,Object> relvCache;
+       
+       
+
+       /**
+        * Creates a loader and immediately loads all models, the dictionary and
+        * the lookup caches through {@link #load()}.
+        */
+       public Loader() {
+               load();
+       }
+
+       /**
+        * Returns the lookup cache of relevant PoS tags, building it from
+        * {@code Constants.relevantPOS} when it is missing or empty. Only the keys
+        * carry information; every value is {@code null}.
+        *
+        * @return the relevant-tag cache
+        */
+       public static HashMap<String,Object> getRelvCache() {
+               boolean mustBuild = relvCache == null || relvCache.isEmpty();
+               if (mustBuild) {
+                       HashMap<String, Object> fresh = new HashMap<String, Object>();
+                       for (String tag : Constants.relevantPOS) {
+                               fresh.put(tag, null);
+                       }
+                       relvCache = fresh;
+               }
+               return relvCache;
+       }
+       
+       /**
+        * Returns the lookup cache of stop words, building it from
+        * {@code Constants.stopWords} when it is missing or empty. Only the keys
+        * carry information; every value is {@code null}.
+        *
+        * @return the stop-word cache
+        */
+       public static HashMap<String,Object> getStopCache() {
+               boolean mustBuild = stopCache == null || stopCache.isEmpty();
+               if (mustBuild) {
+                       HashMap<String, Object> fresh = new HashMap<String, Object>();
+                       for (String stopWord : Constants.stopWords) {
+                               fresh.put(stopWord, null);
+                       }
+                       stopCache = fresh;
+               }
+               return stopCache;
+       }
+       
+       /**
+        * Returns the stem cache, building it when it is missing or empty. The
+        * cache holds one initially empty map per part of speech, stored under
+        * the key of that part of speech.
+        *
+        * @return the stem cache
+        */
+       public static HashMap<String,Object> getStemCache() {
+               boolean mustBuild = stemCache == null || stemCache.isEmpty();
+               if (mustBuild) {
+                       HashMap<String, Object> fresh = new HashMap<String, Object>();
+                       for (Object pos : POS.getAllPOS()) {
+                               String key = ((POS) pos).getKey();
+                               fresh.put(key, new HashMap());
+                       }
+                       stemCache = fresh;
+               }
+               return stemCache;
+       }
+       
+       /**
+        * Returns the morphological processor of the WordNet dictionary, fetching
+        * it on first use.
+        *
+        * @return the processor, or {@code null} if the dictionary could not be
+        *         opened
+        */
+       public static MorphologicalProcessor getMorph() {
+               if (morph == null) {
+                       // Go through the accessor: the dictionary field itself is still
+                       // null when nothing has been loaded yet.
+                       Dictionary dict = getDictionary();
+                       if (dict != null) {
+                               morph = dict.getMorphologicalProcessor();
+                       }
+               }
+               return morph;
+       }
+
+       /**
+        * Returns the WordNet dictionary, opening the default resource instance on
+        * first use. A failure to open it is printed and leaves the result
+        * {@code null}.
+        *
+        * @return the dictionary, or {@code null} if it could not be opened
+        */
+       public static Dictionary getDictionary() {
+               if (dictionary != null) {
+                       return dictionary;
+               }
+               try {
+                       dictionary = Dictionary.getDefaultResourceInstance();
+               } catch (JWNLException e) {
+                       e.printStackTrace();
+               }
+               return dictionary;
+       }
+
+       /**
+        * Returns the lemmatizer, creating it from {@code en-lemmatizer.dict} in
+        * the models directory on first use. A read failure is printed and leaves
+        * the result {@code null}.
+        *
+        * @return the lemmatizer, or {@code null} if it could not be created
+        */
+       public static SimpleLemmatizer getLemmatizer() {
+               if (lemmatizer == null) {
+                       FileInputStream in = null;
+                       try {
+                               in = new FileInputStream(modelsDir + "en-lemmatizer.dict");
+                               lemmatizer = new SimpleLemmatizer(in);
+                       } catch (IOException e) {
+                               e.printStackTrace();
+                       } finally {
+                               // the dictionary has been read by now; release the file handle
+                               if (in != null) {
+                                       try {
+                                               in.close();
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
+                                       }
+                               }
+                       }
+               }
+
+               return lemmatizer;
+       }
+
+       /**
+        * Returns the name finder, creating it from {@code en-ner-person.bin} in
+        * the models directory on first use. A read failure is printed and leaves
+        * the result {@code null}.
+        *
+        * @return the name finder, or {@code null} if it could not be created
+        */
+       public static NameFinderME getNameFinder() {
+               if (nameFinder == null) {
+                       FileInputStream in = null;
+                       try {
+                               in = new FileInputStream(modelsDir + "en-ner-person.bin");
+                               TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(in);
+                               nameFinder = new NameFinderME(nameFinderModel);
+                       } catch (IOException e) {
+                               e.printStackTrace();
+                       } finally {
+                               // the model has been read by now; release the file handle
+                               if (in != null) {
+                                       try {
+                                               in.close();
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
+                                       }
+                               }
+                       }
+               }
+               return nameFinder;
+       }
+
+       /**
+        * Returns the PoS tagger, creating it from {@code en-pos-maxent.bin} in
+        * the models directory on first use.
+        *
+        * @return the PoS tagger
+        */
+       public static POSTaggerME getTagger() {
+               if (tagger != null) {
+                       return tagger;
+               }
+               File modelFile = new File(modelsDir + "en-pos-maxent.bin");
+               tagger = new POSTaggerME(new POSModelLoader().load(modelFile));
+               return tagger;
+       }
+
+       /**
+        * Returns the sentence detector, creating it from {@code en-sent.bin} in
+        * the models directory on first use. A read failure is printed and leaves
+        * the result {@code null}.
+        *
+        * @return the sentence detector, or {@code null} if it could not be created
+        */
+       public static SentenceDetectorME getSDetector() {
+               if (sdetector == null) {
+                       FileInputStream in = null;
+                       try {
+                               in = new FileInputStream(modelsDir + "en-sent.bin");
+                               SentenceModel enSentModel = new SentenceModel(in);
+                               sdetector = new SentenceDetectorME(enSentModel);
+                       } catch (IOException e) {
+                               e.printStackTrace();
+                       } finally {
+                               // the model has been read by now; release the file handle
+                               if (in != null) {
+                                       try {
+                                               in.close();
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
+                                       }
+                               }
+                       }
+               }
+               return sdetector;
+       }
+
+       /**
+        * Returns the tokenizer, creating it from {@code en-token.bin} in the
+        * models directory on first use. A read failure is printed and leaves the
+        * result {@code null}.
+        *
+        * @return the tokenizer, or {@code null} if it could not be created
+        */
+       public static Tokenizer getTokenizer() {
+               if (tokenizer == null) {
+                       FileInputStream in = null;
+                       try {
+                               in = new FileInputStream(modelsDir + "en-token.bin");
+                               TokenizerModel tokenizerModel = new TokenizerModel(in);
+                               tokenizer = new TokenizerME(tokenizerModel);
+                       } catch (IOException e) {
+                               e.printStackTrace();
+                       } finally {
+                               // the model has been read by now; release the file handle
+                               if (in != null) {
+                                       try {
+                                               in.close();
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
+                                       }
+                               }
+                       }
+
+               }
+               return tokenizer;
+       }
+
+       /**
+        * Reports whether the dictionary, the morphological processor and the
+        * three lookup caches have all been set.
+        *
+        * @return {@code true} only if none of those five fields is {@code null}
+        */
+       public static boolean isInitialized() {
+               if (dictionary == null || morph == null) {
+                       return false;
+               }
+               return stemCache != null && stopCache != null && relvCache != null;
+       }
+       
+       /**
+        * Loads, in this order, the sentence detector, tokenizer, PoS tagger,
+        * name finder, lemmatizer, WordNet dictionary with its morphological
+        * processor, and the three lookup caches, replacing anything set before.
+        * The first failure is printed and stops the sequence, leaving the
+        * remaining fields as they were.
+        */
+       public void load() {
+               try {
+                       FileInputStream sentIn = new FileInputStream(modelsDir + "en-sent.bin");
+                       try {
+                               sdetector = new SentenceDetectorME(new SentenceModel(sentIn));
+                       } finally {
+                               closeQuietly(sentIn);
+                       }
+
+                       FileInputStream tokenIn = new FileInputStream(modelsDir + "en-token.bin");
+                       try {
+                               tokenizer = new TokenizerME(new TokenizerModel(tokenIn));
+                       } finally {
+                               closeQuietly(tokenIn);
+                       }
+
+                       POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
+                       tagger = new POSTaggerME(posTaggerModel);
+
+                       FileInputStream nameIn = new FileInputStream(modelsDir + "en-ner-person.bin");
+                       try {
+                               nameFinder = new NameFinderME(new TokenNameFinderModel(nameIn));
+                       } finally {
+                               closeQuietly(nameIn);
+                       }
+
+                       FileInputStream lemmaIn = new FileInputStream(modelsDir + "en-lemmatizer.dict");
+                       try {
+                               lemmatizer = new SimpleLemmatizer(lemmaIn);
+                       } finally {
+                               closeQuietly(lemmaIn);
+                       }
+
+                       dictionary = Dictionary.getDefaultResourceInstance();
+                       morph = dictionary.getMorphologicalProcessor();
+
+                       // loading lookup caches
+                       stemCache = new HashMap<String, Object>();
+                       for (Object pos : POS.getAllPOS()) {
+                               stemCache.put(((POS) pos).getKey(), new HashMap());
+                       }
+
+                       stopCache = new HashMap<String, Object>();
+                       for (String s : Constants.stopWords) {
+                               stopCache.put(s, null);
+                       }
+                       relvCache = new HashMap<String, Object>();
+                       for (String t : Constants.relevantPOS) {
+                               relvCache.put(t, null);
+                       }
+
+
+                       if (isInitialized()) {
+                               Constants.print("loading was succesfull");
+                       } else {
+                               Constants.print("loading was unsuccesfull");
+                       }
+
+               } catch (FileNotFoundException e) {
+                       e.printStackTrace();
+               } catch (InvalidFormatException e) {
+                       e.printStackTrace();
+               } catch (IOException e) {
+                       e.printStackTrace();
+               } catch (JWNLException e) {
+                       e.printStackTrace();
+               }
+       }
+
+       /**
+        * Closes a model stream once its content has been read; a failure while
+        * closing is ignored because nothing useful can be done about it.
+        */
+       private static void closeQuietly(FileInputStream in) {
+               try {
+                       in.close();
+               } catch (IOException ignored) {
+                       // the data has already been consumed
+               }
+       }
+
+       /**
+        * Closes the WordNet dictionary. Does nothing when no dictionary has been
+        * opened, so it is safe to call before any loading took place.
+        */
+       public static void unload ()
+       {
+               if (dictionary != null) {
+                       dictionary.close();
+               }
+       }
+
+
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java 
(added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java 
Wed Jun 24 20:19:05 2015
@@ -0,0 +1,158 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.PointerUtils;
+import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.data.Word;
+import net.sf.extjwnl.data.list.PointerTargetNode;
+import net.sf.extjwnl.data.list.PointerTargetNodeList;
+
+
+
+/**
+ * Convenience class to access some features.
+ */
+
+public class Node {
+
+  // Synset given as this node's parent; left unset by the two-argument constructor.
+  public Synset parent;
+  // Synset this node wraps.
+  public Synset synset;
+  
+  // Sense-relevant words supplied by the constructor or the setter.
+  protected ArrayList<WordPOS> senseRelevantWords;
+
+  // Related synsets; each list starts empty and is appended to by its set...() method.
+  public ArrayList<Synset> hypernyms = new ArrayList<Synset>();
+  public ArrayList<Synset> hyponyms = new ArrayList<Synset>();
+  public ArrayList<Synset> meronyms = new ArrayList<Synset>();
+  public ArrayList<Synset> holonyms = new ArrayList<Synset>();
+  
+  // Words of the synset; starts empty and is appended to by setSynonyms().
+  public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
+  
+  
+  /**
+   * Creates a node for a synset that has a known parent.
+   *
+   * @param parent             the parent synset
+   * @param synSet             the synset this node wraps
+   * @param senseRelevantWords the sense-relevant words to store
+   */
+  public Node(Synset parent, Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
+    this.senseRelevantWords = senseRelevantWords;
+    this.synset = synSet;
+    this.parent = parent;
+  }
+
+  /**
+   * Creates a node for a synset without a parent.
+   *
+   * @param synSet             the synset this node wraps
+   * @param senseRelevantWords the sense-relevant words to store
+   */
+  public Node(Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
+    this(null, synSet, senseRelevantWords);
+  }
+  
+  
+  /**
+   * @return the sense-relevant words stored in this node
+   */
+  public ArrayList<WordPOS> getSenseRelevantWords() {
+    return this.senseRelevantWords;
+  }
+
+  /**
+   * @param senseRelevantWords the sense-relevant words to store in this node
+   */
+  public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
+  /**
+   * @return the gloss of the wrapped synset
+   */
+  public String getSense() {
+    String gloss = this.synset.getGloss();
+    return gloss.toString();
+  }
+
+  
+  /**
+   * Looks up the direct hypernyms of the wrapped synset and appends their
+   * synsets to {@link #hypernyms}. If the lookup fails the error is printed
+   * and nothing is appended.
+   */
+  public void setHypernyms() {
+    PointerTargetNodeList found = new PointerTargetNodeList();
+    try {
+      found = PointerUtils.getDirectHypernyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hypernyms");
+      e.printStackTrace();
+    }
+
+    for (Object node : found) {
+      this.hypernyms.add(((PointerTargetNode) node).getSynset());
+    }
+
+  }
+
+  /**
+   * Looks up the meronyms of the wrapped synset and appends their synsets to
+   * {@link #meronyms}. If the lookup fails the error is printed and nothing
+   * is appended.
+   */
+  public void setMeronyms() {
+    PointerTargetNodeList found = new PointerTargetNodeList();
+    try {
+      found = PointerUtils.getMeronyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  meronyms");
+      e.printStackTrace();
+    }
+
+    for (Object node : found) {
+      this.meronyms.add(((PointerTargetNode) node).getSynset());
+    }
+  }
+  
+  /**
+   * Looks up the holonyms of the wrapped synset and appends their synsets to
+   * {@link #holonyms}. If the lookup fails the error is printed and nothing
+   * is appended.
+   */
+  public void setHolonyms() {
+    PointerTargetNodeList found = new PointerTargetNodeList();
+    try {
+      found = PointerUtils.getHolonyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  holonyms");
+      e.printStackTrace();
+    }
+
+    for (Object node : found) {
+      this.holonyms.add(((PointerTargetNode) node).getSynset());
+    }
+
+  }
+  
+  /**
+   * Looks up the direct hyponyms of the wrapped synset and appends their
+   * synsets to {@link #hyponyms}. If the lookup fails the error is printed
+   * and nothing is appended.
+   */
+  public void setHyponyms() {
+    PointerTargetNodeList found = new PointerTargetNodeList();
+    try {
+      found = PointerUtils.getDirectHyponyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hyponyms");
+      e.printStackTrace();
+    }
+
+    for (Object node : found) {
+      this.hyponyms.add(((PointerTargetNode) node).getSynset());
+    }
+  }
+  
+  /**
+   * Appends one {@link WordPOS} per word of the wrapped synset to
+   * {@link #synonyms}.
+   */
+  public void setSynonyms()
+  {
+    for (Word word : synset.getWords()) {
+      // Use the lemma: Word.toString() yields a diagnostic description of the
+      // whole Word object, not the word's text.
+      synonyms.add(new WordPOS(word.getLemma(), word.getPOS()));
+    }
+  }
+  
+  /** @return the hypernym synsets collected so far */
+  public ArrayList<Synset> getHypernyms() {
+    return this.hypernyms;
+  }
+
+  /** @return the hyponym synsets collected so far */
+  public ArrayList<Synset> getHyponyms() {
+    return this.hyponyms;
+  }
+
+  /** @return the meronym synsets collected so far */
+  public ArrayList<Synset> getMeronyms() {
+    return this.meronyms;
+  }
+
+  /** @return the holonym synsets collected so far */
+  public ArrayList<Synset> getHolonyms() {
+    return this.holonyms;
+  }
+
+  /** @return the synonyms collected so far */
+  public ArrayList<WordPOS> getSynonyms() {
+    return this.synonyms;
+  }
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,163 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.IndexWord;
+import net.sf.extjwnl.data.POS;
+import opennlp.tools.util.Span;
+
+
+
+public class PreProcessor {
+
+       /** Creates a pre-processor; all functionality is offered by static methods. */
+       public PreProcessor() {
+       }
+
+       /**
+        * Splits a text into sentences with the loader's sentence detector.
+        *
+        * @param text the text to split
+        * @return the detected sentences
+        */
+       public static String[] split(String text) {
+               String[] sentences = Loader.getSDetector().sentDetect(text);
+               return sentences;
+       }
+
+       /**
+        * Splits a sentence into tokens with the loader's tokenizer.
+        *
+        * @param sentence the sentence to tokenize
+        * @return the tokens
+        */
+       public static String[] tokenize(String sentence) {
+               String[] tokens = Loader.getTokenizer().tokenize(sentence);
+               return tokens;
+       }
+
+       /**
+        * Tags a tokenized sentence with the loader's PoS tagger.
+        *
+        * @param tokenizedSentence the tokens to tag
+        * @return the tags produced by the tagger
+        */
+       public static String[] tag(String[] tokenizedSentence) {
+               String[] tags = Loader.getTagger().tag(tokenizedSentence);
+               return tags;
+       }
+
+       /**
+        * Lemmatizes a word with the loader's lemmatizer.
+        *
+        * @param word   the word to lemmatize
+        * @param posTag the PoS tag of the word
+        * @return the lemma returned by the lemmatizer
+        */
+       public static String lemmatize(String word, String posTag) {
+               String lemma = Loader.getLemmatizer().lemmatize(word, posTag);
+               return lemma;
+       }
+
+       /**
+        * Checks whether the loader's name finder reports a name for a word taken
+        * on its own.
+        *
+        * @param word the word to check
+        * @return {@code true} if the name finder returns at least one span
+        */
+       public static boolean isName(String word) {
+               String[] singleToken = new String[] { word };
+               Span[] found = Loader.getNameFinder().find(singleToken);
+               return found.length != 0;
+       }
+
+       /**
+        * Collects every word of a tokenized sentence that is not a stop word and
+        * whose PoS tag is one of the relevant tags.
+        *
+        * @param sentence the tokens of the sentence
+        * @return the relevant words, in sentence order
+        */
+       public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
+
+               ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+               String[] tags = tag(sentence);
+
+               for (int pos = 0; pos < sentence.length; pos++) {
+                       String token = sentence[pos];
+                       if (Loader.getStopCache().containsKey(token)) {
+                               continue;
+                       }
+                       if (!Loader.getRelvCache().containsKey(tags[pos])) {
+                               continue;
+                       }
+                       relevantWords.add(new WordPOS(token, Constants.getPOS(tags[pos])));
+               }
+               return relevantWords;
+       }
+
+
+       /**
+        * Collects the relevant words of the sentence a word to disambiguate
+        * belongs to.
+        *
+        * @param word the word whose sentence is examined
+        * @return the relevant words of that sentence
+        */
+       public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
+               String[] sentence = word.getSentence();
+               return getAllRelevantWords(sentence);
+       }
+
+
+       /**
+        * Collects the relevant words inside a window around the word to
+        * disambiguate. The word itself is excluded; positions outside the
+        * sentence are ignored.
+        *
+        * @param word        the word to disambiguate
+        * @param winBackward number of positions to look at before the word
+        * @param winForward  number of positions to look at after the word
+        * @return the relevant words of the window, in sentence order
+        */
+       public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word, int winBackward, int winForward) {
+
+               ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+               String[] sentence = word.getSentence();
+               String[] tags = tag(sentence);
+
+               int index = word.getWordIndex();
+
+               for (int i = index - winBackward; i <= index + winForward; i++) {
+
+                       boolean insideSentence = i >= 0 && i < sentence.length;
+                       if (!insideSentence || i == index) {
+                               continue;
+                       }
+                       if (Loader.getStopCache().containsKey(sentence[i])) {
+                               continue;
+                       }
+                       if (Loader.getRelvCache().containsKey(tags[i])) {
+                               relevantWords.add(new WordPOS(sentence[i], Constants.getPOS(tags[i])));
+                       }
+               }
+               return relevantWords;
+       }
+
+       
+       /**
+        * Stems a single word with the WordNet dictionary, gathering the base
+        * forms found for every part of speech.
+        *
+        * @param wordToStem
+        *            word to be stemmed
+        * @return the base forms found, or {@code null} if the loader is not
+        *         initialized, the word is {@code null}, no base form was found
+        *         or the lookup failed
+        */
+       public static List StemWordWithWordNet(WordPOS wordToStem) {
+               if (wordToStem == null || !Loader.isInitialized()) {
+                       return null;
+               }
+               ArrayList<String> baseForms = new ArrayList<String>();
+               try {
+                       for (Object pos : POS.getAllPOS()) {
+                               baseForms.addAll(Loader.getMorph().lookupAllBaseForms((POS) pos, wordToStem.getWord()));
+                       }
+               } catch (JWNLException e) {
+                       e.printStackTrace();
+                       return null;
+               }
+               if (baseForms.isEmpty()) {
+                       return null;
+               }
+               return baseForms;
+       }
+
+       /**
+        * Stems a single word. The word is first looked up in the stem cache of
+        * its part of speech; if it is not cached it is stemmed with WordNet and
+        * the outcome is stored, including the "could not be stemmed" outcome.
+        *
+        * @param wordToStem
+        *            word to be stemmed
+        * @return stemmed word list; {@code null} means the word is incorrect,
+        *         contains a digit, or {@code wordToStem} itself is {@code null}
+        */
+       public static List Stem(WordPOS wordToStem) {
+
+               if (wordToStem == null) {
+                       return null;
+               }
+               String word = wordToStem.getWord();
+
+               // don't check words with digits in them
+               if (containsNumbers(word)) {
+                       return null;
+               }
+
+               // per-PoS map from word to its stem list
+               HashMap posMap = (HashMap) Loader.getStemCache().get(wordToStem.getPOS().getKey());
+
+               // containsKey rather than get() != null, so that words already known
+               // to be unstemmable (cached with a null list) count as cache hits too
+               if (posMap.containsKey(word)) {
+                       return (List) posMap.get(word);
+               }
+
+               List stemList = StemWordWithWordNet(wordToStem);
+               // A null result obtained while the loader is not initialized says
+               // nothing about the word, so it must not be cached as "incorrect".
+               if (stemList != null || Loader.isInitialized()) {
+                       posMap.put(word, stemList);
+               }
+               return stemList;
+       }
+       
+       /**
+        * Checks if the word is or contains a number.
+        *
+        * @param word the word to inspect
+        * @return {@code true} if the word matches the pattern "anything, a digit
+        *         0-9, anything"
+        */
+       public static boolean containsNumbers(String word) {
+               boolean hasDigit = java.util.regex.Pattern.matches(".*[0-9].*", word);
+               return hasDigit;
+       }
+       
+       
+       
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,15 @@
+package opennlp.tools.disambiguator;
+
+import opennlp.tools.util.Span;
+
+/**
+ * The interface for word sense disambiguators.
+ */
+public interface WSDisambiguator {
+
+       /**
+        * Disambiguates the word located at a single position of the input.
+        *
+        * @param inputText the input text as an array of strings
+        * @param inputWordIndex the position in inputText of the word to disambiguate
+        * @return the disambiguation result; its content is defined by the implementation
+        */
+       String[] disambiguate(String[] inputText, int inputWordIndex);
+
+       /**
+        * Disambiguates the words designated by the given spans of the input.
+        *
+        * @param inputText the input text as an array of strings
+        * @param inputWordSpans the spans designating the words to disambiguate
+        * @return the disambiguation result; its content is defined by the implementation
+        */
+       String[] disambiguate(String[] inputText, Span[] inputWordSpans);
+
+}
\ No newline at end of file

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,94 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.IndexWord;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.dictionary.Dictionary;
+
+
+/**
+ * A word paired with its WordNet part of speech, giving access to the stems
+ * and the synsets of that word.
+ */
+public class WordPOS {
+
+       private final String word;
+       private final POS pos;
+       // Stems of the word, remembered after the first non-null lookup
+       private List stems;
+
+       /**
+        * Creates a word / part-of-speech pair.
+        *
+        * @param word the word itself
+        * @param pos the part of speech of the word
+        * @throws IllegalArgumentException if either argument is null
+        */
+       public WordPOS(String word, POS pos) throws IllegalArgumentException {
+               if (word == null || pos == null) {
+                       throw new IllegalArgumentException("Args are null");
+               }
+               this.word = word;
+               this.pos = pos;
+       }
+
+       /**
+        * @return the word given at construction time
+        */
+       public String getWord() {
+               return word;
+       }
+
+       /**
+        * @return the part of speech given at construction time
+        */
+       public POS getPOS() {
+               return pos;
+       }
+
+       /**
+        * Returns the stems of this word as computed by {@code PreProcessor.Stem}.
+        *
+        * @return the list of stems, or null when the word could not be stemmed
+        */
+       public List getStems() {
+               if (stems == null) {
+                       // the field used to stay null forever; keep the result so a
+                       // successful lookup is only delegated once per instance
+                       stems = PreProcessor.Stem(this);
+               }
+               return stems;
+       }
+
+       /**
+        * Returns the synsets (thus the senses) of the current word.
+        *
+        * @return the senses found in the dictionary, an empty list when the
+        *         dictionary has no entry for this word and part of speech, or
+        *         null when the dictionary lookup itself failed
+        */
+       public ArrayList<Synset> getSynsets() {
+               try {
+                       IndexWord indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
+                       if (indexWord == null) {
+                               // unknown word: report "no senses" instead of failing with
+                               // a NullPointerException on getSenses()
+                               return new ArrayList<Synset>();
+                       }
+                       return new ArrayList<Synset>(indexWord.getSenses());
+               } catch (JWNLException e) {
+                       e.printStackTrace();
+                       return null;
+               }
+       }
+
+       /**
+        * Uses stemming to check if two words are equivalent: they are when
+        * their stem lists share at least one element.
+        *
+        * @param wordToCompare the word to compare this one with
+        * @return true if the stem lists intersect; false if they do not or if
+        *         either word has no stem list
+        */
+       public boolean isStemEquivalent(WordPOS wordToCompare) {
+               List originalList = this.getStems();
+               List listToCompare = wordToCompare.getStems();
+
+               if (originalList == null || listToCompare == null) {
+                       // any of the two requested words do not exist
+                       return false;
+               }
+               return !Collections.disjoint(originalList, listToCompare);
+       }
+
+       /**
+        * Uses lemmas to check if two words are equivalent. Not implemented yet:
+        * the lemmatizer is invoked but its results are ignored.
+        *
+        * @param wordToCompare the word to compare this one with
+        * @return always false for now
+        */
+       public boolean isLemmaEquivalent(WordPOS wordToCompare) {
+               // TODO use lemmatizer to compare with lemmas
+               for (String posTag : Constants.allPOS) {
+                       Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), posTag);
+               }
+               return false;
+       }
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,75 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.concurrent.Semaphore;
+
+import opennlp.tools.disambiguator.lesk.WTDLesk;
+
+/**
+ * A candidate sense for a word to disambiguate: the word, the node holding
+ * the sense, an identifier and the score given to the sense.
+ */
+public class WordSense implements Comparable {
+
+       protected WTDLesk WTDLesk;
+       protected Node node;
+       protected int id;
+       protected double score;
+
+       /**
+        * Creates a sense for the given word backed by the given node.
+        *
+        * @param WTDLesk the word being disambiguated
+        * @param node the node holding the sense
+        */
+       public WordSense(WTDLesk WTDLesk, Node node) {
+               this.WTDLesk = WTDLesk;
+               this.node = node;
+       }
+
+       /**
+        * Creates an empty sense; word and node have to be set afterwards.
+        */
+       public WordSense() {
+       }
+
+       public WTDLesk getWTDLesk() {
+               return WTDLesk;
+       }
+
+       public void setWTDLesk(WTDLesk WTDLesk) {
+               this.WTDLesk = WTDLesk;
+       }
+
+       public Node getNode() {
+               return node;
+       }
+
+       public void setNode(Node node) {
+               this.node = node;
+       }
+
+       public double getScore() {
+               return score;
+       }
+
+       public void setScore(double score) {
+               this.score = score;
+       }
+
+       public int getId() {
+               return id;
+       }
+
+       public void setId(int id) {
+               this.id = id;
+       }
+
+       /**
+        * Orders senses by descending score, so that sorting a list puts the
+        * best scored sense first.
+        *
+        * @param o the other sense; must be a WordSense
+        * @return a negative value if this sense has the higher score, a positive
+        *         value if it has the lower score, 0 if both scores are equal
+        * @throws ClassCastException if o is not a WordSense
+        */
+       @Override
+       public int compareTo(Object o) {
+               // The former subtraction-based test never returned 0 and answered -1
+               // in both directions for equal scores, which violates the Comparable
+               // contract relied upon by Collections.sort.
+               return Double.compare(((WordSense) o).score, this.score);
+       }
+
+       /**
+        * @return the sense reported by the underlying node; the node must have
+        *         been set
+        */
+       public String getSense() {
+               return node.getSense();
+       }
+
+}
+
+

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,95 @@
+package opennlp.tools.disambiguator;
+
+
+
+/**
+ * A word to disambiguate, identified by its position inside a sentence,
+ * together with its part-of-speech tag and its sense.
+ */
+public class WordToDisambiguate {
+
+       protected String[] sentence;
+       protected int wordIndex;
+       protected String posTag;
+
+       protected int sense;
+
+       /**
+        * Creates a word to disambiguate and tags it using
+        * {@code PreProcessor.tag} on the whole sentence.
+        *
+        * @param sentence the sentence containing the word
+        * @param wordIndex the position of the word inside the sentence
+        * @param sense the sense of the word
+        * @throws IllegalArgumentException if wordIndex is not a valid index of sentence
+        */
+       public WordToDisambiguate(String[] sentence, int wordIndex, int sense) throws IllegalArgumentException {
+               super();
+
+               // valid positions are 0 .. length-1; the former check (index > length)
+               // let index == length and negative values through, so they failed
+               // later with an ArrayIndexOutOfBoundsException
+               if (wordIndex < 0 || wordIndex >= sentence.length) {
+                       throw new IllegalArgumentException("The index is out of bounds !");
+               }
+               this.sentence = sentence;
+               this.wordIndex = wordIndex;
+               String[] posTags = PreProcessor.tag(sentence);
+               this.posTag = posTags[wordIndex];
+               this.sense = sense;
+       }
+
+       /**
+        * Creates a word to disambiguate whose sense is initialised to -1.
+        *
+        * @param sentence the sentence containing the word
+        * @param wordIndex the position of the word inside the sentence
+        */
+       public WordToDisambiguate(String[] sentence, int wordIndex) {
+               this(sentence, wordIndex, -1);
+       }
+
+       // sentence
+       public String[] getSentence() {
+               return sentence;
+       }
+
+       public void setSentence(String[] sentence) {
+               this.sentence = sentence;
+       }
+
+       // word
+       public int getWordIndex() {
+               return wordIndex;
+       }
+
+       // note: the stored posTag is not recomputed when the index changes
+       public void setWordIndex(int wordIndex) {
+               this.wordIndex = wordIndex;
+       }
+
+       /**
+        * @return the element of the sentence found at the word index
+        */
+       public String getWord() {
+               return sentence[wordIndex];
+       }
+
+       // posTag
+       public String getPosTag() {
+               return posTag;
+       }
+
+       public void setPosTag(String posTag) {
+               this.posTag = posTag;
+       }
+
+       // sense
+       public int getSense() {
+               return sense;
+       }
+
+       public void setSense(int sense) {
+               this.sense = sense;
+       }
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,112 @@
+package opennlp.tools.disambiguator.ims;
+
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.Loader;
+
+
+/**
+ * Extracts, from a sentence and the position of a target word, the features
+ * used by the disambiguation algorithms.
+ */
+public class FeaturesExtractor {
+
+       public FeaturesExtractor() {
+               super();
+       }
+
+       /**
+        * @Algorithm: IMS (It Makes Sense)
+        *
+        * The following methods serve to extract the features for the algorithm IMS.
+        */
+
+       /**
+        * Collects the part-of-speech tags of the window reaching numberOfWords
+        * positions before and after the target word, target word included.
+        *
+        * @param sentence the sentence containing the target word
+        * @param wordIndex the position of the target word
+        * @param numberOfWords the number of positions taken on each side
+        * @return an array of 2*numberOfWords+1 tags; positions falling outside
+        *         the sentence hold the string "null"
+        */
+       public String[] extractPosOfSurroundingWords(String[] sentence, int wordIndex, int numberOfWords) {
+
+               String[] taggedSentence = Loader.getTagger().tag(sentence);
+
+               String[] tags = new String[2 * numberOfWords + 1];
+
+               int j = 0;
+
+               // inclusive upper bound: the window holds 2*numberOfWords+1 positions;
+               // the former exclusive bound left the last slot of the array unset
+               for (int i = wordIndex - numberOfWords; i <= wordIndex + numberOfWords; i++) {
+                       if (i < 0 || i >= sentence.length) {
+                               tags[j] = "null";
+                       } else {
+                               tags[j] = taggedSentence[i];
+                       }
+                       j++;
+               }
+
+               return tags;
+       }
+
+       /**
+        * Collects the lemmas of the words of the sentence other than the target
+        * word, skipping stop words and words without any letter in a-z.
+        *
+        * @param sentence the sentence containing the target word
+        * @param wordIndex the position of the target word
+        * @return the lemmas of the retained words, in sentence order
+        */
+       public String[] extractSurroundingWords(String[] sentence, int wordIndex) {
+
+               String[] posTags = Loader.getTagger().tag(sentence);
+
+               Constants.print(posTags);
+
+               ArrayList<String> contextWords = new ArrayList<String>();
+
+               for (int i = 0; i < sentence.length; i++) {
+
+                       if (!Constants.stopWords.contains(sentence[i].toLowerCase())
+                                       && (wordIndex != i)) {
+
+                               // only used to reject tokens made of no letter at all
+                               String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
+
+                               if (!word.equals("")) {
+                                       String lemma = Loader.getLemmatizer().lemmatize(sentence[i], posTags[i]);
+                                       contextWords.add(lemma);
+                               }
+                       }
+               }
+
+               return contextWords.toArray(new String[contextWords.size()]);
+       }
+
+       /**
+        * Collects pairs of words located around the target word: for each start
+        * position of the window, the pair made with the next word and the pair
+        * made with the word after that, as long as neither member is the target
+        * word and the second member lies before wordIndex + range.
+        *
+        * @param sentence the sentence containing the target word
+        * @param wordIndex the position of the target word
+        * @param range the number of positions considered on each side
+        * @return the collected pairs, each as a two-element array
+        */
+       public ArrayList<String[]> extractLocalCollocations(String[] sentence, int wordIndex, int range) {
+               // Here the author used only 11 features of this type. The range was
+               // set to 3 (bigrams extracted in a way that they are at max separated
+               // by 1 word).
+
+               ArrayList<String[]> localCollocations = new ArrayList<String[]>();
+
+               for (int i = wordIndex - range; i <= wordIndex + range; i++) {
+
+                       if (i >= 0 && i + 1 < sentence.length) {
+                               if ((i != wordIndex) && (i + 1 != wordIndex) && (i + 1 < wordIndex + range)) {
+                                       String[] lc = {sentence[i], sentence[i + 1]};
+                                       localCollocations.add(lc);
+                               }
+                               // i + 2 has to be checked against the sentence length as
+                               // well: the outer test only guarantees that i + 1 exists
+                               if ((i + 2 < sentence.length) && (i != wordIndex)
+                                               && (i + 2 != wordIndex) && (i + 2 < wordIndex + range)) {
+                                       String[] lc = {sentence[i], sentence[i + 2]};
+                                       localCollocations.add(lc);
+                               }
+                       }
+
+               }
+
+               return localCollocations;
+       }
+
+       /**
+        * @Algorithm: SST
+        *
+        * The following methods serve to extract the features for the algorithm SST.
+        */
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,99 @@
+package opennlp.tools.disambiguator.ims;
+
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.util.Span;
+
+public class IMS implements WSDisambiguator{
+       
+       FeaturesExtractor fExtractor = new FeaturesExtractor();
+       
+       /**
+        * PARAMETERS
+        */
+       
+       int numberOfSurroundingWords;
+       int ngram;
+       
+       
+       
+       /**
+        * Constructors
+        */
+       
+       public IMS() {
+               super();
+               numberOfSurroundingWords = 3;
+               ngram = 2;
+       }
+       
+       public IMS(int numberOfSurroundingWords, int ngram) {
+               super();
+               this.numberOfSurroundingWords = numberOfSurroundingWords;
+               this.ngram = ngram;
+       }
+       
+       
+       
+       /**
+        * INTERNAL METHODS
+        */
+       
+       private void extractFeature(ArrayList<WTDIMS> words) {
+               
+               for (WTDIMS word : words) {
+                       
+                       
word.setPosOfSurroundingWords(fExtractor.extractPosOfSurroundingWords(word.getSentence(),
 word.getWordIndex(), numberOfSurroundingWords));
+                       
+                       
word.setSurroundingWords(fExtractor.extractSurroundingWords(word.getSentence(), 
word.getWordIndex()));
+                       
+                       
word.setLocalCollocations(fExtractor.extractLocalCollocations(word.getSentence(),
 word.getWordIndex(), ngram));
+                       
+               }
+
+       }
+       
+       private ArrayList<WTDIMS> extractTrainingData(String xmlFile) {
+               
+               ArrayList<WTDIMS> trainingData = new ArrayList<WTDIMS>();
+               
+               /**
+                * TODO Processing of the xml File here (To check the format of 
the data)
+                */
+               
+               return trainingData;
+       }
+       
+       
+       public void train(String trainingSetFile) { // TODO To revise after 
finihsing the implementation of the collector
+               
+               ArrayList<WTDIMS> instances = 
extractTrainingData(trainingSetFile);
+               
+               extractFeature(instances);
+               
+               
+               
+       }
+       
+       
+       public void load (String binFile) {
+               // TODO After finishing training the training data
+                               
+       }
+       
+
+       @Override
+       public String[] disambiguate(String[] inputText, int inputWordIndex) {
+               // TODO Auto-generated method stub
+               return null;
+       }
+
+       @Override
+       public String[] disambiguate(String[] inputText, Span[] inputWordSpans) 
{
+               // TODO Auto-generated method stub
+               return null;
+       }
+       
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,56 @@
+package opennlp.tools.disambiguator.ims;
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.WordToDisambiguate;
+
+
+/**
+ * A word to disambiguate enriched with the three feature sets handled by IMS:
+ * POS tags of the surrounding words, surrounding words and local collocations.
+ */
+public class WTDIMS extends WordToDisambiguate {
+
+       protected String[] posOfSurroundingWords;
+       protected String[] surroundingWords;
+       protected ArrayList<String[]> localCollocations;
+
+       /**
+        * @param sentence the sentence containing the word
+        * @param word the position of the word inside the sentence
+        * @param sense the sense of the word
+        */
+       public WTDIMS(String[] sentence, int word, int sense) {
+               super(sentence, word, sense);
+       }
+
+       // ---- getters
+
+       public String[] getPosOfSurroundingWords() {
+               return this.posOfSurroundingWords;
+       }
+
+       public String[] getSurroundingWords() {
+               return this.surroundingWords;
+       }
+
+       public ArrayList<String[]> getLocalCollocations() {
+               return this.localCollocations;
+       }
+
+       // ---- setters
+
+       public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
+               this.posOfSurroundingWords = posOfSurroundingWords;
+       }
+
+       public void setSurroundingWords(String[] surroundingWords) {
+               this.surroundingWords = surroundingWords;
+       }
+
+       public void setLocalCollocations(ArrayList<String[]> localCollocations) {
+               this.localCollocations = localCollocations;
+       }
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,685 @@
+package opennlp.tools.disambiguator.lesk;
+
+import java.security.InvalidParameterException;
+import java.util.ArrayList;
+
+
+
+import java.util.Collections;
+import java.util.Map;
+
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.Loader;
+import opennlp.tools.disambiguator.Node;
+import opennlp.tools.disambiguator.PreProcessor;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.disambiguator.WordPOS;
+import opennlp.tools.disambiguator.WordSense;
+import opennlp.tools.util.Span;
+import net.sf.extjwnl.data.Synset;
+
+
+/**
+ * Class for the Lesk algorithm and variants.
+ */
+
+public class Lesk implements WSDisambiguator{
+
+       protected LeskParameters params;
+
+       public Loader loader;
+
+       /**
+        * Creates an instance configured with a fresh {@code LeskParameters}.
+        */
+       public Lesk() {
+               // a null argument makes setParams fall back to new LeskParameters()
+               this(null);
+       }
+
+       /**
+        * Creates an instance with its own loader and the given parameters.
+        *
+        * @param params the parameters to use, or null for a fresh LeskParameters
+        * @throws InvalidParameterException if params is not null and not valid
+        */
+       public Lesk(LeskParameters params) throws InvalidParameterException {
+               this.loader = new Loader();
+               setParams(params);
+       }
+
+       public void setParams(LeskParameters params) throws 
InvalidParameterException{
+               if(params==null){
+                       this.params = new LeskParameters();
+               } 
+               else{
+                       if (params.isValid()){
+                               this.params = params;
+                       }else{
+                               throw new InvalidParameterException("wrong 
params");
+                       }
+               }
+       }
+
+       /**
+        * Scores every sense of the word against all the relevant words returned
+        * by {@code PreProcessor.getAllRelevantWords}: the score of a sense is the
+        * number of (sense word, context word) pairs that are stem equivalent.
+        *
+        * @param wtd the word to disambiguate
+        * @return the senses produced by updateSenses with their scores set, in
+        *         the order returned by updateSenses
+        */
+       public ArrayList<WordSense> basic(WTDLesk wtd) {
+
+               ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
+               WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+               ArrayList<Node> nodes = new ArrayList<Node>();
+               for (Synset synset : word.getSynsets()) {
+                       nodes.add(new Node(synset, relvWords));
+               }
+
+               ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+
+               for (WordSense wordSense : scoredSenses) {
+                       wordSense.setWTDLesk(wtd);
+                       int count = 0;
+                       // The unused local that cast PreProcessor.Stem(..) to ArrayList was
+                       // dropped: its value was never read and the downcast could throw a
+                       // ClassCastException for any other List implementation.
+                       for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+                               for (WordPOS sentenceWordPOS : relvWords) {
+                                       // TODO change to lemma check
+                                       if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+                                               count++;
+                                       }
+                               }
+                       }
+                       wordSense.setScore(count);
+               }
+
+               return scoredSenses;
+       }
+
+       /**
+        * Contextual scoring using {@code LeskParameters.DFLT_WIN_SIZE} as window size.
+        *
+        * @param wtd the word to disambiguate
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
+               final int defaultWindow = LeskParameters.DFLT_WIN_SIZE;
+               return basicContextual(wtd, defaultWindow);
+       }
+
+       /**
+        * Contextual scoring using the same window size backward and forward.
+        *
+        * @param wtd the word to disambiguate
+        * @param windowSize the window size used in both directions
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
+               final int backward = windowSize;
+               final int forward = windowSize;
+               return basicContextual(wtd, backward, forward);
+       }
+
+       /**
+        * Scores every sense of the word against the relevant words returned by
+        * {@code PreProcessor.getRelevantWords} for the given window, then sorts
+        * the senses with {@code Collections.sort}.
+        *
+        * @param wtd the word to disambiguate
+        * @param windowBackward the window size before the word
+        * @param windowForward the window size after the word
+        * @return the sorted, scored senses
+        */
+       public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward, int windowForward) {
+
+               ArrayList<WordPOS> contextWords = PreProcessor.getRelevantWords(wtd, windowBackward, windowForward);
+               WordPOS target = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+               ArrayList<Synset> senses = target.getSynsets();
+               ArrayList<Node> candidates = new ArrayList<Node>();
+               for (Synset sense : senses) {
+                       candidates.add(new Node(sense, contextWords));
+               }
+
+               ArrayList<WordSense> result = updateSenses(candidates);
+
+               for (WordSense candidate : result) {
+                       candidate.setWTDLesk(wtd);
+
+                       // number of stem-equivalent (sense word, context word) pairs
+                       int overlap = 0;
+                       for (WordPOS glossWord : candidate.getNode().getSenseRelevantWords()) {
+                               for (WordPOS contextWord : contextWords) {
+                                       // TODO change to lemma check
+                                       if (contextWord.isStemEquivalent(glossWord)) {
+                                               overlap += 1;
+                                       }
+                               }
+                       }
+                       candidate.setScore(overlap);
+               }
+
+               Collections.sort(result);
+               return result;
+       }
+
+       /**
+        * Extended scoring; delegates to extendedContextual with a window size of 0.
+        *
+        * @param wtd the word to disambiguate
+        * @param depth the depth passed on to the relation exploration
+        * @param depthScoreWeight the weight passed on to the relation scoring
+        * @param includeSynonyms whether synonyms contribute to the score
+        * @param includeHypernyms whether hypernyms contribute to the score
+        * @param includeHyponyms whether hyponyms contribute to the score
+        * @param includeMeronyms whether meronyms contribute to the score
+        * @param includeHolonyms whether holonyms contribute to the score
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> extended(WTDLesk wtd,
+                       int depth, double depthScoreWeight, boolean includeSynonyms,
+                       boolean includeHypernyms, boolean includeHyponyms,
+                       boolean includeMeronyms, boolean includeHolonyms) {
+
+               final int windowSize = 0;
+               return extendedContextual(wtd, windowSize, depth, depthScoreWeight,
+                               includeSynonyms, includeHypernyms, includeHyponyms,
+                               includeMeronyms, includeHolonyms);
+       }
+
+       /**
+        * Extended contextual scoring using {@code LeskParameters.DFLT_WIN_SIZE}
+        * as window size.
+        *
+        * @param wtd the word to disambiguate
+        * @param depth the depth passed on to the relation exploration
+        * @param depthScoreWeight the weight passed on to the relation scoring
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
+                       int depth, double depthScoreWeight,
+                       boolean includeSynonyms, boolean includeHypernyms,
+                       boolean includeHyponyms, boolean includeMeronyms,
+                       boolean includeHolonyms) {
+
+               final int defaultWindow = LeskParameters.DFLT_WIN_SIZE;
+               return extendedContextual(wtd, defaultWindow, depth, depthScoreWeight,
+                               includeSynonyms, includeHypernyms, includeHyponyms,
+                               includeMeronyms, includeHolonyms);
+       }
+
+       /**
+        * Extended contextual scoring using the same window size backward and forward.
+        *
+        * @param wtd the word to disambiguate
+        * @param windowSize the window size used in both directions
+        * @param depth the depth passed on to the relation exploration
+        * @param depthScoreWeight the weight passed on to the relation scoring
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
+                       int windowSize, int depth, double depthScoreWeight,
+                       boolean includeSynonyms, boolean includeHypernyms,
+                       boolean includeHyponyms, boolean includeMeronyms,
+                       boolean includeHolonyms) {
+
+               final int backward = windowSize;
+               final int forward = windowSize;
+               return extendedContextual(wtd, backward, forward, depth, depthScoreWeight,
+                               includeSynonyms, includeHypernyms, includeHyponyms,
+                               includeMeronyms, includeHolonyms);
+       }
+
+       /**
+        * Starts from the scores of basicContextual and lets the selected
+        * relations of each sense contribute to its score.
+        *
+        * @param wtd the word to disambiguate
+        * @param windowBackward the window size before the word
+        * @param windowForward the window size after the word
+        * @param depth the depth handed to the fathom* helpers
+        * @param depthScoreWeight the weight applied to the synonym assessment
+        *        and handed to the fathom* helpers
+        * @param includeSynonyms whether synonyms contribute to the score
+        * @param includeHypernyms whether hypernyms contribute to the score
+        * @param includeHyponyms whether hyponyms contribute to the score
+        * @param includeMeronyms whether meronyms contribute to the score
+        * @param includeHolonyms whether holonyms contribute to the score
+        * @return the senses in the order produced by basicContextual, with
+        *         their scores updated; the list is not sorted again
+        */
+       public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
+                       int windowBackward, int windowForward, int depth,
+                       double depthScoreWeight, boolean includeSynonyms,
+                       boolean includeHypernyms, boolean includeHyponyms,
+                       boolean includeMeronyms, boolean includeHolonyms) {
+
+               ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd, windowBackward, windowForward);
+
+               // The synset lookup and node list formerly built here were never
+               // read: basicContextual performs that work itself.
+               ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowBackward, windowForward);
+
+               for (WordSense wordSense : scoredSenses) {
+
+                       if (includeSynonyms) {
+                               wordSense.setScore(wordSense.getScore()
+                                               + depthScoreWeight
+                                               * assessSynonyms(wordSense.getNode().getSynonyms(), relvWords));
+                       }
+
+                       if (includeHypernyms) {
+                               fathomHypernyms(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, depthScoreWeight);
+                       }
+
+                       if (includeHyponyms) {
+                               fathomHyponyms(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, depthScoreWeight);
+                       }
+
+                       if (includeMeronyms) {
+                               fathomMeronyms(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, depthScoreWeight);
+                       }
+
+                       if (includeHolonyms) {
+                               fathomHolonyms(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, depthScoreWeight);
+                       }
+
+               }
+
+               return scoredSenses;
+
+       }
+
+       /**
+        * Exponential extended scoring; delegates to extendedExponentialContextual
+        * with a window size of 0.
+        *
+        * @param wtd the word to disambiguate
+        * @param depth the depth passed on to the relation exploration
+        * @param intersectionExponent the exponent passed on for the overlap counts
+        * @param depthExponent the exponent passed on for the depth
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> extendedExponential(WTDLesk wtd,
+                       int depth,
+                       double intersectionExponent, double depthExponent, boolean includeSynonyms,
+                       boolean includeHypernyms, boolean includeHyponyms,
+                       boolean includeMeronyms, boolean includeHolonyms) {
+
+               final int windowSize = 0;
+               return extendedExponentialContextual(wtd, windowSize, depth,
+                               intersectionExponent, depthExponent, includeSynonyms,
+                               includeHypernyms, includeHyponyms,
+                               includeMeronyms, includeHolonyms);
+       }
+
+       /**
+        * Exponential extended contextual scoring using
+        * {@code LeskParameters.DFLT_WIN_SIZE} as window size.
+        *
+        * @param wtd the word to disambiguate
+        * @param depth the depth passed on to the relation exploration
+        * @param intersectionExponent the exponent passed on for the overlap counts
+        * @param depthExponent the exponent passed on for the depth
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+                       int depth,
+                       double intersectionExponent, double depthExponent, boolean includeSynonyms,
+                       boolean includeHypernyms, boolean includeHyponyms,
+                       boolean includeMeronyms, boolean includeHolonyms) {
+
+               final int defaultWindow = LeskParameters.DFLT_WIN_SIZE;
+               return extendedExponentialContextual(wtd, defaultWindow, depth,
+                               intersectionExponent, depthExponent, includeSynonyms,
+                               includeHypernyms, includeHyponyms,
+                               includeMeronyms, includeHolonyms);
+       }
+
+       /**
+        * Exponential extended contextual scoring using the same window size
+        * backward and forward.
+        *
+        * @param wtd the word to disambiguate
+        * @param windowSize the window size used in both directions
+        * @param depth the depth passed on to the relation exploration
+        * @param intersectionExponent the exponent passed on for the overlap counts
+        * @param depthExponent the exponent passed on for the depth
+        * @return the scored senses
+        */
+       public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+                       int windowSize, int depth,
+                       double intersectionExponent, double depthExponent, boolean includeSynonyms,
+                       boolean includeHypernyms, boolean includeHyponyms,
+                       boolean includeMeronyms, boolean includeHolonyms) {
+
+               final int backward = windowSize;
+               final int forward = windowSize;
+               return extendedExponentialContextual(wtd, backward, forward, depth,
+                               intersectionExponent, depthExponent, includeSynonyms,
+                               includeHypernyms, includeHyponyms,
+                               includeMeronyms, includeHolonyms);
+       }
+
+       /**
+        * Starts from the scores of basicContextual and lets the selected
+        * relations of each sense contribute to its score, using exponents
+        * instead of a linear weight.
+        *
+        * @param wtd the word to disambiguate
+        * @param windowBackward the window size before the word
+        * @param windowForward the window size after the word
+        * @param depth the depth handed to the fathom*Exponential helpers
+        * @param intersectionExponent the exponent applied to the synonym
+        *        assessment and handed to the fathom*Exponential helpers
+        * @param depthExponent the exponent handed to the fathom*Exponential helpers
+        * @param includeSynonyms whether synonyms contribute to the score
+        * @param includeHypernyms whether hypernyms contribute to the score
+        * @param includeHyponyms whether hyponyms contribute to the score
+        * @param includeMeronyms whether meronyms contribute to the score
+        * @param includeHolonyms whether holonyms contribute to the score
+        * @return the senses in the order produced by basicContextual, with
+        *         their scores updated; the list is not sorted again
+        */
+       public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+                       int windowBackward, int windowForward, int depth,
+                       double intersectionExponent, double depthExponent, boolean includeSynonyms,
+                       boolean includeHypernyms, boolean includeHyponyms,
+                       boolean includeMeronyms, boolean includeHolonyms) {
+
+               ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd, windowBackward, windowForward);
+
+               // basicContextual expects (backward, forward). The two windows used to
+               // be passed swapped, so the base scores were computed on a different
+               // context than relvWords whenever the two sizes differed.
+               // The synset lookup and node list formerly built here were never
+               // read: basicContextual performs that work itself.
+               ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowBackward, windowForward);
+
+               for (WordSense wordSense : scoredSenses) {
+
+                       if (includeSynonyms) {
+                               wordSense.setScore(wordSense.getScore()
+                                               + Math.pow(assessSynonyms(wordSense.getNode().getSynonyms(), relvWords),
+                                                               intersectionExponent));
+                       }
+
+                       if (includeHypernyms) {
+                               fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, intersectionExponent, depthExponent);
+                       }
+
+                       if (includeHyponyms) {
+                               fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, intersectionExponent, depthExponent);
+                       }
+
+                       if (includeMeronyms) {
+                               fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, intersectionExponent, depthExponent);
+                       }
+
+                       if (includeHolonyms) {
+                               fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
+                                               relvWords, depth, depth, intersectionExponent, depthExponent);
+                       }
+
+               }
+
+               return scoredSenses;
+
+       }
+
+       /**
+        * Recursively adds the weighted hypernym overlap of {@code child} to the
+        * score of {@code wordSense}.
+        *
+        * Each level adds {@code depthScoreWeight^(maxDepth - depth + 1)} times
+        * the count from {@code assessFeature}; recursion stops when
+        * {@code depth} reaches 0. The recursive calls receive the relevant
+        * words of this synset's gloss in place of {@code relvWords}.
+        */
+       private void fathomHypernyms(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double depthScoreWeight) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setHypernyms();
+
+               double previousScore = wordSense.getScore();
+               double levelWeight = Math.pow(depthScoreWeight, maxDepth - depth + 1);
+               int overlap = assessFeature(current.getHypernyms(), relvWords);
+               wordSense.setScore(previousScore + levelWeight * overlap);
+
+               for (Synset hypernym : current.getHypernyms()) {
+                       fathomHypernyms(wordSense, hypernym, glossWords, depth - 1,
+                                       maxDepth, depthScoreWeight);
+               }
+       }
+
+       /**
+        * Recursively adds the exponential hypernym contribution of
+        * {@code child} to the score of {@code wordSense}.
+        *
+        * Each level adds {@code overlap^intersectionExponent /
+        * depth^depthScoreExponent}, where overlap is the count from
+        * {@code assessFeature}; recursion stops when {@code depth} reaches 0.
+        * {@code maxDepth} is only passed along and does not enter the score.
+        * The recursive calls receive the relevant words of this synset's gloss
+        * in place of {@code relvWords}.
+        */
+       private void fathomHypernymsExponential(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double intersectionExponent, double depthScoreExponent) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setHypernyms();
+
+               double previousScore = wordSense.getScore();
+               double overlapTerm = Math.pow(
+                               assessFeature(current.getHypernyms(), relvWords),
+                               intersectionExponent);
+               double depthDivisor = Math.pow(depth, depthScoreExponent);
+               wordSense.setScore(previousScore + overlapTerm / depthDivisor);
+
+               for (Synset hypernym : current.getHypernyms()) {
+                       fathomHypernymsExponential(wordSense, hypernym, glossWords,
+                                       depth - 1, maxDepth, intersectionExponent,
+                                       depthScoreExponent);
+               }
+       }
+
+       /**
+        * Recursively adds the weighted hyponym overlap of {@code child} to the
+        * score of {@code wordSense}.
+        *
+        * Each level adds {@code depthScoreWeight^(maxDepth - depth + 1)} times
+        * the count from {@code assessFeature}; recursion stops when
+        * {@code depth} reaches 0. The recursive calls receive the relevant
+        * words of this synset's gloss in place of {@code relvWords}.
+        */
+       private void fathomHyponyms(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double depthScoreWeight) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setHyponyms();
+
+               double previousScore = wordSense.getScore();
+               double levelWeight = Math.pow(depthScoreWeight, maxDepth - depth + 1);
+               int overlap = assessFeature(current.getHyponyms(), relvWords);
+               wordSense.setScore(previousScore + levelWeight * overlap);
+
+               for (Synset hyponym : current.getHyponyms()) {
+                       fathomHyponyms(wordSense, hyponym, glossWords, depth - 1,
+                                       maxDepth, depthScoreWeight);
+               }
+       }
+
+       /**
+        * Recursively adds the exponential hyponym contribution of
+        * {@code child} to the score of {@code wordSense}.
+        *
+        * Each level adds {@code overlap^intersectionExponent /
+        * depth^depthScoreExponent}, where overlap is the count from
+        * {@code assessFeature}; recursion stops when {@code depth} reaches 0.
+        * {@code maxDepth} is only passed along and does not enter the score.
+        * The recursive calls receive the relevant words of this synset's gloss
+        * in place of {@code relvWords}.
+        */
+       private void fathomHyponymsExponential(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double intersectionExponent, double depthScoreExponent) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setHyponyms();
+
+               double previousScore = wordSense.getScore();
+               double overlapTerm = Math.pow(
+                               assessFeature(current.getHyponyms(), relvWords),
+                               intersectionExponent);
+               double depthDivisor = Math.pow(depth, depthScoreExponent);
+               wordSense.setScore(previousScore + overlapTerm / depthDivisor);
+
+               for (Synset hyponym : current.getHyponyms()) {
+                       fathomHyponymsExponential(wordSense, hyponym, glossWords,
+                                       depth - 1, maxDepth, intersectionExponent,
+                                       depthScoreExponent);
+               }
+       }
+
+       /**
+        * Recursively adds the weighted meronym overlap of {@code child} to the
+        * score of {@code wordSense}.
+        *
+        * Each level adds {@code depthScoreWeight^(maxDepth - depth + 1)} times
+        * the count from {@code assessFeature}; recursion stops when
+        * {@code depth} reaches 0. The recursive calls receive the relevant
+        * words of this synset's gloss in place of {@code relvWords}.
+        */
+       private void fathomMeronyms(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double depthScoreWeight) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setMeronyms();
+
+               double previousScore = wordSense.getScore();
+               double levelWeight = Math.pow(depthScoreWeight, maxDepth - depth + 1);
+               int overlap = assessFeature(current.getMeronyms(), relvWords);
+               wordSense.setScore(previousScore + levelWeight * overlap);
+
+               for (Synset meronym : current.getMeronyms()) {
+                       fathomMeronyms(wordSense, meronym, glossWords, depth - 1,
+                                       maxDepth, depthScoreWeight);
+               }
+       }
+
+       /**
+        * Recursively adds the exponential meronym contribution of
+        * {@code child} to the score of {@code wordSense}.
+        *
+        * Each level adds {@code overlap^intersectionExponent /
+        * depth^depthScoreExponent}, where overlap is the count from
+        * {@code assessFeature}; recursion stops when {@code depth} reaches 0.
+        * {@code maxDepth} is only passed along and does not enter the score.
+        * The recursive calls receive the relevant words of this synset's gloss
+        * in place of {@code relvWords}.
+        */
+       private void fathomMeronymsExponential(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double intersectionExponent, double depthScoreExponent) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setMeronyms();
+
+               double previousScore = wordSense.getScore();
+               double overlapTerm = Math.pow(
+                               assessFeature(current.getMeronyms(), relvWords),
+                               intersectionExponent);
+               double depthDivisor = Math.pow(depth, depthScoreExponent);
+               wordSense.setScore(previousScore + overlapTerm / depthDivisor);
+
+               for (Synset meronym : current.getMeronyms()) {
+                       fathomMeronymsExponential(wordSense, meronym, glossWords,
+                                       depth - 1, maxDepth, intersectionExponent,
+                                       depthScoreExponent);
+               }
+       }
+
+       /**
+        * Recursively adds the weighted holonym overlap of {@code child} to the
+        * score of {@code wordSense}.
+        *
+        * Each level adds {@code depthScoreWeight^(maxDepth - depth + 1)} times
+        * the count from {@code assessFeature}; recursion stops when
+        * {@code depth} reaches 0. The recursive calls receive the relevant
+        * words of this synset's gloss in place of {@code relvWords}.
+        */
+       private void fathomHolonyms(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double depthScoreWeight) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setHolonyms();
+
+               double previousScore = wordSense.getScore();
+               double levelWeight = Math.pow(depthScoreWeight, maxDepth - depth + 1);
+               int overlap = assessFeature(current.getHolonyms(), relvWords);
+               wordSense.setScore(previousScore + levelWeight * overlap);
+
+               for (Synset holonym : current.getHolonyms()) {
+                       fathomHolonyms(wordSense, holonym, glossWords, depth - 1,
+                                       maxDepth, depthScoreWeight);
+               }
+       }
+
+       /**
+        * Recursively adds the exponential holonym contribution of
+        * {@code child} to the score of {@code wordSense}.
+        *
+        * Each level adds {@code overlap^intersectionExponent /
+        * depth^depthScoreExponent}, where overlap is the count from
+        * {@code assessFeature}; recursion stops when {@code depth} reaches 0.
+        * {@code maxDepth} is only passed along and does not enter the score.
+        * The recursive calls receive the relevant words of this synset's gloss
+        * in place of {@code relvWords}.
+        */
+       private void fathomHolonymsExponential(WordSense wordSense, Synset child,
+                       ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+                       double intersectionExponent, double depthScoreExponent) {
+               if (depth == 0) {
+                       return;
+               }
+
+               String[] glossTokens = Loader.getTokenizer().tokenize(
+                               child.getGloss().toString());
+               ArrayList<WordPOS> glossWords = PreProcessor
+                               .getAllRelevantWords(glossTokens);
+
+               Node current = new Node(child, glossWords);
+               current.setHolonyms();
+
+               double previousScore = wordSense.getScore();
+               double overlapTerm = Math.pow(
+                               assessFeature(current.getHolonyms(), relvWords),
+                               intersectionExponent);
+               double depthDivisor = Math.pow(depth, depthScoreExponent);
+               wordSense.setScore(previousScore + overlapTerm / depthDivisor);
+
+               for (Synset holonym : current.getHolonyms()) {
+                       fathomHolonymsExponential(wordSense, holonym, glossWords,
+                                       depth - 1, maxDepth, intersectionExponent,
+                                       depthScoreExponent);
+               }
+       }
+
+       /**
+        * Counts stem-equivalent word pairs between the senses of the given
+        * synsets and the relevant words.
+        *
+        * For every synset, the sense text of a {@code Node} built from it is
+        * tokenized and reduced to its relevant words; each of those is then
+        * compared with every entry of {@code relevantWords}.
+        *
+        * @param featureSynsets synsets whose senses are inspected
+        * @param relevantWords words to compare the sense words against
+        * @return the total number of stem-equivalent pairs over all synsets
+        */
+       private int assessFeature(ArrayList<Synset> featureSynsets,
+                       ArrayList<WordPOS> relevantWords) {
+               int total = 0;
+               for (Synset featureSynset : featureSynsets) {
+                       Node featureNode = new Node(featureSynset, relevantWords);
+
+                       String[] senseTokens = Loader.getTokenizer().tokenize(
+                                       featureNode.getSense());
+                       ArrayList<WordPOS> senseWords = PreProcessor
+                                       .getAllRelevantWords(senseTokens);
+
+                       // The pairwise stem comparison is the same one assessSynonyms
+                       // performs, so the count is delegated to it.
+                       total += assessSynonyms(senseWords, relevantWords);
+               }
+               return total;
+       }
+
+       /**
+        * Counts the pairs (synonym, relevant word) for which
+        * {@code isStemEquivalent} holds.
+        *
+        * @param synonyms candidate words
+        * @param relevantWords words each candidate is compared against
+        * @return the number of stem-equivalent pairs
+        */
+       private int assessSynonyms(ArrayList<WordPOS> synonyms,
+                       ArrayList<WordPOS> relevantWords) {
+               int matches = 0;
+
+               for (WordPOS candidate : synonyms) {
+                       for (WordPOS contextWord : relevantWords) {
+                               // TODO try to switch to lemmatizer
+                               matches += contextWord.isStemEquivalent(candidate) ? 1 : 0;
+                       }
+               }
+
+               return matches;
+       }
+
+       /**
+        * Wraps every node in a new {@code WordSense} whose id is the node's
+        * position in {@code nodes}.
+        *
+        * As a side effect each node's sense-relevant words are set to the
+        * relevant words of its tokenized sense text.
+        *
+        * @param nodes the nodes to wrap
+        * @return one {@code WordSense} per node, in the same order
+        */
+       public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
+               ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
+
+               int senseId = 0;
+               for (Node node : nodes) {
+                       ArrayList<WordPOS> senseWords = PreProcessor
+                                       .getAllRelevantWords(PreProcessor.tokenize(node.getSense()));
+                       WordSense wordSense = new WordSense();
+                       node.setSenseRelevantWords(senseWords);
+                       wordSense.setNode(node);
+                       wordSense.setId(senseId);
+                       scoredSenses.add(wordSense);
+                       senseId++;
+               }
+
+               return scoredSenses;
+       }
+
+       /**
+        * Disambiguates the word at {@code inputWordIndex} with the Lesk variant
+        * selected by {@code params.leskType}.
+        *
+        * The scored senses are sorted with {@code Collections.sort} (that is,
+        * by the natural ordering of {@code WordSense}) and their sense strings
+        * are returned in that order.
+        *
+        * @param inputText the tokenized sentence
+        * @param inputWordIndex index of the word to disambiguate
+        * @return the sense strings of the scored senses, in sorted order
+        */
+       @Override
+       public String[] disambiguate(String[] inputText, int inputWordIndex) {
+               WTDLesk wtd = new WTDLesk(inputText, inputWordIndex);
+               ArrayList<WordSense> wsenses = null;
+
+               switch (this.params.leskType) {
+               case LESK_BASIC:
+                       wsenses = basic(wtd);
+                       break;
+               case LESK_BASIC_CTXT:
+                       wsenses = basicContextual(wtd);
+                       break;
+               case LESK_BASIC_CTXT_WIN:
+                       wsenses = basicContextual(wtd, this.params.win_b_size);
+                       break;
+               case LESK_BASIC_CTXT_WIN_BF:
+                       wsenses = basicContextual(wtd, this.params.win_b_size,
+                                       this.params.win_f_size);
+                       break;
+               case LESK_EXT:
+                       wsenses = extended(wtd,
+                                       this.params.depth,
+                                       this.params.depth_weight,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_CTXT:
+                       wsenses = extendedContextual(wtd,
+                                       this.params.depth,
+                                       this.params.depth_weight,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_CTXT_WIN:
+                       wsenses = extendedContextual(wtd,
+                                       this.params.win_b_size,
+                                       this.params.depth,
+                                       this.params.depth_weight,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_CTXT_WIN_BF:
+                       wsenses = extendedContextual(wtd,
+                                       this.params.win_b_size,
+                                       this.params.win_f_size,
+                                       this.params.depth,
+                                       this.params.depth_weight,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_EXP:
+                       wsenses = extendedExponential(wtd,
+                                       this.params.depth,
+                                       this.params.iexp,
+                                       this.params.dexp,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_EXP_CTXT:
+                       wsenses = extendedExponentialContextual(wtd,
+                                       this.params.depth,
+                                       this.params.iexp,
+                                       this.params.dexp,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_EXP_CTXT_WIN:
+                       wsenses = extendedExponentialContextual(wtd,
+                                       this.params.win_b_size,
+                                       this.params.depth,
+                                       this.params.iexp,
+                                       this.params.dexp,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               case LESK_EXT_EXP_CTXT_WIN_BF:
+                       wsenses = extendedExponentialContextual(wtd,
+                                       this.params.win_b_size,
+                                       this.params.win_f_size,
+                                       this.params.depth,
+                                       this.params.iexp,
+                                       this.params.dexp,
+                                       this.params.fathom_synonyms,
+                                       this.params.fathom_hypernyms,
+                                       this.params.fathom_hyponyms,
+                                       this.params.fathom_meronyms,
+                                       this.params.fathom_holonyms);
+                       break;
+               default:
+                       // Fallback for a variant without its own case: score with the
+                       // default exponential contextual settings. Running this call
+                       // unconditionally would discard the variant chosen above.
+                       wsenses = extendedExponentialContextual(wtd,
+                                       LeskParameters.DFLT_WIN_SIZE,
+                                       LeskParameters.DFLT_DEPTH,
+                                       LeskParameters.DFLT_IEXP,
+                                       LeskParameters.DFLT_DEXP,
+                                       true, true, true, true, true);
+                       break;
+               }
+
+               Collections.sort(wsenses);
+
+               String[] senses = new String[wsenses.size()];
+               for (int i = 0; i < wsenses.size(); i++) {
+                       senses[i] = wsenses.get(i).getSense();
+               }
+               return senses;
+       }
+
+       /**
+        * Span-based disambiguation; not implemented yet.
+        *
+        * @param inputText the tokenized sentence (currently unused)
+        * @param inputWordSpans the spans to disambiguate (currently unused)
+        * @return always {@code null} in the current implementation
+        */
+       @Override
+       public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+               // TODO need to work on spans
+               return null;
+       }
+
+}
\ No newline at end of file

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,107 @@
+package opennlp.tools.disambiguator.lesk;
+
+/**
+ * Parameter holder for the Lesk disambiguator: the variant to run, the
+ * context window sizes, the relation depth, the scoring weights/exponents
+ * and the switches selecting which semantic relations are scored.
+ */
+public class LeskParameters {
+
+       /** The supported Lesk variants. */
+       public static enum LESK_TYPE {
+               LESK_BASIC,
+               LESK_BASIC_CTXT,
+               LESK_BASIC_CTXT_WIN,
+               LESK_BASIC_CTXT_WIN_BF,
+               LESK_EXT,
+               LESK_EXT_CTXT,
+               LESK_EXT_CTXT_WIN,
+               LESK_EXT_CTXT_WIN_BF,
+               LESK_EXT_EXP,
+               LESK_EXT_EXP_CTXT,
+               LESK_EXT_EXP_CTXT_WIN,
+               LESK_EXT_EXP_CTXT_WIN_BF,
+       }
+
+       // DEFAULTS
+       protected static final LESK_TYPE DFLT_LESK_TYPE =
+                       LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
+       protected static final int DFLT_WIN_SIZE = 4;
+       protected static final int DFLT_DEPTH = 3;
+       // Must be non-zero: the weighted variants multiply each relation level
+       // by depth_weight^k, so a weight of 0 would cancel every contribution.
+       protected static final double DFLT_DEPTH_WEIGHT = 0.8;
+       protected static final double DFLT_IEXP = 0.3;
+       protected static final double DFLT_DEXP = 0.3;
+
+       public LESK_TYPE leskType;
+       public int win_f_size;
+       public int win_b_size;
+       public int depth;
+
+       public boolean fathom_synonyms;
+       public boolean fathom_hypernyms;
+       public boolean fathom_hyponyms;
+       public boolean fathom_meronyms;
+       public boolean fathom_holonyms;
+
+       public double depth_weight;
+       public double iexp;
+       public double dexp;
+
+       /** Creates a parameter set initialised with the defaults. */
+       public LeskParameters() {
+               this.setDefaults();
+       }
+
+       /** Resets every parameter to its default value. */
+       public void setDefaults() {
+               this.leskType = LeskParameters.DFLT_LESK_TYPE;
+               this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
+               this.win_b_size = LeskParameters.DFLT_WIN_SIZE;
+               this.depth = LeskParameters.DFLT_DEPTH;
+               this.depth_weight = LeskParameters.DFLT_DEPTH_WEIGHT;
+               this.iexp = LeskParameters.DFLT_IEXP;
+               this.dexp = LeskParameters.DFLT_DEXP;
+               this.fathom_holonyms = true;
+               this.fathom_hypernyms = true;
+               this.fathom_hyponyms = true;
+               this.fathom_meronyms = true;
+               this.fathom_synonyms = true;
+       }
+
+       /**
+        * Checks the parameters that the selected variant checks for validity.
+        *
+        * @return {@code true} if those parameters are in range; {@code false}
+        *         otherwise, including when no variant is selected
+        */
+       // TODO make isSet for semantic feature booleans
+       public boolean isValid() {
+
+               // Switching on a null enum would throw a NullPointerException.
+               if (this.leskType == null) {
+                       return false;
+               }
+
+               switch (this.leskType) {
+               case LESK_BASIC:
+               case LESK_BASIC_CTXT:
+                       return true;
+               case LESK_BASIC_CTXT_WIN:
+                       // Single-window variant: both sizes have to agree.
+                       return (this.win_b_size == this.win_f_size)
+                                       && this.win_b_size >= 0;
+               case LESK_BASIC_CTXT_WIN_BF:
+                       return (this.win_b_size >= 0)
+                                       && (this.win_f_size >= 0);
+               case LESK_EXT:
+               case LESK_EXT_CTXT:
+                       return (this.depth >= 0)
+                                       && (this.depth_weight >= 0);
+
+               case LESK_EXT_CTXT_WIN:
+               case LESK_EXT_CTXT_WIN_BF:
+                       return (this.depth >= 0)
+                                       && (this.depth_weight >= 0)
+                                       && (this.win_b_size >= 0)
+                                       && (this.win_f_size >= 0);
+
+               case LESK_EXT_EXP:
+               case LESK_EXT_EXP_CTXT:
+                       return (this.depth >= 0)
+                                       && (this.dexp >= 0)
+                                       && (this.iexp >= 0);
+
+               case LESK_EXT_EXP_CTXT_WIN:
+               case LESK_EXT_EXP_CTXT_WIN_BF:
+                       return (this.depth >= 0)
+                                       && (this.dexp >= 0)
+                                       && (this.iexp >= 0)
+                                       && (this.win_b_size >= 0)
+                                       && (this.win_f_size >= 0);
+               default:
+                       return false;
+               }
+       }
+
+}

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,15 @@
+package opennlp.tools.disambiguator.lesk;
+
+import opennlp.tools.disambiguator.WordToDisambiguate;
+
+
+/**
+ * Word-to-disambiguate holder used by the Lesk implementation; adds nothing
+ * to {@link WordToDisambiguate} beyond its own constructor.
+ */
+public class WTDLesk extends WordToDisambiguate{
+
+       /**
+        * @param sentence the tokenized sentence containing the word
+        * @param wordIndex index of the word to disambiguate within the sentence
+        */
+       public WTDLesk(String[] sentence, int wordIndex) {
+               // NOTE(review): the meaning of the third superclass argument (-1)
+               // is not visible from this file - confirm against WordToDisambiguate.
+               super(sentence,wordIndex,-1);
+       }
+       
+
+       
+       
+}
\ No newline at end of file

Added: 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java?rev=1687358&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
 Wed Jun 24 20:19:05 2015
@@ -0,0 +1,83 @@
+package opennlp.tools.disambiguator;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.junit.Test;
+
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.Loader;
+import opennlp.tools.disambiguator.WordSense;
+import opennlp.tools.disambiguator.ims.FeaturesExtractor;
+import opennlp.tools.disambiguator.lesk.Lesk;
+import opennlp.tools.disambiguator.lesk.LeskParameters;
+import opennlp.tools.disambiguator.lesk.WTDLesk;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+
+
+/**
+ * Smoke test for the Lesk disambiguator: tokenizes and tags a sample
+ * sentence, then runs the disambiguator with several parameter sets and
+ * prints the results.
+ */
+public class Tester {
+
+       // Forward slashes are accepted by java.io on every platform, unlike the
+       // Windows-only backslash separator.
+       private static final String RESOURCES =
+                       "src/test/resources/opennlp/tools/disambiguator/";
+
+       /**
+        * Command-line entry point; runs the same checks as the JUnit test and
+        * prints the stack trace if a model file cannot be read.
+        */
+       public static void main(String[] args) {
+               try {
+                       new Tester().testLesk();
+               } catch (IOException e) {
+                       e.printStackTrace();
+               }
+       }
+
+       /**
+        * JUnit entry point. JUnit 4 test methods have to be public, non-static
+        * and without parameters, so the test cannot live on {@code main}.
+        *
+        * @throws IOException if a model file cannot be read; propagated so the
+        *         test fails instead of passing silently
+        */
+       @Test
+       public void testLesk() throws IOException {
+
+               String sentence = "I went fishing for some sea bass.";
+
+               TokenizerModel tokenizerModel;
+               FileInputStream tokenizerStream =
+                               new FileInputStream(RESOURCES + "en-token.bin");
+               try {
+                       tokenizerModel = new TokenizerModel(tokenizerStream);
+               } finally {
+                       // Release the file handle even if the model fails to load.
+                       tokenizerStream.close();
+               }
+               Tokenizer tokenizer = new TokenizerME(tokenizerModel);
+
+               String[] words = tokenizer.tokenize(sentence);
+
+               POSModel posTaggerModel = new POSModelLoader().load(
+                               new File(RESOURCES + "en-pos-maxent.bin"));
+               POSTagger tagger = new POSTaggerME(posTaggerModel);
+
+               Constants.print("\ntokens :");
+               Constants.print(words);
+               Constants.print(tagger.tag(words));
+
+               Constants.print("\ntesting default lesk :");
+               Lesk lesk = new Lesk();
+               Constants.print(lesk.disambiguate(words, 6));
+
+               Constants.print("\ntesting with null params :");
+               lesk.setParams(null);
+               Constants.print(lesk.disambiguate(words, 6));
+
+               Constants.print("\ntesting with default params");
+               lesk.setParams(new LeskParameters());
+               Constants.print(lesk.disambiguate(words, 6));
+
+               Constants.print("\ntesting with custom params :");
+               LeskParameters leskParams = new LeskParameters();
+               leskParams.leskType =
+                               LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
+               leskParams.win_b_size = 4;
+               leskParams.depth = 3;
+               lesk.setParams(leskParams);
+               Constants.print(lesk.disambiguate(words, 6));
+
+               /*
+               Constants.print("\ntesting with wrong params should throw exception :");
+               LeskParameters leskWrongParams = new LeskParameters();
+               leskWrongParams.depth = -1;
+               lesk.setParams(leskWrongParams);
+               Constants.print(lesk.disambiguate(words, 6));
+               */
+       }
+
+}


Reply via email to