Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java (original) +++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java Tue Aug 18 22:44:32 2015 @@ -32,16 +32,17 @@ public class LeskParameters extends WSDP * */ public static enum LESK_TYPE { - LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP, LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF, + LESK_BASIC, LESK_BASIC_CTXT, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_EXP, LESK_EXT_EXP_CTXT } - + // DEFAULTS - protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN; + protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT; protected static final Source DFLT_SOURCE = Source.WORDNET; - protected static final int DFLT_WIN_SIZE = 3; - protected static final int DFLT_DEPTH = 2; - protected static final double DFLT_IEXP = 0.4; - protected static final double DFLT_DEXP = 0.4; + protected static final int DFLT_WIN_SIZE = 10; + protected static final int DFLT_DEPTH = 1; + protected static final double DFLT_DEPTH_WEIGHT = 0.8; + protected static final double DFLT_IEXP = 0.3; + protected static final double DFLT_DEXP = 0.3; protected LESK_TYPE leskType; @@ -49,17 +50,17 @@ public class LeskParameters extends WSDP protected int win_f_size; protected int win_b_size; protected int depth; - - protected boolean fathom_synonyms; - protected boolean fathom_hypernyms; - protected boolean fathom_hyponyms; - protected boolean fathom_meronyms; - protected boolean fathom_holonyms; - protected double depth_weight; protected double iexp; protected double dexp; + /* + * 10 possible features for lesk 0 : Synonyms 1 : Hypernyms 2 : Hyponyms 3 : + * Meronyms 4 : Holonyms 5 : Entailments 6 : Coordinate Terms 7 : Causes 8 : + * Attributes 9 : Pertainyms + */ + protected boolean features[]; + public LESK_TYPE getLeskType() { return leskType; } @@ -92,46 +93,6 @@ public class LeskParameters extends WSDP this.depth = depth; } - public boolean isFathom_synonyms() { - return fathom_synonyms; - } - - public void setFathom_synonyms(boolean fathom_synonyms) { - this.fathom_synonyms = fathom_synonyms; - } - - public boolean isFathom_hypernyms() { - return fathom_hypernyms; - } - - public void setFathom_hypernyms(boolean fathom_hypernyms) { - this.fathom_hypernyms = fathom_hypernyms; - } - - public boolean isFathom_hyponyms() { - return fathom_hyponyms; - } - - public void setFathom_hyponyms(boolean fathom_hyponyms) { - this.fathom_hyponyms = fathom_hyponyms; - } - - public boolean isFathom_meronyms() { - return fathom_meronyms; - } - - public void setFathom_meronyms(boolean fathom_meronyms) { - this.fathom_meronyms = fathom_meronyms; - } - - public boolean isFathom_holonyms() { - return fathom_holonyms; - } - - public void setFathom_holonyms(boolean fathom_holonyms) { - this.fathom_holonyms = fathom_holonyms; - } - public double getDepth_weight() { return depth_weight; } @@ -156,6 +117,14 @@ public class LeskParameters extends WSDP this.dexp = dexp; } + public boolean[] getFeatures() { + return features; + } + + public void setFeatures(boolean[] features) { + this.features = features; + } + public LeskParameters() { this.setDefaults(); } @@ -169,13 +138,11 @@ public class LeskParameters extends WSDP this.win_f_size = LeskParameters.DFLT_WIN_SIZE; this.win_b_size = LeskParameters.DFLT_WIN_SIZE; this.depth = LeskParameters.DFLT_DEPTH; + this.depth_weight = LeskParameters.DFLT_DEPTH_WEIGHT; this.iexp = LeskParameters.DFLT_IEXP; this.dexp = LeskParameters.DFLT_DEXP; - this.fathom_holonyms = true; - this.fathom_hypernyms = true; - this.fathom_hyponyms = true; - this.fathom_meronyms = true; - this.fathom_synonyms = true; + boolean[] a = { true, true, true, true, true, true, true, true, true, true }; + this.features = a; } /* @@ -188,23 +155,13 @@ public class LeskParameters extends WSDP switch (this.leskType) { case LESK_BASIC: case LESK_BASIC_CTXT: - return true; - case LESK_BASIC_CTXT_WIN: return (this.win_b_size == this.win_f_size) && this.win_b_size >= 0; - case LESK_BASIC_CTXT_WIN_BF: - return (this.win_b_size >= 0) && (this.win_f_size >= 0); case LESK_EXT: case LESK_EXT_CTXT: - return (this.depth >= 0) && (this.depth_weight >= 0); - case LESK_EXT_CTXT_WIN: - case LESK_EXT_CTXT_WIN_BF: return (this.depth >= 0) && (this.depth_weight >= 0) && (this.win_b_size >= 0) && (this.win_f_size >= 0); case LESK_EXT_EXP: case LESK_EXT_EXP_CTXT: - return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0); - case LESK_EXT_EXP_CTXT_WIN: - case LESK_EXT_EXP_CTXT_WIN_BF: return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0) && (this.win_b_size >= 0) && (this.win_f_size >= 0); default:
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java (original) +++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java Tue Aug 18 22:44:32 2015 @@ -27,7 +27,7 @@ import net.sf.extjwnl.JWNLException; import net.sf.extjwnl.data.POS; import net.sf.extjwnl.data.Synset; import net.sf.extjwnl.data.Word; -import opennlp.tools.disambiguator.Constants; +import opennlp.tools.disambiguator.WSDHelper; import opennlp.tools.disambiguator.WSDParameters; import opennlp.tools.disambiguator.WSDSample; import opennlp.tools.disambiguator.WSDisambiguator; @@ -53,10 +53,11 @@ public class MFS implements WSDisambigua } @Deprecated - public static String[] getMostFrequentSense(WordToDisambiguate wordToDisambiguate) { + public static String[] getMostFrequentSense( + WordToDisambiguate wordToDisambiguate) { String word = wordToDisambiguate.getRawWord().toLowerCase(); - POS pos = Constants.getPOS(wordToDisambiguate.getPosTag()); + POS pos = WSDHelper.getPOS(wordToDisambiguate.getPosTag()); if (pos != null) { @@ -91,7 +92,7 @@ public class MFS implements WSDisambigua } } - + /* * @return the most frequent senses from wordnet */ @@ -102,7 +103,7 @@ public class MFS implements WSDisambigua if (WSDParameters.isStemCompare) { WordPOS wdPOS = new WordPOS(wd.getLemma(), wd.getPOS()); WordPOS samplePOS = new WordPOS(sample.getTargetLemma(), - Constants.getPOS(sample.getTargetTag())); + WSDHelper.getPOS(sample.getTargetTag())); if (wdPOS.isStemEquivalent(samplePOS)) { try { return WSDParameters.Source.WORDNET.name() + " " + wd.getSenseKey(); @@ -134,7 +135,7 @@ public class MFS implements WSDisambigua if (WSDParameters.isStemCompare) { WordPOS wdPOS = new WordPOS(wd.getLemma(), wd.getPOS()); WordPOS samplePOS = new WordPOS(sample.getTargetLemma(), - Constants.getPOS(sample.getTargetTag())); + WSDHelper.getPOS(sample.getTargetTag())); if (wdPOS.isStemEquivalent(samplePOS)) { try { senseKeys[i] = WSDParameters.Source.WORDNET.name() + " " @@ -145,7 +146,7 @@ public class MFS implements WSDisambigua } break; } - }else{ + } else { if (wd.getLemma().equalsIgnoreCase((sample.getTargetLemma()))) { try { senseKeys[i] = WSDParameters.Source.WORDNET.name() + " " Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java (original) +++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSEvaluatorTest.java Tue Aug 18 22:44:32 2015 @@ -36,8 +36,14 @@ public class IMSEvaluatorTest { @Test public static void main(String[] args) { - Constants.print("Evaluation Started"); + WSDHelper.print("Evaluation Started"); + String modelsDir = "src\\test\\resources\\models\\"; + WSDHelper.loadTokenizer(modelsDir+"en-token.bin"); + WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict"); + WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin"); + + IMS ims = new IMS(); IMSParameters imsParams = new IMSParameters(); ims.setParams(imsParams); @@ -52,16 +58,16 @@ public class IMSEvaluatorTest { ArrayList<WSDSample> instances = getTestData(word); if (instances != null) { - Constants.print("------------------" + word + "------------------"); + WSDHelper.print("------------------" + word + "------------------"); for (WSDSample instance : instances) { if (instance.getSenseIDs() != null && !instance.getSenseIDs().get(0).equals("null")) { evaluator.evaluateSample(instance); } } - Constants.print(evaluator.toString()); + WSDHelper.print(evaluator.toString()); } else { - Constants.print("null instances"); + WSDHelper.print("null instances"); } } @@ -95,7 +101,7 @@ public class IMSEvaluatorTest { ArrayList<WSDSample> instances = new ArrayList<WSDSample>(); for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) { - List<WordPOS> words = PreProcessor.getAllRelevantWords(wtd); + List<WordPOS> words = WSDHelper.getAllRelevantWords(wtd); int targetWordIndex=0; for (int i=0; i<words.size();i++){ if(words.get(i).isTarget){ @@ -108,7 +114,7 @@ public class IMSEvaluatorTest { tags[i] = words.get(i).getPosTag(); tokens[i] = words.get(i).getWord(); } - String targetLemma = Loader.getLemmatizer().lemmatize( + String targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); WSDSample sample = new WSDSample(tokens,tags,targetWordIndex,targetLemma); Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java (original) +++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java Tue Aug 18 22:44:32 2015 @@ -35,19 +35,24 @@ public class IMSTester { public static void main(String[] args) { + String modelsDir = "src\\test\\resources\\models\\"; + WSDHelper.loadTokenizer(modelsDir+"en-token.bin"); + WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict"); + WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin"); + IMS ims = new IMS(); String test1 = "Please write to me soon."; - String[] sentence1 = Loader.getTokenizer().tokenize(test1); - Constants.print(ims.disambiguate(sentence1, 1)); + String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1); + WSDHelper.print(ims.disambiguate(sentence1, 1)); String test2 = "it was a strong argument that his hypothesis was true"; - String[] sentence2 = Loader.getTokenizer().tokenize(test2); - Constants.print(ims.disambiguate(sentence2, 3)); + String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2); + WSDHelper.print(ims.disambiguate(sentence2, 3)); String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water"; - String[] sentence3 = Loader.getTokenizer().tokenize(test3); - Constants.print(ims.disambiguate(sentence3, 12)); + String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3); + WSDHelper.print(ims.disambiguate(sentence3, 12)); } Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java (original) +++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java Tue Aug 18 22:44:32 2015 @@ -35,11 +35,17 @@ public class LeskEvaluatorTest { @Test public static void main(String[] args) { - Constants.print("Evaluation Started"); - + WSDHelper.print("Evaluation Started"); + String modelsDir = "src\\test\\resources\\models\\"; + WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); + WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); + WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); Lesk lesk = new Lesk(); LeskParameters leskParams = new LeskParameters(); - leskParams.setLeskType(LeskParameters.LESK_TYPE.LESK_EXT_EXP_CTXT_WIN); + boolean a[] = { true, true, true, true, true, false, false, false, false, + false }; + leskParams.setFeatures(a); + leskParams.setLeskType(LeskParameters.LESK_TYPE.LESK_EXT_CTXT); lesk.setParams(leskParams); ArrayList<String> words = seReader.getSensevalWords(); @@ -52,16 +58,16 @@ public class LeskEvaluatorTest { ArrayList<WSDSample> instances = getTestData(word); if (instances != null) { - Constants.print("------------------" + word + "------------------"); + WSDHelper.print("------------------" + word + "------------------"); for (WSDSample instance : instances) { if (instance.getSenseIDs() != null && !instance.getSenseIDs().get(0).equals("null")) { evaluator.evaluateSample(instance); } } - Constants.print(evaluator.toString()); + WSDHelper.print(evaluator.toString()); } else { - Constants.print("null instances"); + WSDHelper.print("null instances"); } } } @@ -71,23 +77,24 @@ public class LeskEvaluatorTest { ArrayList<WSDSample> instances = new ArrayList<WSDSample>(); for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) { - List<WordPOS> words = PreProcessor.getAllRelevantWords(wtd); - int targetWordIndex=0; - for (int i=0; i<words.size();i++){ - if(words.get(i).isTarget){ + List<WordPOS> words = WSDHelper.getAllRelevantWords(wtd); + int targetWordIndex = 0; + for (int i = 0; i < words.size(); i++) { + if (words.get(i).isTarget) { targetWordIndex = i; - } + } } String[] tags = new String[words.size()]; String[] tokens = new String[words.size()]; - for (int i=0;i<words.size();i++){ + for (int i = 0; i < words.size(); i++) { tags[i] = words.get(i).getPosTag(); tokens[i] = words.get(i).getWord(); } - String targetLemma = Loader.getLemmatizer().lemmatize( + String targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); - - WSDSample sample = new WSDSample(tokens,tags,targetWordIndex,targetLemma); + + WSDSample sample = new WSDSample(tokens, tags, targetWordIndex, + targetLemma); sample.setSenseIDs(wtd.getSenseIDs()); if (sample != null) { if (sample.getSenseIDs().get(0) != null Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java (original) +++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java Tue Aug 18 22:44:32 2015 @@ -25,44 +25,49 @@ import java.util.List; import opennlp.tools.disambiguator.lesk.Lesk; import opennlp.tools.disambiguator.lesk.LeskParameters; import opennlp.tools.disambiguator.lesk.LeskParameters.LESK_TYPE; -import opennlp.tools.disambiguator.mfs.MFS; import org.junit.Test; public class LeskTester { - @Test public static void main(String[] args) { + Lesk lesk = new Lesk(); LeskParameters params = new LeskParameters(); - params.setLeskType(LESK_TYPE.LESK_EXT_EXP_CTXT_WIN); + params.setLeskType(LESK_TYPE.LESK_EXT); + boolean a[] = { true, true, true, true, true, true, true, true, true, true }; + params.setFeatures(a); lesk.setParams(params); - - String test1 = "I went fishing for some sea bass."; - String[] sentence = Loader.getTokenizer().tokenize(test1); - List<WordPOS> words = PreProcessor.getAllRelevantWords(sentence); - int targetWordIndex = 2; + String modelsDir = "src\\test\\resources\\models\\"; + WSDHelper.loadTokenizer(modelsDir+"en-token.bin"); + WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict"); + WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin"); + + String test1 = "I went to the bank to deposit money."; + String[] sentence = WSDHelper.getTokenizer().tokenize(test1); + List<WordPOS> words = WSDHelper.getAllRelevantWords(sentence); + int targetWordIndex = 0; String[] tags = new String[words.size()]; String[] tokens = new String[words.size()]; for (int i=0;i<words.size();i++){ tags[i] = words.get(i).getPosTag(); tokens[i] = words.get(i).getWord(); - // Constants.print("token : "+ tokens[i] + "_" + tags[i]); + WSDHelper.print("token : "+ tokens[i] + "_" + tags[i]); } - String targetLemma = Loader.getLemmatizer().lemmatize( + String targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); // Constants.print("lemma : "+ targetLemma); - Constants.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma)); - Constants.printResults(lesk, + WSDHelper.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma)); + WSDHelper.printResults(lesk, lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma)); - Constants.print("----------------------------------------"); + WSDHelper.print("----------------------------------------"); String test2 = "it was a strong argument that his hypothesis was true"; - sentence = Loader.getTokenizer().tokenize(test2); - words = PreProcessor.getAllRelevantWords(sentence); + sentence = WSDHelper.getTokenizer().tokenize(test2); + words = WSDHelper.getAllRelevantWords(sentence); targetWordIndex = 1; tags = new String[words.size()]; tokens = new String[words.size()]; @@ -72,19 +77,19 @@ public class LeskTester { //Constants.print("token : "+ tokens[i] + "_" + tags[i]); } - targetLemma = Loader.getLemmatizer().lemmatize( + targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); //Constants.print("lemma : "+ targetLemma); - Constants.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma)); - Constants.printResults(lesk, + WSDHelper.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma)); + WSDHelper.printResults(lesk, lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma)); - Constants.print("----------------------------------------"); + WSDHelper.print("----------------------------------------"); String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water"; - sentence = Loader.getTokenizer().tokenize(test3); - words = PreProcessor.getAllRelevantWords(sentence); + sentence = WSDHelper.getTokenizer().tokenize(test3); + words = WSDHelper.getAllRelevantWords(sentence); targetWordIndex = 4; tags = new String[words.size()]; tokens = new String[words.size()]; @@ -94,14 +99,14 @@ public class LeskTester { //Constants.print("token : "+ tokens[i] + "_" + tags[i]); } - targetLemma = Loader.getLemmatizer().lemmatize( + targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); //Constants.print("lemma : "+ targetLemma); - Constants.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma)); - Constants.printResults(lesk, + WSDHelper.print(lesk.disambiguate(tokens, tags, targetWordIndex,targetLemma)); + WSDHelper.printResults(lesk, lesk.disambiguate(tokens, tags, targetWordIndex, targetLemma)); - Constants.print("----------------------------------------"); + WSDHelper.print("----------------------------------------"); } } \ No newline at end of file Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java (original) +++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSEvaluatorTest.java Tue Aug 18 22:44:32 2015 @@ -34,8 +34,11 @@ public class MFSEvaluatorTest { @Test public static void main(String[] args) { - Constants.print("Evaluation Started"); - + WSDHelper.print("Evaluation Started"); + String modelsDir = "src\\test\\resources\\models\\"; + WSDHelper.loadTokenizer(modelsDir+"en-token.bin"); + WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict"); + WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin"); MFS mfs = new MFS(); WSDParameters.isStemCompare = true; @@ -50,16 +53,16 @@ public class MFSEvaluatorTest { ArrayList<WSDSample> instances = getTestData(word); if (instances != null) { - Constants.print("------------------" + word + "------------------"); + WSDHelper.print("------------------" + word + "------------------"); for (WSDSample instance : instances) { if (instance.getSenseIDs() != null && !instance.getSenseIDs().get(0).equals("null")) { evaluator.evaluateSample(instance); } } - Constants.print(evaluator.toString()); + WSDHelper.print(evaluator.toString()); } else { - Constants.print("null instances"); + WSDHelper.print("null instances"); } } @@ -82,7 +85,7 @@ public class MFSEvaluatorTest { ArrayList<WSDSample> instances = new ArrayList<WSDSample>(); for (WordToDisambiguate wtd : seReader.getSensevalData(wordTag)) { - String targetLemma = Loader.getLemmatizer().lemmatize(wtd.getWord(), + String targetLemma = WSDHelper.getLemmatizer().lemmatize(wtd.getWord(), wtd.getPosTag()); WSDSample sample = new WSDSample(wtd.getSentence(), wtd.getPosTags(), Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java?rev=1696509&r1=1696508&r2=1696509&view=diff ============================================================================== --- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java (original) +++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java Tue Aug 18 22:44:32 2015 @@ -30,11 +30,18 @@ import opennlp.tools.disambiguator.mfs.M public class MFSTester { public static void main(String[] args) { + + String modelsDir = "src\\test\\resources\\models\\"; + WSDHelper.loadTokenizer(modelsDir+"en-token.bin"); + WSDHelper.loadLemmatizer(modelsDir+"en-lemmatizer.dict"); + WSDHelper.loadTagger(modelsDir+"en-pos-maxent.bin"); + + MFS mfs = new MFS(); String test1 = "I went fishing for some sea bass."; - String[] sentence = Loader.getTokenizer().tokenize(test1); - List<WordPOS> words = PreProcessor.getAllRelevantWords(sentence); + String[] sentence = WSDHelper.getTokenizer().tokenize(test1); + List<WordPOS> words = WSDHelper.getAllRelevantWords(sentence); int targetWordIndex = 2; String[] tags = new String[words.size()]; String[] tokens = new String[words.size()]; @@ -44,18 +51,18 @@ public class MFSTester { // Constants.print("token : "+ tokens[i] + "_" + tags[i]); } - String targetLemma = Loader.getLemmatizer().lemmatize( + String targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); // Constants.print("lemma : "+ targetLemma); - Constants.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma)); - Constants.printResults(mfs, + WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma)); + WSDHelper.printResults(mfs, mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma)); - Constants.print("----------------------------------------"); + WSDHelper.print("----------------------------------------"); String test2 = "it was a strong argument that his hypothesis was true"; - sentence = Loader.getTokenizer().tokenize(test2); - words = PreProcessor.getAllRelevantWords(sentence); + sentence = WSDHelper.getTokenizer().tokenize(test2); + words = WSDHelper.getAllRelevantWords(sentence); targetWordIndex = 1; tags = new String[words.size()]; tokens = new String[words.size()]; @@ -65,19 +72,19 @@ public class MFSTester { //Constants.print("token : "+ tokens[i] + "_" + tags[i]); } - targetLemma = Loader.getLemmatizer().lemmatize( + targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); //Constants.print("lemma : "+ targetLemma); - Constants.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma)); - Constants.printResults(mfs, + WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma)); + WSDHelper.printResults(mfs, mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma)); - Constants.print("----------------------------------------"); + WSDHelper.print("----------------------------------------"); String test3 = "the component was highly radioactive to the point that it has been activated the second it touched water"; - sentence = Loader.getTokenizer().tokenize(test3); - words = PreProcessor.getAllRelevantWords(sentence); + sentence = WSDHelper.getTokenizer().tokenize(test3); + words = WSDHelper.getAllRelevantWords(sentence); targetWordIndex = 4; tags = new String[words.size()]; tokens = new String[words.size()]; @@ -87,14 +94,14 @@ public class MFSTester { //Constants.print("token : "+ tokens[i] + "_" + tags[i]); } - targetLemma = Loader.getLemmatizer().lemmatize( + targetLemma = WSDHelper.getLemmatizer().lemmatize( tokens[targetWordIndex], tags[targetWordIndex]); //Constants.print("lemma : "+ targetLemma); - Constants.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma)); - Constants.printResults(mfs, + WSDHelper.print(mfs.disambiguate(tokens, tags, targetWordIndex,targetLemma)); + WSDHelper.printResults(mfs, mfs.disambiguate(tokens, tags, targetWordIndex, targetLemma)); - Constants.print("----------------------------------------"); + WSDHelper.print("----------------------------------------"); } } \ No newline at end of file
