Author: joern
Date: Wed Aug 12 13:30:52 2015
New Revision: 1695509

URL: http://svn.apache.org/r1695509
Log:
OPENNLP-794 

Initial code for CLI support:

    For now, only MFS is supported.
    The extra classes still need to be added to
    opennlp.tools.cmdline.CLI.java for build and test.

Thanks to  Anthony Beylerian for providing a patch!

Added:
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
   (with props)
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
   (with props)
Removed:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DatasetsReader/
Modified:
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
    
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java?rev=1695509&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
 Wed Aug 12 13:30:52 2015
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.params.EncodingParameter;
+
+/**
+ * Common evaluation parameters: the command line arguments accepted by
+ * {@link DisambiguatorEvaluatorTool}, on top of those inherited from
+ * {@link DisambiguatorToolParams}.
+ *
+ * Note: Do not use this class, internal use only!
+ */
+public interface DisambiguatorEvaluatorParams extends EncodingParameter,
+    DisambiguatorToolParams {
+
+  /**
+   * Optional parameter.
+   *
+   * @return the model file to be evaluated
+   */
+  @ParameterDescription(valueName = "model",
+      description = "the model file to be evaluated")
+  @OptionalParameter
+  File getModel();
+
+  /**
+   * Re-declares the inherited accessor without {@code @OptionalParameter}.
+   * NOTE(review): presumably this makes the test data mandatory for the
+   * evaluator - confirm against ArgumentParser.
+   *
+   * @return the file holding the data to be used during evaluation
+   */
+  @ParameterDescription(valueName = "testData",
+      description = "the data to be used during evaluation")
+  File getData();
+
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java?rev=1695509&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
 Wed Aug 12 13:30:52 2015
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.disambiguator.WSDEvaluator;
+import opennlp.tools.disambiguator.WSDSample;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Command line tool which runs a word sense disambiguator over a file of
+ * test samples and prints the accuracy reported by the {@link WSDEvaluator}.
+ */
+public final class DisambiguatorEvaluatorTool extends CmdLineTool {
+
+  public String getName() {
+    return "DisambiguatorEvaluator";
+  }
+
+  public String getShortDescription() {
+    return "Disambiguator Evaluation Tool";
+  }
+
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " "
+        + ArgumentParser.createUsage(DisambiguatorEvaluatorParams.class);
+  }
+
+  /**
+   * Validates the arguments, builds the requested disambiguator, evaluates
+   * it on the test data and prints the accuracy to standard out.
+   *
+   * @param args the command line arguments, see
+   *          {@link DisambiguatorEvaluatorParams}
+   * @throws TerminateToolException with code 1 if the arguments are invalid
+   *           or the requested disambiguator type is not available, and
+   *           with code -1 if the test data cannot be read
+   */
+  public void run(String[] args) {
+    if (!ArgumentParser.validateArguments(args,
+        DisambiguatorEvaluatorParams.class)) {
+      System.err.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    DisambiguatorEvaluatorParams params = ArgumentParser.parse(args,
+        DisambiguatorEvaluatorParams.class);
+
+    File testData = params.getData();
+    CmdLineUtil.checkInputFile("Test data", testData);
+
+    Charset encoding = params.getEncoding();
+
+    WSDisambiguator disambiguator = DisambiguatorTool.makeTool(params);
+
+    if (disambiguator == null) {
+      // no disambiguator could be built for the requested type; stop here
+      // with a clear message instead of failing later on a null reference
+      throw new TerminateToolException(1,
+          "Unsupported disambiguator type: " + params.getType());
+    }
+
+    WSDEvaluator evaluator = new WSDEvaluator(disambiguator);
+
+    System.out.print("Evaluating ... ");
+
+    ObjectStream<WSDSample> sampleStream = DisambiguatorTool.openSampleData(
+        "Test", testData, encoding);
+
+    try {
+      evaluator.evaluate(sampleStream);
+    } catch (IOException e) {
+      System.err.println("failed");
+      // the message travels with the exception, which also keeps the
+      // original IOException as its cause
+      throw new TerminateToolException(-1, "Reading test data error "
+          + e.getMessage(), e);
+    } finally {
+      try {
+        sampleStream.close();
+      } catch (IOException ignored) {
+        // best effort: the evaluation outcome is already decided at this
+        // point, a failing close must not replace it
+      }
+    }
+
+    System.out.println("done");
+
+    System.out.println();
+
+    System.out.println("Accuracy: " + evaluator.getAccuracy());
+  }
+}
\ No newline at end of file

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java?rev=1695509&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
 Wed Aug 12 13:30:52 2015
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.PerformanceMonitor;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.WSDSample;
+import opennlp.tools.disambiguator.WSDSampleStream;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Command line tool for the word sense disambiguator. It reads one sample
+ * per line from standard in and prints the disambiguation results. Only the
+ * MFS approach is supported for now.
+ */
+public class DisambiguatorTool extends CmdLineTool {
+
+  // TODO CmdLineTool should be an interface not abstract class
+  public String getName() {
+    return "Disambiguator";
+  }
+
+  public String getShortDescription() {
+    return "Word Sense Disambiguator";
+  }
+
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " "
+        + ArgumentParser.createUsage(DisambiguatorToolParams.class)
+        + " < sentences";
+  }
+
+  /**
+   * Validates the arguments, builds the requested disambiguator and
+   * disambiguates every sample line read from standard in.
+   *
+   * @param args the command line arguments, see
+   *          {@link DisambiguatorToolParams}
+   * @throws TerminateToolException with code 1 if the arguments are invalid
+   *           or the requested disambiguator type is not supported
+   */
+  public void run(String[] args) {
+
+    if (!ArgumentParser.validateArguments(args,
+        DisambiguatorToolParams.class)) {
+      System.err.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    DisambiguatorToolParams params = ArgumentParser.parse(args,
+        DisambiguatorToolParams.class);
+
+    WSDisambiguator disambiguator = makeTool(params);
+
+    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
+
+    // decode standard in with the charset given on the command line; fall
+    // back to the platform default only if none is available
+    Charset encoding = params.getEncoding();
+    if (encoding == null) {
+      encoding = Charset.defaultCharset();
+    }
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(
+        new InputStreamReader(System.in, encoding));
+
+    perfMon.start();
+
+    try {
+      String line;
+      while ((line = lineStream.read()) != null) {
+
+        WSDSample sample = WSDSample.parse(line);
+
+        Constants.printResults(disambiguator,
+            disambiguator.disambiguate(sample));
+
+        perfMon.incrementCounter();
+      }
+    } catch (IOException e) {
+      // NOTE(review): a malformed line rejected by WSDSample.parse appears
+      // to end up here too and is then reported as a stdin error - confirm
+      CmdLineUtil.handleStdinIoError(e);
+    }
+
+    perfMon.stopAndPrintFinalResult();
+
+  }
+
+  /**
+   * Creates the disambiguator selected by the type parameter.
+   *
+   * @param params the parsed command line parameters
+   * @return the disambiguator, never null
+   * @throws TerminateToolException with code 1 if the requested type is
+   *           not supported; only "mfs" is available for now
+   */
+  public static WSDisambiguator makeTool(DisambiguatorToolParams params) {
+
+    String type = params.getType();
+
+    if ("mfs".equalsIgnoreCase(type)) {
+      return new MFS();
+    }
+
+    // "lesk" and "ims" are listed in the usage text but not wired in yet;
+    // fail with a clear message rather than handing null to the callers
+    throw new TerminateToolException(1, "Unsupported disambiguator type: "
+        + type + " (only mfs is supported for now)");
+  }
+
+  /**
+   * Opens a sample file as a stream of {@link WSDSample} objects.
+   *
+   * @param sampleDataName name of the data set, used in error messages
+   * @param sampleDataFile the file to read the samples from
+   * @param encoding the charset used to decode the file
+   * @return the sample stream; the caller has to close it
+   */
+  static ObjectStream<WSDSample> openSampleData(String sampleDataName,
+      File sampleDataFile, Charset encoding) {
+    CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
+
+    FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(
+        sampleDataIn.getChannel(), encoding);
+
+    return new WSDSampleStream(lineStream);
+  }
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java?rev=1695509&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
 Wed Aug 12 13:30:52 2015
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.params.EncodingParameter;
+import opennlp.tools.cmdline.params.LanguageParams;
+
+/**
+ * Parameters for DisambiguatorTool.
+ *
+ * Note: Do not use this class, internal use only!
+ */
+interface DisambiguatorToolParams extends LanguageParams, EncodingParameter {
+
+  /**
+   * Optional parameter, defaults to "mfs".
+   *
+   * @return the name of the disambiguation approach
+   */
+  @ParameterDescription(valueName = "mfs|lesk|ims",
+      description = "The type of the disambiguator approach. One of mfs|lesk|ims.")
+  @OptionalParameter(defaultValue = "mfs")
+  String getType();
+
+  /**
+   * Optional parameter.
+   *
+   * @return the file holding the data to be used during evaluation
+   */
+  @ParameterDescription(valueName = "testData",
+      description = "the data to be used during evaluation")
+  @OptionalParameter
+  File getData();
+
+}
\ No newline at end of file

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java?rev=1695509&r1=1695508&r2=1695509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
 Wed Aug 12 13:30:52 2015
@@ -138,48 +138,56 @@ public class Constants {
           "you're", "yours", "yourself", "yourselves", "you've", "zero"));
 
   // Print a text in the console
-//Print a text in the console
- public static void printResults(WSDisambiguator disambiguator,
-     String[] results) {
-
-   if (results != null) {
-
-     String[] parts;
-     String sensekey;
-     if (disambiguator instanceof Lesk) {
-
-       Double score;
-
-       for (String result : results) {
-         parts = result.split(" ");
-         sensekey = parts[1];
-         score = Double.parseDouble(parts[2]);
-         try {
-           Constants.print("score : "
-               + score
-               + " for : "
-               + Loader.getDictionary().getWordBySenseKey(sensekey)
-                   .getSynset().getGloss());
-         } catch (JWNLException e) {
-           e.printStackTrace();
-         }
-       }
-     } else {
-       for (String result : results) {
-         parts = result.split(" ");
-         sensekey = parts[1];
-         try {
-           Constants.print("sense : "
-               + Loader.getDictionary().getWordBySenseKey(sensekey)
-                   .getSynset().getGloss());
-         } catch (JWNLException e) {
-           e.printStackTrace();
-         }
-       }
-     }
-   }
+  /**
+   * Prints one console line per disambiguation result. Each result is
+   * split on single spaces; the second token is used as the sense key and
+   * its WordNet gloss is looked up. For a {@link Lesk} disambiguator the
+   * third token is additionally parsed and printed as the score.
+   *
+   * @param disambiguator the disambiguator which produced the results
+   * @param results the results to print, nothing is printed for null
+   */
+  public static void printResults(WSDisambiguator disambiguator,
+      String[] results) {
+
+    if (results == null) {
+      return;
+    }
+
+    // only Lesk results carry a score in their third token
+    boolean withScore = disambiguator instanceof Lesk;
+
+    for (int i = 0; i < results.length; i++) {
+      String[] fields = results[i].split(" ");
+      String senseKey = fields[1];
+
+      StringBuilder line = new StringBuilder();
+      if (withScore) {
+        line.append("score : ").append(Double.parseDouble(fields[2]))
+            .append(" for sense ");
+      } else {
+        line.append("sense ");
+      }
+      line.append(i).append(" : ").append(senseKey).append(" : ");
+
+      try {
+        line.append(Loader.getDictionary().getWordBySenseKey(senseKey)
+            .getSynset().getGloss());
+        Constants.print(line.toString());
+      } catch (JWNLException e) {
+        e.printStackTrace();
+      }
+    }
 
- }
+  }
 
   public static void print(Object in) {
     if (in == null) {

Modified: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java?rev=1695509&r1=1695508&r2=1695509&view=diff
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
 (original)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
 Wed Aug 12 13:30:52 2015
@@ -136,7 +136,7 @@ public class WSDSample {
   public String getTargetLemma() {
     return targetLemma;
   }
-  
+
   public void setSentence(List<String> sentence) {
     this.sentence = sentence;
   }
@@ -183,6 +183,9 @@ public class WSDSample {
     return result.toString();
   }
 
+  /*
+   * Parses a sample of format : TargetIndex TargetLemma Token Tag Token Tag ...
+   */
   public static WSDSample parse(String sentenceString)
       throws InvalidFormatException {
 

Added: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
URL: 
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java?rev=1695509&view=auto
==============================================================================
--- 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
 (added)
+++ 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
 Wed Aug 12 13:30:52 2015
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Turns a stream of text lines into a stream of {@link WSDSample} objects
+ * by parsing one sample per line.
+ */
+public class WSDSampleStream extends FilterObjectStream<String, WSDSample> {
+
+  private static final Logger logger = Logger
+      .getLogger(WSDSampleStream.class.getName());
+
+  /**
+   * Initializes the current instance.
+   *
+   * @param sentences reader with sentences
+   * @throws IOException IOException
+   */
+  public WSDSampleStream(Reader sentences) throws IOException {
+    super(new PlainTextByLineStream(sentences));
+  }
+
+  /**
+   * Initializes the current instance.
+   *
+   * @param sentences stream of lines, each holding one sample
+   */
+  public WSDSampleStream(ObjectStream<String> sentences) {
+    super(sentences);
+  }
+
+  /**
+   * Parses the next sentence and returns the next {@link WSDSample} object.
+   *
+   * A sentence which cannot be parsed is skipped and a warning message is
+   * logged; reading continues with the following sentence. Usually it does
+   * not matter if one of many sentences is ignored.
+   *
+   * TODO: An exception in error case should be thrown.
+   *
+   * @return the next sample, or null once the sentences stream is exhausted
+   * @throws IOException if reading from the underlying stream fails
+   */
+  public WSDSample read() throws IOException {
+
+    String sentence;
+
+    while ((sentence = samples.read()) != null) {
+      try {
+        return WSDSample.parse(sentence);
+      } catch (InvalidFormatException e) {
+        // Returning null here would look like the end of the stream to the
+        // caller and drop all remaining sentences, so move on to the next
+        // line instead.
+        if (logger.isLoggable(Level.WARNING)) {
+          logger.warning("Error during parsing, ignoring sentence: "
+              + sentence);
+        }
+      }
+    }
+
+    // sentences stream is exhausted
+    return null;
+  }
+}

Propchange: 
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to