Author: joern
Date: Wed Aug 12 13:30:52 2015
New Revision: 1695509
URL: http://svn.apache.org/r1695509
Log:
OPENNLP-794
Initial code for CLI support.
For now only MFS is supported.
The new tool classes still need to be added to opennlp.tools.cmdline.CLI.java
for build and test.
Thanks to Anthony Beylerian for providing a patch!
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
(with props)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
(with props)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
(with props)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
(with props)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
(with props)
Removed:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DatasetsReader/
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java?rev=1695509&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
Wed Aug 12 13:30:52 2015
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.params.EncodingParameter;
+
+/**
+ * Command line parameters of the disambiguator evaluation tool.
+ *
+ * Combines {@link EncodingParameter} with {@link DisambiguatorToolParams};
+ * {@link #getData()} is redeclared here without {@code @OptionalParameter}.
+ *
+ * Note: Do not use this class, internal use only!
+ */
+public interface DisambiguatorEvaluatorParams extends EncodingParameter,
+    DisambiguatorToolParams {
+
+  /**
+   * @return the model file to be evaluated (optional parameter)
+   */
+  @ParameterDescription(valueName = "model", description = "the model file to be evaluated")
+  @OptionalParameter
+  File getModel();
+
+  /**
+   * @return the file holding the data to be used during evaluation
+   */
+  @ParameterDescription(valueName = "testData", description = "the data to be used during evaluation")
+  File getData();
+
+}
Propchange:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorParams.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java?rev=1695509&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
Wed Aug 12 13:30:52 2015
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.disambiguator.WSDEvaluator;
+import opennlp.tools.disambiguator.WSDSample;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Command line tool which evaluates a word sense disambiguator on a file of
+ * test samples and prints the accuracy reported by {@link WSDEvaluator}.
+ */
+public final class DisambiguatorEvaluatorTool extends CmdLineTool {
+
+  public String getName() {
+    return "DisambiguatorEvaluator";
+  }
+
+  public String getShortDescription() {
+    return "Disambiguator Evaluation Tool";
+  }
+
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " "
+        + ArgumentParser.createUsage(DisambiguatorEvaluatorParams.class);
+  }
+
+  /**
+   * Validates the arguments, builds the requested disambiguator, runs the
+   * evaluation over the test data file and prints the accuracy to stdout.
+   *
+   * @param args the command line arguments, see {@link DisambiguatorEvaluatorParams}
+   * @throws TerminateToolException if the arguments are invalid, the requested
+   *     disambiguator type is not available or the test data cannot be read
+   */
+  public void run(String[] args) {
+    if (!ArgumentParser.validateArguments(args,
+        DisambiguatorEvaluatorParams.class)) {
+      System.err.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    DisambiguatorEvaluatorParams params = ArgumentParser.parse(args,
+        DisambiguatorEvaluatorParams.class);
+
+    File testData = params.getData();
+    CmdLineUtil.checkInputFile("Test data", testData);
+
+    Charset encoding = params.getEncoding();
+
+    WSDisambiguator disambiguator = DisambiguatorTool.makeTool(params);
+    if (disambiguator == null) {
+      // Guard against a factory that has no implementation for the requested
+      // type; evaluating with a null disambiguator would only fail later on.
+      throw new TerminateToolException(1,
+          "Unsupported disambiguator type: " + params.getType());
+    }
+
+    WSDEvaluator evaluator = new WSDEvaluator(disambiguator);
+
+    System.out.print("Evaluating ... ");
+
+    ObjectStream<WSDSample> sampleStream = DisambiguatorTool.openSampleData(
+        "Test", testData, encoding);
+
+    try {
+      evaluator.evaluate(sampleStream);
+    } catch (IOException e) {
+      System.err.println("failed");
+      // keep the cause so the failure can still be diagnosed by the caller
+      throw new TerminateToolException(-1,
+          "IO error while reading test data: " + e.getMessage(), e);
+    } finally {
+      try {
+        sampleStream.close();
+      } catch (IOException ignored) {
+        // best effort: a failing close must not mask the evaluation outcome
+      }
+    }
+
+    System.out.println("done");
+
+    System.out.println();
+
+    System.out.println("Accuracy: " + evaluator.getAccuracy());
+  }
+}
\ No newline at end of file
Propchange:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorEvaluatorTool.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java?rev=1695509&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
Wed Aug 12 13:30:52 2015
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.PerformanceMonitor;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.WSDSample;
+import opennlp.tools.disambiguator.WSDSampleStream;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.disambiguator.mfs.MFS;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Command line tool for the word sense disambiguator. Reads one sample per
+ * line from stdin and prints the disambiguation results.
+ *
+ * Only the MFS approach is supported for now.
+ */
+public class DisambiguatorTool extends CmdLineTool {
+
+  // TODO CmdLineTool should be an interface not abstract class
+  public String getName() {
+    return "Disambiguator";
+  }
+
+  public String getShortDescription() {
+    return "Word Sense Disambiguator";
+  }
+
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " "
+        + ArgumentParser.createUsage(DisambiguatorToolParams.class)
+        + " < sentences";
+  }
+
+  /**
+   * Validates the arguments, builds the requested disambiguator and
+   * disambiguates every sample line read from stdin.
+   *
+   * @param args the command line arguments, see {@link DisambiguatorToolParams}
+   * @throws TerminateToolException if the arguments are invalid or the
+   *     requested disambiguator type is not supported
+   */
+  public void run(String[] args) {
+
+    if (!ArgumentParser.validateArguments(args, DisambiguatorToolParams.class)) {
+      System.err.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    DisambiguatorToolParams params = ArgumentParser.parse(args,
+        DisambiguatorToolParams.class);
+
+    WSDisambiguator disambiguator = makeTool(params);
+
+    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
+
+    // decode stdin with the encoding parameter, as the evaluator tool does
+    // for its test data file, instead of always using the platform charset
+    ObjectStream<String> lineStream = new PlainTextByLineStream(
+        new InputStreamReader(System.in, params.getEncoding()));
+
+    perfMon.start();
+
+    try {
+      String line;
+      while ((line = lineStream.read()) != null) {
+
+        WSDSample sample = WSDSample.parse(line);
+
+        Constants.printResults(disambiguator,
+            disambiguator.disambiguate(sample));
+
+        perfMon.incrementCounter();
+      }
+    } catch (IOException e) {
+      CmdLineUtil.handleStdinIoError(e);
+    }
+
+    perfMon.stopAndPrintFinalResult();
+
+  }
+
+  /**
+   * Creates the disambiguator selected by {@link DisambiguatorToolParams#getType()}.
+   *
+   * @param params the parsed command line parameters
+   * @return the disambiguator for the requested type, never {@code null}
+   * @throws TerminateToolException if the requested type is not supported;
+   *     previously {@code null} was returned in that case, which led to a
+   *     {@link NullPointerException} on first use
+   */
+  public static WSDisambiguator makeTool(DisambiguatorToolParams params) {
+
+    String type = params.getType();
+
+    if ("mfs".equalsIgnoreCase(type)) {
+      return new MFS();
+    }
+
+    // "lesk" and "ims" are listed in the parameter description but no
+    // implementation is wired up here yet, so fail with a clear message
+    throw new TerminateToolException(1, "Unsupported disambiguator type: "
+        + type + " (only mfs is currently supported)");
+  }
+
+  /**
+   * Opens a sample data file as a stream of {@link WSDSample} objects.
+   *
+   * @param sampleDataName name of the data set, used in error messages
+   * @param sampleDataFile the file to read the samples from
+   * @param encoding the character encoding of the file
+   * @return a stream parsing one sample per line of the file
+   */
+  static ObjectStream<WSDSample> openSampleData(String sampleDataName,
+      File sampleDataFile, Charset encoding) {
+    CmdLineUtil.checkInputFile(sampleDataName + " Data", sampleDataFile);
+
+    FileInputStream sampleDataIn = CmdLineUtil.openInFile(sampleDataFile);
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(
+        sampleDataIn.getChannel(), encoding);
+
+    return new WSDSampleStream(lineStream);
+  }
+}
Propchange:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorTool.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java?rev=1695509&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
Wed Aug 12 13:30:52 2015
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.disambiguator;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.params.EncodingParameter;
+import opennlp.tools.cmdline.params.LanguageParams;
+
+/**
+ * Parameters for DisambiguatorTool.
+ *
+ * Note: Do not use this class, internal use only!
+ */
+interface DisambiguatorToolParams extends LanguageParams, EncodingParameter {
+
+  /**
+   * @return the disambiguation approach to use; the declared default is "mfs"
+   */
+  @ParameterDescription(valueName = "mfs|lesk|ims", description = "The type of the disambiguator approach. One of mfs|lesk|ims.")
+  @OptionalParameter(defaultValue = "mfs")
+  String getType();
+
+  /**
+   * @return the file holding the data to be used during evaluation (optional parameter)
+   */
+  @ParameterDescription(valueName = "testData", description = "the data to be used during evaluation")
+  @OptionalParameter
+  File getData();
+
+}
\ No newline at end of file
Propchange:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/cmdline/disambiguator/DisambiguatorToolParams.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java?rev=1695509&r1=1695508&r2=1695509&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
Wed Aug 12 13:30:52 2015
@@ -138,48 +138,56 @@ public class Constants {
"you're", "yours", "yourself", "yourselves", "you've", "zero"));
// Print a text in the console
-//Print a text in the console
- public static void printResults(WSDisambiguator disambiguator,
- String[] results) {
-
- if (results != null) {
-
- String[] parts;
- String sensekey;
- if (disambiguator instanceof Lesk) {
-
- Double score;
-
- for (String result : results) {
- parts = result.split(" ");
- sensekey = parts[1];
- score = Double.parseDouble(parts[2]);
- try {
- Constants.print("score : "
- + score
- + " for : "
- + Loader.getDictionary().getWordBySenseKey(sensekey)
- .getSynset().getGloss());
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
- } else {
- for (String result : results) {
- parts = result.split(" ");
- sensekey = parts[1];
- try {
- Constants.print("sense : "
- + Loader.getDictionary().getWordBySenseKey(sensekey)
- .getSynset().getGloss());
- } catch (JWNLException e) {
- e.printStackTrace();
- }
- }
- }
- }
+ // Prints one console line per result: its index, its sense key and the gloss looked up through Loader.getDictionary(); for a Lesk disambiguator the score is printed as well
+ public static void printResults(WSDisambiguator disambiguator,
+ String[] results) {
+
+ if (results != null) { // a null result array prints nothing
+
+ String[] parts;
+ String sensekey;
+ if (disambiguator instanceof Lesk) { // for Lesk a score is read from the third field
+
+ Double score;
+
+ for (int i = 0; i < results.length; i++) {
+ parts = results[i].split(" "); // fields of a result are separated by single spaces
+ sensekey = parts[1]; // second field is used as the sense key
+ score = Double.parseDouble(parts[2]); // third field is parsed as the score
+ try {
+ Constants.print("score : "
+ + score
+ + " for sense "
+ + i
+ + " : "
+ + sensekey
+ + " : "
+ + Loader.getDictionary().getWordBySenseKey(sensekey)
+ .getSynset().getGloss());
+ } catch (JWNLException e) {
+ e.printStackTrace(); // NOTE(review): a failed lookup only prints the stack trace; the loop goes on with the next result
+ }
+ }
+ } else { // any other disambiguator: only the sense key field is read
+ for (int i = 0; i < results.length; i++) {
+ parts = results[i].split(" ");
+ sensekey = parts[1]; // NOTE(review): assumes every result has at least two fields - confirm against the disambiguators
+ try {
+ Constants.print("sense "
+ + i
+ + " : "
+ + sensekey
+ + " : "
+ + Loader.getDictionary().getWordBySenseKey(sensekey)
+ .getSynset().getGloss());
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ }
- }
+ }
public static void print(Object in) {
if (in == null) {
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java?rev=1695509&r1=1695508&r2=1695509&view=diff
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
(original)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
Wed Aug 12 13:30:52 2015
@@ -136,7 +136,7 @@ public class WSDSample {
public String getTargetLemma() {
return targetLemma;
}
-
+
public void setSentence(List<String> sentence) {
this.sentence = sentence;
}
@@ -183,6 +183,9 @@ public class WSDSample {
return result.toString();
}
+ /*
+ * Parses a sample of the format: TargetIndex TargetLemma Token Tag Token Tag ...
+ */
public static WSDSample parse(String sentenceString)
throws InvalidFormatException {
Added:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java?rev=1695509&view=auto
==============================================================================
---
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
(added)
+++
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
Wed Aug 12 13:30:52 2015
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+public class WSDSampleStream extends FilterObjectStream<String, WSDSample> {
+
+ private static Logger logger =
Logger.getLogger(WSDSampleStream.class.getName());
+
+ /**
+ * Initializes the current instance.
+ *
+ * @param sentences reader with sentences
+ * @throws IOException IOException
+ */
+ public WSDSampleStream(Reader sentences) throws IOException {
+ super(new PlainTextByLineStream(sentences));
+ }
+
+ public WSDSampleStream(ObjectStream<String> sentences) {
+ super(sentences);
+ }
+
+ /**
+ * Parses the next sentence and return the next
+ * {@link WSDSample} object.
+ *
+ * If an error occurs an empty {@link WSDSample} object is returned
+ * and an warning message is logged. Usually it does not matter if one
+ * of many sentences is ignored.
+ *
+ * TODO: An exception in error case should be thrown.
+ */
+ public WSDSample read() throws IOException {
+
+ String sentence = samples.read();
+
+ if (sentence != null) {
+ WSDSample sample;
+ try {
+ sample = WSDSample.parse(sentence);
+ } catch (InvalidFormatException e) {
+
+ if (logger.isLoggable(Level.WARNING)) {
+ logger.warning("Error during parsing, ignoring sentence: " +
sentence);
+ }
+
+ sample = null;// new WSDSample(new String[]{}, new String[]{},0);
+ }
+
+ return sample;
+ }
+ else {
+ // sentences stream is exhausted
+ return null;
+ }
+ }
+}
Propchange:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSampleStream.java
------------------------------------------------------------------------------
svn:mime-type = text/plain