Repository: opennlp
Updated Branches:
  refs/heads/master 9a9366c78 -> 53e5e3fa8


OPENNLP-972 - add LM#predictNextTokens, rename to NGramLMTool, this closes 
apache/opennlp#100


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/53e5e3fa
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/53e5e3fa
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/53e5e3fa

Branch: refs/heads/master
Commit: 53e5e3fa8531bbb4d70523197215fd19b89c1a76
Parents: 9a9366c
Author: Tommaso Teofili <[email protected]>
Authored: Sun Jan 29 00:25:33 2017 +0100
Committer: Tommaso Teofili <[email protected]>
Committed: Sun Jan 29 00:25:33 2017 +0100

----------------------------------------------------------------------
 .../main/java/opennlp/tools/cmdline/CLI.java    |   4 +-
 .../languagemodel/LanguageModelTool.java        | 103 -------------------
 .../languagemodel/NGramLanguageModelTool.java   | 102 ++++++++++++++++++
 3 files changed, 104 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/53e5e3fa/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
index ca9b12f..9385a18 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
@@ -37,7 +37,7 @@ import opennlp.tools.cmdline.doccat.DoccatEvaluatorTool;
 import opennlp.tools.cmdline.doccat.DoccatTool;
 import opennlp.tools.cmdline.doccat.DoccatTrainerTool;
 import opennlp.tools.cmdline.entitylinker.EntityLinkerTool;
-import opennlp.tools.cmdline.languagemodel.LanguageModelTool;
+import opennlp.tools.cmdline.languagemodel.NGramLanguageModelTool;
 import opennlp.tools.cmdline.lemmatizer.LemmatizerEvaluatorTool;
 import opennlp.tools.cmdline.lemmatizer.LemmatizerMETool;
 import opennlp.tools.cmdline.lemmatizer.LemmatizerTrainerTool;
@@ -150,7 +150,7 @@ public final class CLI {
     tools.add(new EntityLinkerTool());
 
     // Language Model
-    tools.add(new LanguageModelTool());
+    tools.add(new NGramLanguageModelTool());
 
     for (CmdLineTool tool : tools) {
       toolLookupMap.put(tool.getName(), tool);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/53e5e3fa/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java
deleted file mode 100644
index aa46355..0000000
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package opennlp.tools.cmdline.languagemodel;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.Arrays;
-
-import opennlp.tools.cmdline.BasicCmdLineTool;
-import opennlp.tools.cmdline.CLI;
-import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.PerformanceMonitor;
-import opennlp.tools.cmdline.SystemInputStreamFactory;
-import opennlp.tools.languagemodel.NGramLanguageModel;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.StringList;
-
-/**
- * Command line tool for {@link opennlp.tools.languagemodel.LanguageModel#calculateProbability(StringList)}.
- */
-public class LanguageModelTool extends BasicCmdLineTool {
-
-  @Override
-  public String getShortDescription() {
-    return "gives the probability of a sequence of tokens in a language model";
-  }
-
-  @Override
-  public void run(String[] args) {
-    File lmFile = new File(args[0]);
-    FileInputStream stream = null;
-    try {
-      stream = new FileInputStream(lmFile);
-      NGramLanguageModel nGramLanguageModel = new NGramLanguageModel(
-          stream);
-
-      ObjectStream<String> lineStream;
-      PerformanceMonitor perfMon = null;
-
-      try {
-        lineStream = new PlainTextByLineStream(
-            new SystemInputStreamFactory(),
-            SystemInputStreamFactory.encoding());
-        perfMon = new PerformanceMonitor(System.err, "lm");
-        perfMon.start();
-        String line;
-        while ((line = lineStream.read()) != null) {
-          double probability;
-          String[] tokens = line.split(" ");
-          try {
-            probability = nGramLanguageModel
-                .calculateProbability(new StringList(tokens));
-          } catch (Exception e) {
-            System.err.println("Error:" + e.getLocalizedMessage());
-            System.err.println(line);
-            continue;
-          }
-
-          System.out.println("sequence '" + Arrays.toString(tokens)
-              + "' has a probability of " + probability);
-
-          perfMon.incrementCounter();
-        }
-      } catch (IOException e) {
-        CmdLineUtil.handleStdinIoError(e);
-      }
-
-      perfMon.stopAndPrintFinalResult();
-
-    } catch (java.io.IOException e) {
-      System.err.println(e.getLocalizedMessage());
-    } finally {
-      if (stream != null) {
-        try {
-          stream.close();
-        } catch (IOException e) {
-          // do nothing
-        }
-      }
-    }
-  }
-
-  @Override
-  public String getHelp() {
-    return "Usage: " + CLI.CMD + " " + getName() + " model";
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/53e5e3fa/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
new file mode 100644
index 0000000..1c599c5
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/NGramLanguageModelTool.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.cmdline.languagemodel;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import opennlp.tools.cmdline.BasicCmdLineTool;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.PerformanceMonitor;
+import opennlp.tools.cmdline.SystemInputStreamFactory;
+import opennlp.tools.languagemodel.NGramLanguageModel;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.StringList;
+
+/**
+ * Command line tool for {@link opennlp.tools.languagemodel.NGramLanguageModel}.
+ */
+public class NGramLanguageModelTool extends BasicCmdLineTool {
+
+  @Override
+  public String getShortDescription() {
+    return "gives the probability and most probable next token(s) of a sequence of tokens in a " +
+        "language model";
+  }
+
+  @Override
+  public void run(String[] args) {
+    File lmFile = new File(args[0]);
+    FileInputStream stream = null;
+    try {
+      stream = new FileInputStream(lmFile);
+      NGramLanguageModel nGramLanguageModel = new NGramLanguageModel(stream);
+
+      ObjectStream<String> lineStream;
+      PerformanceMonitor perfMon = null;
+
+      try {
+        lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(),
+            SystemInputStreamFactory.encoding());
+        perfMon = new PerformanceMonitor(System.err, "nglm");
+        perfMon.start();
+        String line;
+        while ((line = lineStream.read()) != null) {
+          double probability;
+          StringList predicted;
+          String[] tokens = line.split(" ");
+          StringList sample = new StringList(tokens);
+          try {
+            probability = nGramLanguageModel.calculateProbability(sample);
+            predicted = nGramLanguageModel.predictNextTokens(sample);
+          } catch (Exception e) {
+            System.err.println("Error:" + e.getLocalizedMessage());
+            System.err.println(line);
+            continue;
+          }
+
+          System.out.println(sample + " -> prob:" + probability + ", next:" + predicted);
+
+          perfMon.incrementCounter();
+        }
+      } catch (IOException e) {
+        CmdLineUtil.handleStdinIoError(e);
+      }
+
+      perfMon.stopAndPrintFinalResult();
+
+    } catch (java.io.IOException e) {
+      System.err.println(e.getLocalizedMessage());
+    } finally {
+      if (stream != null) {
+        try {
+          stream.close();
+        } catch (IOException e) {
+          // do nothing
+        }
+      }
+    }
+  }
+
+  @Override
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " model";
+  }
+}

Reply via email to