Repository: opennlp
Updated Branches:
  refs/heads/master 6c2dbf288 -> b4eb29107


OPENNLP-1042: Correctly tokenize reference parse text

Closes #180


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b4eb2910
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b4eb2910
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b4eb2910

Branch: refs/heads/master
Commit: b4eb29107afb3cad68d3acadd6dccaa63ceef9a9
Parents: 6c2dbf2
Author: Jörn Kottmann <[email protected]>
Authored: Fri Apr 21 15:45:53 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Thu May 4 09:29:40 2017 +0200

----------------------------------------------------------------------
 .../src/main/java/opennlp/tools/parser/Parse.java   | 16 ++++++++++++++++
 .../java/opennlp/tools/parser/ParserEvaluator.java  | 11 +++++------
 2 files changed, 21 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/b4eb2910/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
index 5ee4f0a..d4265cf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
@@ -972,6 +972,22 @@ public class Parse implements Cloneable, Comparable<Parse> {
     return tags.toArray(new Parse[tags.size()]);
   }
 
+  public Parse[] getTokenNodes() {
+    List<Parse> tokens = new LinkedList<>();
+    List<Parse> nodes = new LinkedList<>();
+    nodes.addAll(this.parts);
+    while (nodes.size() != 0) {
+      Parse p = nodes.remove(0);
+      if (p.getType().equals(AbstractBottomUpParser.TOK_NODE)) {
+        tokens.add(p);
+      }
+      else {
+        nodes.addAll(0, p.parts);
+      }
+    }
+    return tokens.toArray(new Parse[tokens.size()]);
+  }
+
   /**
    * Returns the deepest shared parent of this node and the specified node.
    * If the nodes are identical then their parent is returned.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/b4eb2910/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
index 64b2b42..013a6c1 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
@@ -89,15 +89,14 @@ public class ParserEvaluator extends Evaluator<Parse> {
     return consts.toArray(new Span[consts.size()]);
   }
 
-  /* (non-Javadoc)
-   * @see opennlp.tools.util.eval.Evaluator#processSample(java.lang.Object)
-   */
   @Override
   protected final Parse processSample(final Parse reference) {
+    List<String> tokens = new ArrayList<>();
+    for (Parse token : reference.getTokenNodes()) {
+      tokens.add(token.getSpan().getCoveredText(reference.getText()).toString());
+    }
 
-    String sentenceText = reference.getText();
-
-    Parse[] predictions = ParserTool.parseLine(sentenceText, parser, 1);
+    Parse[] predictions = ParserTool.parseLine(String.join(" ", tokens), parser, 1);
 
     Parse prediction = null;
     if (predictions.length > 0) {

Reply via email to