Repository: opennlp Updated Branches: refs/heads/master 6c2dbf288 -> b4eb29107
OPENNLP-1042: Correctly tokenize reference parse text Closes #180 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b4eb2910 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b4eb2910 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b4eb2910 Branch: refs/heads/master Commit: b4eb29107afb3cad68d3acadd6dccaa63ceef9a9 Parents: 6c2dbf2 Author: Jörn Kottmann <[email protected]> Authored: Fri Apr 21 15:45:53 2017 +0200 Committer: Jörn Kottmann <[email protected]> Committed: Thu May 4 09:29:40 2017 +0200 ---------------------------------------------------------------------- .../src/main/java/opennlp/tools/parser/Parse.java | 16 ++++++++++++++++ .../java/opennlp/tools/parser/ParserEvaluator.java | 11 +++++------ 2 files changed, 21 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/b4eb2910/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java index 5ee4f0a..d4265cf 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java @@ -972,6 +972,22 @@ public class Parse implements Cloneable, Comparable<Parse> { return tags.toArray(new Parse[tags.size()]); } + public Parse[] getTokenNodes() { + List<Parse> tokens = new LinkedList<>(); + List<Parse> nodes = new LinkedList<>(); + nodes.addAll(this.parts); + while (nodes.size() != 0) { + Parse p = nodes.remove(0); + if (p.getType().equals(AbstractBottomUpParser.TOK_NODE)) { + tokens.add(p); + } + else { + nodes.addAll(0, p.parts); + } + } + return tokens.toArray(new Parse[tokens.size()]); + } + /** * Returns the deepest shared parent of this node and the specified node. * If the nodes are identical then their parent is returned. http://git-wip-us.apache.org/repos/asf/opennlp/blob/b4eb2910/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java index 64b2b42..013a6c1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java @@ -89,15 +89,14 @@ public class ParserEvaluator extends Evaluator<Parse> { return consts.toArray(new Span[consts.size()]); } - /* (non-Javadoc) - * @see opennlp.tools.util.eval.Evaluator#processSample(java.lang.Object) - */ @Override protected final Parse processSample(final Parse reference) { + List<String> tokens = new ArrayList<>(); + for (Parse token : reference.getTokenNodes()) { + tokens.add(token.getSpan().getCoveredText(reference.getText()).toString()); + } - String sentenceText = reference.getText(); - - Parse[] predictions = ParserTool.parseLine(sentenceText, parser, 1); + Parse[] predictions = ParserTool.parseLine(String.join(" ", tokens), parser, 1); Parse prediction = null; if (predictions.length > 0) {
