OPENNLP-1046: Correctly join tokens to text string. The text was one space too long, which results in a different parse tree if the method is used to reproduce an existing parse tree, as is done by the parser evaluation tool.
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bbbb4313 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bbbb4313 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bbbb4313 Branch: refs/heads/LangDetect Commit: bbbb4313846efaba9546e86052940f0a79b8948f Parents: 6059525 Author: Jörn Kottmann <jo...@apache.org> Authored: Wed Apr 26 10:46:48 2017 +0200 Committer: Jörn Kottmann <jo...@apache.org> Committed: Wed Apr 26 15:35:54 2017 +0200 ---------------------------------------------------------------------- .../main/java/opennlp/tools/cmdline/parser/ParserTool.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbbb4313/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java index 499fa58..d8d3902 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java @@ -72,11 +72,8 @@ public final class ParserTool extends BasicCmdLineTool { // tokenize List<String> tokens = Arrays.asList( tokenizer.tokenize(line)); - StringBuilder sb = new StringBuilder(); - for (String tok : tokens) { - sb.append(tok).append(" "); - } - String text = sb.substring(0, sb.length()); + String text = String.join(" ", tokens); + Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0); int start = 0; int i = 0;