OPENNLP-1046: Correctly join tokens to text string

The joined text had one trailing space too many, which results in a
different parse tree when the method is used to reproduce an existing
parse tree, as is done by the parser evaluation tool.


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/bbbb4313
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/bbbb4313
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/bbbb4313

Branch: refs/heads/LangDetect
Commit: bbbb4313846efaba9546e86052940f0a79b8948f
Parents: 6059525
Author: Jörn Kottmann <jo...@apache.org>
Authored: Wed Apr 26 10:46:48 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Wed Apr 26 15:35:54 2017 +0200

----------------------------------------------------------------------
 .../main/java/opennlp/tools/cmdline/parser/ParserTool.java    | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/bbbb4313/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
index 499fa58..d8d3902 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
@@ -72,11 +72,8 @@ public final class ParserTool extends BasicCmdLineTool {
 
     // tokenize
     List<String> tokens = Arrays.asList( tokenizer.tokenize(line));
-    StringBuilder sb = new StringBuilder();
-    for (String tok : tokens) {
-      sb.append(tok).append(" ");
-    }
-    String text = sb.substring(0, sb.length());
+    String text = String.join(" ", tokens);
+
     Parse p = new Parse(text, new Span(0, text.length()), 
AbstractBottomUpParser.INC_NODE, 0, 0);
     int start = 0;
     int i = 0;

Reply via email to