Remove the leading number and tab from each line

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4a4bf910
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4a4bf910
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4a4bf910

Branch: refs/heads/LangDetect
Commit: 4a4bf910022a41a7aa3beb92c4880894723cd20d
Parents: 696c149
Author: Jörn Kottmann <[email protected]>
Authored: Fri May 19 16:35:11 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Fri May 19 16:35:11 2017 +0200

----------------------------------------------------------------------
 .../tools/formats/leipzig/LeipzigLanguageSampleStream.java      | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4bf910/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
index 582fb08..6c4d009 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
@@ -53,6 +53,7 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample>
 
     @Override
     public LanguageSample read() throws IOException {
+
       if (sampleCount < numberOfSamples) {
         StringBuilder sampleString = new StringBuilder();
 
@@ -60,8 +61,10 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample>
         String line;
         while (count < sentencesPerSample && (line = lineStream.read()) != null) {
 
+          int textStart = line.indexOf('\t') + 1;
+
           // TODO: It should it be changed to contain an array of sample strings ?!
-          sampleString.append(line + " ");
+          sampleString.append(line.substring(textStart) + " ");
 
           count++;
         }

Reply via email to