Remove leading number and tab from each line
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4a4bf910 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4a4bf910 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4a4bf910 Branch: refs/heads/LangDetect Commit: 4a4bf910022a41a7aa3beb92c4880894723cd20d Parents: 696c149 Author: Jörn Kottmann <[email protected]> Authored: Fri May 19 16:35:11 2017 +0200 Committer: Jörn Kottmann <[email protected]> Committed: Fri May 19 16:35:11 2017 +0200 ---------------------------------------------------------------------- .../tools/formats/leipzig/LeipzigLanguageSampleStream.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4bf910/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java index 582fb08..6c4d009 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java @@ -53,6 +53,7 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample> @Override public LanguageSample read() throws IOException { + if (sampleCount < numberOfSamples) { StringBuilder sampleString = new StringBuilder(); @@ -60,8 +61,10 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample> String line; while (count < sentencesPerSample && (line = lineStream.read()) != null) { + int textStart = line.indexOf('\t') + 1; + // TODO: It should it be changed to contain an array of sample strings ?! 
- sampleString.append(line + " "); + sampleString.append(line.substring(textStart) + " "); count++; }
