Repository: opennlp Updated Branches: refs/heads/master e76ba3694 -> cff6e0009
OPENNLP-1025:Add javadocs and test for FileToStringSampleStream, this closes apache/opennlp#161 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/cff6e000 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/cff6e000 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/cff6e000 Branch: refs/heads/master Commit: cff6e00097bf7dd97a5404080df3bf7e95813a1d Parents: e76ba36 Author: jzonthemtn <[email protected]> Authored: Sun Apr 16 17:21:41 2017 -0400 Committer: smarthi <[email protected]> Committed: Sun Apr 16 17:21:52 2017 -0400 ---------------------------------------------------------------------- .../convert/FileToStringSampleStream.java | 19 ++++++ .../convert/FileToStringSampleStreamTest.java | 69 ++++++++++++++++++++ 2 files changed, 88 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/cff6e000/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java index 3b0476a..933d3b8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/FileToStringSampleStream.java @@ -28,16 +28,33 @@ import java.nio.charset.Charset; import opennlp.tools.util.FilterObjectStream; import opennlp.tools.util.ObjectStream; +/** + * Provides the ability to read the contents of files + * contained in an object stream of files. + * + */ public class FileToStringSampleStream extends FilterObjectStream<File, String> { private final Charset encoding; + /** + * Creates a new file-to-string sample stream. + * @param samples The {@link ObjectStream} containing the files. + * @param encoding The {@link Charset} encoding of the files. + */ public FileToStringSampleStream(ObjectStream<File> samples, Charset encoding) { super(samples); this.encoding = encoding; } + /** + * Reads the contents of a file to a string. + * @param textFile The {@link File} to read. + * @param encoding The {@link Charset} for the file. + * @return The string contents of the file. + * @throws IOException Thrown if the file cannot be read. + */ private static String readFile(File textFile, Charset encoding) throws IOException { Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(textFile), encoding)); @@ -63,6 +80,7 @@ public class FileToStringSampleStream extends FilterObjectStream<File, String> { return text.toString(); } + @Override public String read() throws IOException { File sampleFile = samples.read(); @@ -74,4 +92,5 @@ public class FileToStringSampleStream extends FilterObjectStream<File, String> { return null; } } + } http://git-wip-us.apache.org/repos/asf/opennlp/blob/cff6e000/opennlp-tools/src/test/java/opennlp/tools/convert/FileToStringSampleStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/convert/FileToStringSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/convert/FileToStringSampleStreamTest.java new file mode 100644 index 0000000..6f6f7dc --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/convert/FileToStringSampleStreamTest.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.convert; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import opennlp.tools.formats.DirectorySampleStream; +import opennlp.tools.formats.convert.FileToStringSampleStream; + +public class FileToStringSampleStreamTest { + + @Rule + public TemporaryFolder directory = new TemporaryFolder(); + + @Test + public void readFileTest() throws IOException { + + final String sentence1 = "This is a sentence."; + final String sentence2 = "This is another sentence."; + + List<String> sentences = Arrays.asList(sentence1, sentence2); + + DirectorySampleStream directorySampleStream = new DirectorySampleStream(directory.getRoot(), null, false); + + File tempFile1 = directory.newFile(); + FileUtils.writeStringToFile(tempFile1, sentence1); + + File tempFile2 = directory.newFile(); + FileUtils.writeStringToFile(tempFile2, sentence2); + + FileToStringSampleStream stream = + new FileToStringSampleStream(directorySampleStream, Charset.defaultCharset()); + + String read = stream.read(); + Assert.assertTrue(sentences.contains(read)); + + read = stream.read(); + Assert.assertTrue(sentences.contains(read)); + + stream.close(); + + } + +}
