Repository: opennlp
Updated Branches:
  refs/heads/trunk 49f8e25a1 -> c4c4fd3f4

OPENNLP-882 Change test code to stop using deprecated PlainTextByLineStream constructor

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/c4c4fd3f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/c4c4fd3f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/c4c4fd3f

Branch: refs/heads/trunk
Commit: c4c4fd3f40ecd5e8140b34c4e9b6a331cb8987ff
Parents: 49f8e25
Author: William Colen <[email protected]>
Authored: Thu Nov 10 01:04:21 2016 -0200
Committer: William Colen <[email protected]>
Committed: Thu Nov 10 01:04:21 2016 -0200

----------------------------------------------------------------------
 .../tools/chunker/ChunkSampleStreamTest.java | 14 ++--
 .../opennlp/tools/chunker/ChunkSampleTest.java | 25 +++----
 .../ChunkerDetailedFMeasureListenerTest.java | 57 ++++++++-------
 .../tools/chunker/ChunkerEvaluatorTest.java | 44 +++++-------
 .../tools/chunker/ChunkerFactoryTest.java | 18 +++--
 .../opennlp/tools/chunker/ChunkerMETest.java | 18 +++--
 .../tools/ml/maxent/ScaleDoesntMatterTest.java | 11 +--
 .../namefind/NameSampleDataStreamTest.java | 76 ++++++++++----------
 .../tools/parser/ParseSampleStreamTest.java | 15 ++--
 .../opennlp/tools/parser/ParserTestUtil.java | 10 +--
 .../sentdetect/SentenceDetectorFactoryTest.java | 15 ++--
 .../sentdetect/SentenceDetectorMETest.java | 13 ++--
 .../tools/tokenize/TokenizerFactoryTest.java | 15 ++--
 .../tools/tokenize/TokenizerTestUtil.java | 10 +--
 .../tools/util/MockInputStreamFactory.java | 11 +++
 .../tools/util/PlainTextByLineStreamTest.java | 6 +-
 16 files changed, 188 insertions(+), 170 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java index 6e1d637..5bfbcb6 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java @@ -17,17 +17,18 @@ package opennlp.tools.chunker; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import java.io.IOException; -import java.io.StringReader; +import org.junit.Test; + +import opennlp.tools.util.MockInputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; -import org.junit.Test; - public class ChunkSampleStreamTest{ @Test @@ -53,8 +54,9 @@ public class ChunkSampleStreamTest{ sample.append('\n'); sample.append("word23 tag23 pred23"); sample.append('\n'); - - ObjectStream<String> stringStream = new PlainTextByLineStream(new StringReader(sample.toString())); + + ObjectStream<String> stringStream = new PlainTextByLineStream( + new MockInputStreamFactory(sample.toString()), UTF_8); ObjectStream<ChunkSample> chunkStream = new ChunkSampleStream(stringStream); @@ -84,5 +86,7 @@ public class ChunkSampleStreamTest{ assertEquals("pred23", secondSample.getPreds()[2]); assertNull(chunkStream.read()); + + chunkStream.close(); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java index ea3181f..2495272 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java @@ -17,6 
+17,7 @@ package opennlp.tools.chunker; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -24,16 +25,16 @@ import static org.junit.Assert.assertTrue; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.StringReader; import java.util.Arrays; +import org.junit.Test; + +import opennlp.tools.formats.ResourceAsStreamFactory; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; -import org.junit.Test; - public class ChunkSampleTest { @Test(expected=IllegalArgumentException.class) @@ -184,14 +185,11 @@ public class ChunkSampleTest { @Test public void testRegions() throws IOException { - InputStream in = getClass().getClassLoader() - .getResourceAsStream("opennlp/tools/chunker/output.txt"); + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/chunker/output.txt"); - String encoding = "UTF-8"; - - DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(in, - encoding)), false); + DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(in, UTF_8), false); ChunkSample cs1 = predictedSample.read(); String[] g1 = Span.spansToStrings(cs1.getPhrasesAsSpanList(), cs1.getSentence()); @@ -211,7 +209,10 @@ public class ChunkSampleTest { assertEquals("their spouses", g3[4]); assertEquals("lifetime access", g3[5]); assertEquals("to", g3[6]); - } + + predictedSample.close(); + + } // following are some tests to check the argument validation. 
Since all uses http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java index 905339b..835cfd7 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java @@ -17,56 +17,55 @@ package opennlp.tools.chunker; +import static java.nio.charset.StandardCharsets.UTF_8; import static junit.framework.Assert.assertEquals; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.util.Locale; +import org.junit.Test; + import opennlp.tools.cmdline.chunker.ChunkerDetailedFMeasureListener; +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.util.PlainTextByLineStream; -import org.junit.Test; - public class ChunkerDetailedFMeasureListenerTest { @Test public void testEvaluator() throws IOException { - String encoding = "UTF-8"; - - try (InputStream inPredicted = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/output.txt"); - InputStream inExpected = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/output.txt"); - InputStream detailedOutputStream = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/detailedOutput.txt")) { - DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( - new PlainTextByLineStream( - new InputStreamReader(inPredicted, encoding)), true); + ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory( + getClass(), "/opennlp/tools/chunker/output.txt"); + 
ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/chunker/output.txt"); + ResourceAsStreamFactory detailedOutputStream = new ResourceAsStreamFactory( + getClass(), "/opennlp/tools/chunker/detailedOutput.txt"); + + DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(inPredicted, UTF_8), true); - DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(inExpected)), false); + DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(inExpected, UTF_8), false); - Chunker dummyChunker = new DummyChunker(predictedSample); + Chunker dummyChunker = new DummyChunker(predictedSample); - ChunkerDetailedFMeasureListener listener = new ChunkerDetailedFMeasureListener(); - ChunkerEvaluator evaluator = new ChunkerEvaluator(dummyChunker, listener); + ChunkerDetailedFMeasureListener listener = new ChunkerDetailedFMeasureListener(); + ChunkerEvaluator evaluator = new ChunkerEvaluator(dummyChunker, listener); - evaluator.evaluate(expectedSample); + evaluator.evaluate(expectedSample); - StringBuilder expected = new StringBuilder(); - BufferedReader reader = new BufferedReader(new InputStreamReader(detailedOutputStream, encoding)); - String line = reader.readLine(); + StringBuilder expected = new StringBuilder(); + BufferedReader reader = new BufferedReader( + new InputStreamReader(detailedOutputStream.createInputStream(), UTF_8)); + String line = reader.readLine(); - while (line != null) { - expected.append(line); - expected.append("\n"); - line = reader.readLine(); - } - assertEquals(expected.toString().trim(), listener.createReport(Locale.ENGLISH).trim()); + while (line != null) { + expected.append(line); + expected.append("\n"); + line = reader.readLine(); } + assertEquals(expected.toString().trim(), listener.createReport(Locale.ENGLISH).trim()); } } 
http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java index 6af4c53..edd5a3b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java @@ -17,21 +17,21 @@ package opennlp.tools.chunker; +import static java.nio.charset.StandardCharsets.UTF_8; import static junit.framework.Assert.assertNotSame; import static org.junit.Assert.assertEquals; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.OutputStream; +import org.junit.Test; + import opennlp.tools.cmdline.chunker.ChunkEvaluationErrorListener; +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.eval.FMeasure; -import org.junit.Test; - /** * Tests for {@link ChunkerEvaluator}. 
* @@ -50,18 +50,16 @@ public class ChunkerEvaluatorTest { */ @Test public void testEvaluator() throws IOException { - InputStream inPredicted = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/output.txt"); - InputStream inExpected = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/output.txt"); + ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory( + getClass(), "/opennlp/tools/chunker/output.txt"); + ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/chunker/output.txt"); - String encoding = "UTF-8"; - - DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(inPredicted, encoding)), true); - - DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(inExpected)), false); + DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(inPredicted, UTF_8), true); + + DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(inExpected, UTF_8), false); Chunker dummyChunker = new DummyChunker(predictedSample); @@ -82,20 +80,16 @@ public class ChunkerEvaluatorTest { @Test public void testEvaluatorNoError() throws IOException { - InputStream inPredicted = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/output.txt"); - InputStream inExpected = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/output.txt"); - - String encoding = "UTF-8"; + ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory( + getClass(), "/opennlp/tools/chunker/output.txt"); + ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/chunker/output.txt"); DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(inPredicted, encoding)), - 
true); + new PlainTextByLineStream(inPredicted, UTF_8), true); DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(inExpected, encoding)), - true); + new PlainTextByLineStream(inExpected, UTF_8), true); Chunker dummyChunker = new DummyChunker(predictedSample); http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java index da5ba5c..7b29ce7 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java @@ -17,22 +17,21 @@ package opennlp.tools.chunker; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; +import org.junit.Test; + +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelType; -import org.junit.Test; - /** * Tests for the {@link ChunkerFactory} class. 
*/ @@ -40,12 +39,11 @@ public class ChunkerFactoryTest { private static ObjectStream<ChunkSample> createSampleStream() throws IOException { - InputStream in = ChunkerFactoryTest.class.getClassLoader() - .getResourceAsStream("opennlp/tools/chunker/test.txt"); - Reader sentences = new InputStreamReader(in); + ResourceAsStreamFactory in = new ResourceAsStreamFactory( + ChunkerFactoryTest.class, "/opennlp/tools/chunker/test.txt"); - ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream( - sentences)); + ChunkSampleStream stream = new ChunkSampleStream( + new PlainTextByLineStream(in, UTF_8)); return stream; } http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java index 35d6205..bfb6b2c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java @@ -17,17 +17,20 @@ package opennlp.tools.chunker; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertTrue; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.util.Arrays; import java.util.List; +import org.junit.Before; +import org.junit.Test; + +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.namefind.NameFinderME; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; @@ -35,9 +38,6 @@ import opennlp.tools.util.Sequence; import opennlp.tools.util.Span; import opennlp.tools.util.TrainingParameters; -import org.junit.Before; 
-import org.junit.Test; - /** * This is the test class for {@link NameFinderME}. * <p> @@ -72,13 +72,11 @@ public class ChunkerMETest { public void startup() throws IOException { // train the chunker - InputStream in = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/chunker/test.txt"); - - String encoding = "UTF-8"; + ResourceAsStreamFactory in = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/chunker/test.txt"); ObjectStream<ChunkSample> sampleStream = new ChunkSampleStream( - new PlainTextByLineStream(new InputStreamReader(in, encoding))); + new PlainTextByLineStream(in, UTF_8)); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java index 7b1f474..f6c38d8 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/ScaleDoesntMatterTest.java @@ -17,13 +17,14 @@ package opennlp.tools.ml.maxent; -import java.io.StringReader; +import static java.nio.charset.StandardCharsets.UTF_8; import junit.framework.TestCase; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.ml.model.OnePassRealValueDataIndexer; import opennlp.tools.ml.model.RealValueFileEventStream; +import opennlp.tools.util.MockInputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; @@ -46,9 +47,9 @@ public class ScaleDoesntMatterTest extends TestCase { String largeTest = "predA=20 predB=20"; - StringReader smallReader = new 
StringReader(smallValues); ObjectStream<Event> smallEventStream = new RealBasicEventStream( - new PlainTextByLineStream(smallReader)); + new PlainTextByLineStream(new MockInputStreamFactory(smallValues), + UTF_8)); MaxentModel smallModel = GIS.trainModel(100, new OnePassRealValueDataIndexer(smallEventStream, 0), false); @@ -59,9 +60,9 @@ public class ScaleDoesntMatterTest extends TestCase { String smallResultString = smallModel.getAllOutcomes(smallResults); System.out.println("smallResults: " + smallResultString); - StringReader largeReader = new StringReader(largeValues); ObjectStream<Event> largeEventStream = new RealBasicEventStream( - new PlainTextByLineStream(largeReader)); + new PlainTextByLineStream(new MockInputStreamFactory(largeValues), + UTF_8)); MaxentModel largeModel = GIS.trainModel(100, new OnePassRealValueDataIndexer(largeEventStream, 0), false); http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java index a2cb517..8bb39f9 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java @@ -17,6 +17,8 @@ package opennlp.tools.namefind; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; @@ -24,22 +26,22 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.ObjectStreamException; 
-import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.junit.Test; + +import opennlp.tools.formats.ResourceAsStreamFactory; +import opennlp.tools.util.InputStreamFactory; +import opennlp.tools.util.MockInputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.ObjectStreamUtils; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; -import org.junit.Test; - /** * This is the test class for {@link NameSampleDataStream}.. */ @@ -74,13 +76,11 @@ public class NameSampleDataStreamTest { */ @Test public void testWithoutNameTypes() throws Exception { - InputStream in = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/namefind/AnnotatedSentences.txt"); - - String encoding = "ISO-8859-1"; + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/namefind/AnnotatedSentences.txt"); NameSampleDataStream ds = new NameSampleDataStream( - new PlainTextByLineStream(new InputStreamReader(in, encoding))); + new PlainTextByLineStream(in, ISO_8859_1)); NameSample ns = ds.read(); @@ -102,6 +102,8 @@ public class NameSampleDataStreamTest { ns = ds.read(); } + ds.close(); + assertEquals(expectedNames.length, names.size()); assertEquals(createDefaultSpan(6,8), spans.get(0)); assertEquals(createDefaultSpan(3,4), spans.get(1)); @@ -137,28 +139,24 @@ public class NameSampleDataStreamTest { */ @Test public void testWithoutNameTypeAndInvalidData() { - NameSampleDataStream sampleStream = new NameSampleDataStream( - ObjectStreamUtils.createObjectStream("<START> <START> Name <END>")); - try { + try (NameSampleDataStream sampleStream = new NameSampleDataStream( + ObjectStreamUtils.createObjectStream("<START> <START> Name <END>"))) { sampleStream.read(); fail(); } catch (IOException e) { } - sampleStream = new NameSampleDataStream( - ObjectStreamUtils.createObjectStream("<START> Name <END> <END>")); - - try { + try 
(NameSampleDataStream sampleStream = new NameSampleDataStream( + ObjectStreamUtils.createObjectStream("<START> Name <END> <END>"))) { sampleStream.read(); fail(); } catch (IOException e) { } - sampleStream = new NameSampleDataStream( - ObjectStreamUtils.createObjectStream("<START> <START> Person <END> Street <END>")); - - try { + try (NameSampleDataStream sampleStream = new NameSampleDataStream( + ObjectStreamUtils.createObjectStream( + "<START> <START> Person <END> Street <END>"));) { sampleStream.read(); fail(); } catch (IOException e) { @@ -173,11 +171,11 @@ public class NameSampleDataStreamTest { */ @Test public void testWithNameTypes() throws Exception { - InputStream in = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/namefind/voa1.train"); + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/namefind/voa1.train"); NameSampleDataStream ds = new NameSampleDataStream( - new PlainTextByLineStream(new InputStreamReader(in))); + new PlainTextByLineStream(in, UTF_8)); Map<String, List<String>> names = new HashMap<String, List<String>>(); Map<String, List<Span>> spans = new HashMap<String, List<Span>>(); @@ -197,7 +195,8 @@ public class NameSampleDataStreamTest { .add(nameSpan); } } - + ds.close(); + String[] expectedPerson = { "Barack Obama", "Obama", "Obama", "Lee Myung - bak", "Obama", "Obama", "Scott Snyder", "Snyder", "Obama", "Obama", "Obama", "Tim Peters", "Obama", "Peters" }; @@ -295,19 +294,16 @@ public class NameSampleDataStreamTest { @Test public void testWithNameTypeAndInvalidData() { - NameSampleDataStream sampleStream = new NameSampleDataStream( - ObjectStreamUtils.createObjectStream("<START:> Name <END>")); - - try { + try (NameSampleDataStream sampleStream = new NameSampleDataStream( + ObjectStreamUtils.createObjectStream("<START:> Name <END>"))) { sampleStream.read(); fail(); } catch (IOException e) { } - sampleStream = new NameSampleDataStream( - ObjectStreamUtils.createObjectStream("<START:street> 
<START:person> Name <END> <END>")); - - try { + try (NameSampleDataStream sampleStream = new NameSampleDataStream( + ObjectStreamUtils.createObjectStream( + "<START:street> <START:person> Name <END> <END>"))) { sampleStream.read(); fail(); } catch (IOException e) { @@ -323,8 +319,8 @@ public class NameSampleDataStreamTest { trainingData.append("\n"); trainingData.append("d\n"); - ObjectStream<String> untokenizedLineStream = - new PlainTextByLineStream(new StringReader(trainingData.toString())); + ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream( + new MockInputStreamFactory(trainingData.toString()), UTF_8); ObjectStream<NameSample> trainingStream = new NameSampleDataStream(untokenizedLineStream); @@ -333,15 +329,17 @@ public class NameSampleDataStreamTest { assertFalse(trainingStream.read().isClearAdaptiveDataSet()); assertTrue(trainingStream.read().isClearAdaptiveDataSet()); assertNull(trainingStream.read()); + + trainingStream.close(); } @Test public void testHtmlNameSampleParsing() throws IOException { - InputStream in = getClass().getClassLoader().getResourceAsStream( - "opennlp/tools/namefind/html1.train"); + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/namefind/html1.train"); NameSampleDataStream ds = new NameSampleDataStream( - new PlainTextByLineStream(new InputStreamReader(in, "UTF-8"))); + new PlainTextByLineStream(in, UTF_8)); NameSample ns = ds.read(); @@ -396,5 +394,7 @@ public class NameSampleDataStreamTest { assertEquals("</html>", ns.getSentence()[0]); assertNull(ds.read()); + + ds.close(); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java index e35b036..a34af37 100644 
--- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java @@ -17,26 +17,27 @@ package opennlp.tools.parser; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; +import org.junit.Test; + +import opennlp.tools.formats.ResourceAsStreamFactory; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; -import org.junit.Test; - public class ParseSampleStreamTest { static ObjectStream<Parse> createParseSampleStream() throws IOException { - InputStream in = ParseSampleStreamTest.class.getResourceAsStream( - "/opennlp/tools/parser/test.parse"); + InputStreamFactory in = new ResourceAsStreamFactory( + ParseSampleStreamTest.class, "/opennlp/tools/parser/test.parse"); - return new ParseSampleStream(new PlainTextByLineStream(new InputStreamReader(in, "UTF-8"))); + return new ParseSampleStream(new PlainTextByLineStream(in, UTF_8)); } @Test http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java index 9cdca64..b14e182 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java @@ -17,6 +17,7 @@ package opennlp.tools.parser; +import static java.nio.charset.StandardCharsets.UTF_8; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; @@ -24,7 +25,9 @@ import java.io.InputStreamReader; import 
java.io.UnsupportedEncodingException; import junit.framework.Assert; +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.parser.lang.en.HeadRules; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; @@ -61,10 +64,9 @@ public class ParserTestUtil { try { if (samples != null) samples.close(); - - samples = new ParseSampleStream(new PlainTextByLineStream( - new InputStreamReader( - ParserTestUtil.class.getResourceAsStream("/opennlp/tools/parser/parser.train"), "UTF-8"))); + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/parser/parser.train"); + samples = new ParseSampleStream(new PlainTextByLineStream(in, UTF_8)); } catch (UnsupportedEncodingException e) { // Should never happen Assert.fail(e.getMessage()); http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java index 4150281..f84fb25 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java @@ -17,6 +17,7 @@ package opennlp.tools.sentdetect; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -25,20 +26,21 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.util.Arrays; +import org.junit.Test; + import opennlp.tools.dictionary.Dictionary; 
+import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.sentdetect.DummySentenceDetectorFactory.DummyDictionary; import opennlp.tools.sentdetect.DummySentenceDetectorFactory.DummyEOSScanner; import opennlp.tools.sentdetect.DummySentenceDetectorFactory.DummySDContextGenerator; import opennlp.tools.sentdetect.lang.Factory; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; -import org.junit.Test; - /** * Tests for the {@link SentenceDetectorME} class. */ @@ -46,11 +48,12 @@ public class SentenceDetectorFactoryTest { private static ObjectStream<SentenceSample> createSampleStream() throws IOException { - InputStream in = SentenceDetectorFactoryTest.class.getClassLoader() - .getResourceAsStream("opennlp/tools/sentdetect/Sentences.txt"); + InputStreamFactory in = new ResourceAsStreamFactory( + SentenceDetectorFactoryTest.class, + "/opennlp/tools/sentdetect/Sentences.txt"); return new SentenceSampleStream(new PlainTextByLineStream( - new InputStreamReader(in))); + in, UTF_8)); } private static SentenceModel train(SentenceDetectorFactory factory) http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java index b648dc3..4a33c5f 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java @@ -18,18 +18,19 @@ package opennlp.tools.sentdetect; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import java.io.IOException; -import 
java.io.InputStream; -import java.io.InputStreamReader; +import org.junit.Test; + +import opennlp.tools.formats.ResourceAsStreamFactory; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; import opennlp.tools.util.TrainingParameters; -import org.junit.Test; - /** * Tests for the {@link SentenceDetectorME} class. */ @@ -38,7 +39,7 @@ public class SentenceDetectorMETest { @Test public void testSentenceDetector() throws IOException { - InputStream in = getClass().getResourceAsStream( + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), "/opennlp/tools/sentdetect/Sentences.txt"); TrainingParameters mlParams = new TrainingParameters(); @@ -46,7 +47,7 @@ public class SentenceDetectorMETest { mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0)); SentenceModel sentdetectModel = SentenceDetectorME.train( - "en", new SentenceSampleStream(new PlainTextByLineStream(new InputStreamReader(in))), true, null, mlParams); + "en", new SentenceSampleStream(new PlainTextByLineStream(in, UTF_8)), true, null, mlParams); assertEquals("en", sentdetectModel.getLanguage()); http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java index 33f2e55..96d19a6 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java @@ -17,6 +17,7 @@ package opennlp.tools.tokenize; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; @@ -26,19 +27,20 @@ 
import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.util.regex.Pattern; +import org.junit.Test; + import opennlp.tools.dictionary.Dictionary; +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.tokenize.DummyTokenizerFactory.DummyContextGenerator; import opennlp.tools.tokenize.DummyTokenizerFactory.DummyDictionary; import opennlp.tools.tokenize.lang.Factory; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; -import org.junit.Test; - /** * Tests for the {@link TokenizerFactory} class. */ @@ -46,11 +48,10 @@ public class TokenizerFactoryTest { private static ObjectStream<TokenSample> createSampleStream() throws IOException { - InputStream in = TokenizerFactoryTest.class.getClassLoader() - .getResourceAsStream("opennlp/tools/tokenize/token.train"); + InputStreamFactory in = new ResourceAsStreamFactory( + TokenizerFactoryTest.class, "/opennlp/tools/tokenize/token.train"); - return new TokenSampleStream(new PlainTextByLineStream( - new InputStreamReader(in))); + return new TokenSampleStream(new PlainTextByLineStream(in, UTF_8)); } private static TokenizerModel train(TokenizerFactory factory) http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java index ac7b364..f8eb85b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java @@ -18,13 +18,15 @@ package opennlp.tools.tokenize; 
+import static java.nio.charset.StandardCharsets.UTF_8; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; +import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.util.CollectionObjectStream; +import opennlp.tools.util.InputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; @@ -63,11 +65,11 @@ public class TokenizerTestUtil { static TokenizerModel createMaxentTokenModel() throws IOException { - InputStream trainDataIn = TokenizerTestUtil.class.getResourceAsStream( - "/opennlp/tools/tokenize/token.train"); + InputStreamFactory trainDataIn = new ResourceAsStreamFactory( + TokenizerModel.class, "/opennlp/tools/tokenize/token.train"); ObjectStream<TokenSample> samples = new TokenSampleStream( - new PlainTextByLineStream(new InputStreamReader(trainDataIn, "UTF-8"))); + new PlainTextByLineStream(trainDataIn, UTF_8)); TrainingParameters mlParams = new TrainingParameters(); mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java b/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java index a95a903..3c65833 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java @@ -17,9 +17,12 @@ package opennlp.tools.util; +import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; public class 
MockInputStreamFactory implements InputStreamFactory { @@ -28,6 +31,14 @@ public class MockInputStreamFactory implements InputStreamFactory { public MockInputStreamFactory(InputStream is) throws FileNotFoundException { this.is = is; } + + public MockInputStreamFactory(String str) throws FileNotFoundException { + this.is = new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)); + } + + public MockInputStreamFactory(String str, Charset charset) throws FileNotFoundException { + this.is = new ByteArrayInputStream(str.getBytes(charset)); + } @Override public InputStream createInputStream() throws IOException { http://git-wip-us.apache.org/repos/asf/opennlp/blob/c4c4fd3f/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java index 607a42a..66af38e 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java @@ -17,10 +17,10 @@ package opennlp.tools.util; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import java.io.IOException; -import java.io.StringReader; import org.junit.Test; @@ -42,11 +42,13 @@ public class PlainTextByLineStreamTest { testString.append('\n'); ObjectStream<String> stream = - new PlainTextByLineStream(new StringReader(testString.toString())); + new PlainTextByLineStream(new MockInputStreamFactory(testString.toString()), UTF_8); assertEquals("line1", stream.read()); assertEquals("line2", stream.read()); assertEquals("line3", stream.read()); assertEquals("line4", stream.read()); + + stream.close(); } }
