Author: tommaso
Date: Thu Jan 29 08:02:31 2015
New Revision: 1655546
URL: http://svn.apache.org/r1655546
Log:
OPENNLP-746 - added unit test for NGramModel
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
(with props)
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
(with props)
Modified:
opennlp/trunk/opennlp-tools/pom.xml
Modified: opennlp/trunk/opennlp-tools/pom.xml
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/pom.xml?rev=1655546&r1=1655545&r2=1655546&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/pom.xml (original)
+++ opennlp/trunk/opennlp-tools/pom.xml Thu Jan 29 08:02:31 2015
@@ -53,6 +53,13 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.4</version>
+ <scope>test</scope>
</dependency>
</dependencies>
Added:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java?rev=1655546&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java Thu Jan 29 08:02:31 2015
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package opennlp.tools.ngram;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.StringList;
+import org.apache.commons.io.IOUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link opennlp.tools.ngram.NGramModel}
+ */
+public class NGramModelTest {
+
+  /** Charset used to decode both XML documents; the expected resource declares encoding="UTF-8". */
+  private static final Charset UTF_8 = Charset.forName("UTF-8");
+
+  /** An empty model is expected to have size 0 and to report a count of 0 for a looked-up n-gram. */
+  @Test
+  public void testZeroGetCount() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    int count = ngramModel.getCount(new StringList(""));
+    assertEquals(0, count);
+    assertEquals(0, ngramModel.size());
+  }
+
+  /** An n-gram that was never added is expected to have a count of 0. */
+  @Test
+  public void testZeroGetCount2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"));
+    int count = ngramModel.getCount(new StringList("fox"));
+    assertEquals(0, count);
+    assertEquals(1, ngramModel.size());
+  }
+
+  /** Adding a trigram without an n-gram range is expected not to count its unigrams. */
+  @Test
+  public void testAdd() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"));
+    int count = ngramModel.getCount(new StringList("the"));
+    assertEquals(0, count);
+    assertEquals(1, ngramModel.size());
+  }
+
+  /** A trigram added as a whole is expected to be counted once. */
+  @Test
+  public void testAdd1() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"));
+    int count = ngramModel.getCount(new StringList("the", "bro", "wn"));
+    assertEquals(1, count);
+    assertEquals(1, ngramModel.size());
+  }
+
+  /** Adding three tokens with range 2..3 is expected to store three n-grams. */
+  @Test
+  public void testAdd2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"), 2, 3);
+    int count = ngramModel.getCount(new StringList("the", "bro", "wn"));
+    assertEquals(1, count);
+    assertEquals(3, ngramModel.size());
+  }
+
+  /** With range 2..3 the trigram and each of its bigrams are expected to be counted once. */
+  @Test
+  public void testAdd3() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "brown", "fox"), 2, 3);
+    int count = ngramModel.getCount(new StringList("the", "brown", "fox"));
+    assertEquals(1, count);
+    count = ngramModel.getCount(new StringList("the", "brown"));
+    assertEquals(1, count);
+    count = ngramModel.getCount(new StringList("brown", "fox"));
+    assertEquals(1, count);
+    assertEquals(3, ngramModel.size());
+  }
+
+  /** Removing the only n-gram that was added is expected to leave an empty model. */
+  @Test
+  public void testRemove() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens);
+    ngramModel.remove(tokens);
+    assertEquals(0, ngramModel.size());
+  }
+
+  /** A unigram is expected not to be contained when only the full trigram was added. */
+  @Test
+  public void testContains() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens);
+    assertFalse(ngramModel.contains(new StringList("the")));
+  }
+
+  /** A unigram is expected to be contained when the tokens were added with range 1..3. */
+  @Test
+  public void testContains2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens, 1, 3);
+    assertTrue(ngramModel.contains(new StringList("the")));
+  }
+
+  /** Three tokens added with range 1..3 are expected to give 3 + 2 + 1 = 6 n-grams in total. */
+  @Test
+  public void testNumberOfGrams() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens, 1, 3);
+    assertEquals(6, ngramModel.numberOfGrams());
+  }
+
+  /** cutoff(2, 4) is expected to empty a model in which every n-gram was added once. */
+  @Test
+  public void testCutoff1() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    ngramModel.cutoff(2, 4);
+    assertEquals(0, ngramModel.size());
+  }
+
+  /** cutoff(1, 3) is expected to keep all nine n-grams when every n-gram was added once. */
+  @Test
+  public void testCutoff2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    ngramModel.cutoff(1, 3);
+    assertEquals(9, ngramModel.size());
+  }
+
+  /** toDictionary() is expected to yield 9 entries for two inputs that differ only in "fox"/"Fox". */
+  @Test
+  public void testToDictionary() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    tokens = new StringList("the", "brown", "Fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    Dictionary dictionary = ngramModel.toDictionary();
+    assertNotNull(dictionary);
+    assertEquals(9, dictionary.size());
+    assertEquals(1, dictionary.getMinTokenCount());
+    assertEquals(3, dictionary.getMaxTokenCount());
+  }
+
+  /** toDictionary(true) is expected to yield 14 entries (presumably the case-sensitive variant). */
+  @Test
+  public void testToDictionary1() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    tokens = new StringList("the", "brown", "Fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    Dictionary dictionary = ngramModel.toDictionary(true);
+    assertNotNull(dictionary);
+    assertEquals(14, dictionary.size());
+    assertEquals(1, dictionary.getMinTokenCount());
+    assertEquals(3, dictionary.getMaxTokenCount());
+  }
+
+  /**
+   * Serializes a model and compares the output, ignoring whitespace, with the
+   * expected XML resource.
+   * NOTE(review): this is a textual comparison, so it presumably depends on the
+   * order in which the entries are written - confirm that this order is stable.
+   */
+  @Test
+  public void testSerialize() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    tokens = new StringList("the", "brown", "Fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    ngramModel.serialize(out);
+    InputStream nGramModelStream =
+        getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml");
+    // Fail with a clear message instead of a NullPointerException when the resource is missing.
+    assertNotNull("ngram-model.xml not found on the test classpath", nGramModelStream);
+    String modelString;
+    try {
+      // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
+      modelString = IOUtils.toString(nGramModelStream, UTF_8);
+    } finally {
+      nGramModelStream.close();
+    }
+    String outputString = out.toString(UTF_8.name());
+    assertEquals(stripWhitespace(modelString), stripWhitespace(outputString));
+  }
+
+  /** Removes the characters the comparison ignores: newlines, carriage returns, tabs and spaces. */
+  private static String stripWhitespace(String text) {
+    return text.replaceAll("[\\n\\r\\t ]", "");
+  }
+}
\ No newline at end of file
Propchange:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added:
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml?rev=1655546&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml (added)
+++ opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml Thu Jan 29 08:02:31 2015
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dictionary case_sensitive="false">
+ <entry count="1">
+ <token>brown</token>
+ <token>fox</token>
+ </entry>
+ <entry count="1">
+ <token>fox</token>
+ </entry>
+ <entry count="1">
+ <token>brown</token>
+ <token>fox</token>
+ <token>jumped</token>
+ </entry>
+ <entry count="2">
+ <token>the</token>
+ </entry>
+ <entry count="1">
+ <token>the</token>
+ <token>brown</token>
+ <token>fox</token>
+ </entry>
+ <entry count="1">
+ <token>the</token>
+ <token>brown</token>
+ <token>Fox</token>
+ </entry>
+ <entry count="2">
+ <token>jumped</token>
+ </entry>
+ <entry count="2">
+ <token>brown</token>
+ </entry>
+ <entry count="1">
+ <token>brown</token>
+ <token>Fox</token>
+ <token>jumped</token>
+ </entry>
+ <entry count="1">
+ <token>Fox</token>
+ </entry>
+ <entry count="1">
+ <token>fox</token>
+ <token>jumped</token>
+ </entry>
+ <entry count="2">
+ <token>the</token>
+ <token>brown</token>
+ </entry>
+ <entry count="1">
+ <token>brown</token>
+ <token>Fox</token>
+ </entry>
+ <entry count="1">
+ <token>Fox</token>
+ <token>jumped</token>
+ </entry>
+</dictionary>
Propchange:
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
------------------------------------------------------------------------------
svn:eol-style = native