Author: tommaso
Date: Thu Jan 29 08:02:31 2015
New Revision: 1655546

URL: http://svn.apache.org/r1655546
Log:
OPENNLP-746 - added unit test for NGramModel

Added:
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
   (with props)
    opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/
    
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
   (with props)
Modified:
    opennlp/trunk/opennlp-tools/pom.xml

Modified: opennlp/trunk/opennlp-tools/pom.xml
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/pom.xml?rev=1655546&r1=1655545&r2=1655546&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/pom.xml (original)
+++ opennlp/trunk/opennlp-tools/pom.xml Thu Jan 29 08:02:31 2015
@@ -53,6 +53,13 @@
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.4</version>
+      <scope>test</scope>
     </dependency>
 
   </dependencies>

Added: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java?rev=1655546&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
 Thu Jan 29 08:02:31 2015
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package opennlp.tools.ngram;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.StringList;
+import org.apache.commons.io.IOUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests for {@link opennlp.tools.ngram.NGramModel}
+ */
+public class NGramModelTest {
+
+  @Test
+  public void testZeroGetCount() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    int count = ngramModel.getCount(new StringList(""));
+    assertEquals(0, count);
+    assertEquals(0, ngramModel.size());
+  }
+
+  @Test
+  public void testZeroGetCount2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"));
+    int count = ngramModel.getCount(new StringList("fox"));
+    assertEquals(0, count);
+    assertEquals(1, ngramModel.size());
+  }
+
+  @Test
+  public void testAdd() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"));
+    int count = ngramModel.getCount(new StringList("the"));
+    assertEquals(0, count);
+    assertEquals(1, ngramModel.size());
+  }
+
+  @Test
+  public void testAdd1() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"));
+    int count = ngramModel.getCount(new StringList("the", "bro", "wn"));
+    assertEquals(1, count);
+    assertEquals(1, ngramModel.size());
+  }
+
+  @Test
+  public void testAdd2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "bro", "wn"), 2, 3);
+    int count = ngramModel.getCount(new StringList("the", "bro", "wn"));
+    assertEquals(1, count);
+    assertEquals(3, ngramModel.size());
+  }
+
+  @Test
+  public void testAdd3() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    ngramModel.add(new StringList("the", "brown", "fox"), 2, 3);
+    int count = ngramModel.getCount(new StringList("the", "brown", "fox"));
+    assertEquals(1, count);
+    count = ngramModel.getCount(new StringList("the", "brown"));
+    assertEquals(1, count);
+    count = ngramModel.getCount(new StringList("brown", "fox"));
+    assertEquals(1, count);
+    assertEquals(3, ngramModel.size());
+  }
+
+  @Test
+  public void testRemove() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens);
+    ngramModel.remove(tokens);
+    assertEquals(0, ngramModel.size());
+  }
+
+  @Test
+  public void testContains() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens);
+    assertFalse(ngramModel.contains(new StringList("the")));
+  }
+
+  @Test
+  public void testContains2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens, 1, 3);
+    assertTrue(ngramModel.contains(new StringList("the")));
+  }
+
+  @Test
+  public void testNumberOfGrams() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "bro", "wn");
+    ngramModel.add(tokens, 1, 3);
+    assertEquals(6, ngramModel.numberOfGrams());
+  }
+
+  @Test
+  public void testCutoff1() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    ngramModel.cutoff(2, 4);
+    assertEquals(0, ngramModel.size());
+  }
+
+  @Test
+  public void testCutoff2() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    ngramModel.cutoff(1, 3);
+    assertEquals(9, ngramModel.size());
+  }
+
+  @Test
+  public void testToDictionary() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    tokens = new StringList("the", "brown", "Fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    Dictionary dictionary = ngramModel.toDictionary();
+    assertNotNull(dictionary);
+    assertEquals(9, dictionary.size());
+    assertEquals(1, dictionary.getMinTokenCount());
+    assertEquals(3, dictionary.getMaxTokenCount());
+  }
+
+  @Test
+  public void testToDictionary1() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    tokens = new StringList("the", "brown", "Fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    Dictionary dictionary = ngramModel.toDictionary(true);
+    assertNotNull(dictionary);
+    assertEquals(14, dictionary.size());
+    assertEquals(1, dictionary.getMinTokenCount());
+    assertEquals(3, dictionary.getMaxTokenCount());
+  }
+
+  @Test
+  public void testSerialize() throws Exception {
+    NGramModel ngramModel = new NGramModel();
+    StringList tokens = new StringList("the", "brown", "fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    tokens = new StringList("the", "brown", "Fox", "jumped");
+    ngramModel.add(tokens, 1, 3);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    ngramModel.serialize(out);
+    assertNotNull(out);
+    InputStream nGramModelStream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml");
+    String modelString = IOUtils.toString(nGramModelStream);
+    String outputString = out.toString(Charset.defaultCharset().name());
+    assertEquals(modelString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", ""),
+            outputString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", ""));
+  }
+}
\ No newline at end of file

Propchange: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml?rev=1655546&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
 Thu Jan 29 08:02:31 2015
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dictionary case_sensitive="false">
+    <entry count="1">
+        <token>brown</token>
+        <token>fox</token>
+    </entry>
+    <entry count="1">
+        <token>fox</token>
+    </entry>
+    <entry count="1">
+        <token>brown</token>
+        <token>fox</token>
+        <token>jumped</token>
+    </entry>
+    <entry count="2">
+        <token>the</token>
+    </entry>
+    <entry count="1">
+        <token>the</token>
+        <token>brown</token>
+        <token>fox</token>
+    </entry>
+    <entry count="1">
+        <token>the</token>
+        <token>brown</token>
+        <token>Fox</token>
+    </entry>
+    <entry count="2">
+        <token>jumped</token>
+    </entry>
+    <entry count="2">
+        <token>brown</token>
+    </entry>
+    <entry count="1">
+        <token>brown</token>
+        <token>Fox</token>
+        <token>jumped</token>
+    </entry>
+    <entry count="1">
+        <token>Fox</token>
+    </entry>
+    <entry count="1">
+        <token>fox</token>
+        <token>jumped</token>
+    </entry>
+    <entry count="2">
+        <token>the</token>
+        <token>brown</token>
+    </entry>
+    <entry count="1">
+        <token>brown</token>
+        <token>Fox</token>
+    </entry>
+    <entry count="1">
+        <token>Fox</token>
+        <token>jumped</token>
+    </entry>
+</dictionary>

Propchange: 
opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/ngram/ngram-model.xml
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to