OPENNLP-1014: Add more tests for featuregen

This closes #151


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/35f27899
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/35f27899
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/35f27899

Branch: refs/heads/parser_regression
Commit: 35f2789979677642a88a93eaeadc9d41d7ae913f
Parents: 4ba2a8b
Author: koji <koji.sekigu...@rondhuit.com>
Authored: Fri Apr 7 10:49:50 2017 +0900
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Apr 20 12:40:25 2017 +0200

----------------------------------------------------------------------
 .../util/featuregen/FeatureGeneratorUtil.java   |  10 +-
 .../BigramNameFeatureGeneratorTest.java         |  94 ++++++++++++++
 .../CharacterNgramFeatureGeneratorTest.java     |  61 +++++++++
 .../featuregen/FeatureGeneratorUtilTest.java    |  45 +++++++
 .../util/featuregen/InSpanGeneratorTest.java    |  73 +++++++++++
 .../PosTaggerFeatureGeneratorTest.java          |  76 ++++++++++++
 .../PreviousTwoMapFeatureGeneratorTest.java     |  63 ++++++++++
 .../SentenceFeatureGeneratorTest.java           | 123 +++++++++++++++++++
 .../TokenClassFeatureGeneratorTest.java         |  63 ++++++++++
 .../featuregen/TokenFeatureGeneratorTest.java   |  62 ++++++++++
 .../TokenPatternFeatureGeneratorTest.java       |  74 +++++++++++
 .../TrigramNameFeatureGeneratorTest.java        | 108 ++++++++++++++++
 12 files changed, 848 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
index dfcf10d..79c2a50 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java
@@ -85,11 +85,13 @@ public class FeatureGeneratorUtil {
         feat = "num";
       }
     }
-    else if (pattern.isAllCapitalLetter() && token.length() == 1) {
-      feat = "sc";
-    }
     else if (pattern.isAllCapitalLetter()) {
-      feat = "ac";
+      if (token.length() == 1) {
+        feat = "sc";
+      }
+      else {
+        feat = "ac";
+      }
     }
     else if (capPeriod.matcher(token).find()) {
       feat = "cp";

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
new file mode 100644
index 0000000..0e31059
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class BigramNameFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testBegin() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("w,nw=This,is", features.get(0));
+    Assert.assertEquals("wc,nc=ic,lc", features.get(1));
+  }
+
+  @Test
+  public void testMiddle() {
+
+    final int testTokenIndex = 2;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(4, features.size());
+    Assert.assertEquals("pw,w=is,an", features.get(0));
+    Assert.assertEquals("pwc,wc=lc,lc", features.get(1));
+    Assert.assertEquals("w,nw=an,example", features.get(2));
+    Assert.assertEquals("wc,nc=lc,lc", features.get(3));
+  }
+
+  @Test
+  public void testEnd() {
+
+    final int testTokenIndex = 4;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("pw,w=example,sentence", features.get(0));
+    Assert.assertEquals("pwc,wc=lc,lc", features.get(1));
+  }
+
+  @Test
+  public void testShort() {
+
+    String[] shortSentence = new String[] {"word"};
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator();
+
+    generator.createFeatures(features, shortSentence, testTokenIndex, null);
+
+    Assert.assertEquals(0, features.size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
new file mode 100644
index 0000000..a695e06
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class CharacterNgramFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testDefault() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new CharacterNgramFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    assertContainsNg(features,
+            "ex", "exa", "exam", "examp",
+            "xa", "xam", "xamp", "xampl",
+            "am", "amp", "ampl", "ample",
+            "mp", "mpl", "mple",
+            "pl", "ple",
+            "le");
+  }
+
+  private static void assertContainsNg(List<String> features, String... elements) {
+    Assert.assertEquals(elements.length, features.size());
+    for (String e: elements) {
+      Assert.assertTrue(features.contains("ng=" + e));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
new file mode 100644
index 0000000..cca0d83
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class FeatureGeneratorUtilTest {
+
+  @Test
+  public void test() {
+    // digits
+    Assert.assertEquals("2d", FeatureGeneratorUtil.tokenFeature("12"));
+    Assert.assertEquals("4d", FeatureGeneratorUtil.tokenFeature("1234"));
+    Assert.assertEquals("an", FeatureGeneratorUtil.tokenFeature("abcd234"));
+    Assert.assertEquals("dd", FeatureGeneratorUtil.tokenFeature("1234-56"));
+    Assert.assertEquals("ds", FeatureGeneratorUtil.tokenFeature("4/6/2017"));
+    Assert.assertEquals("dc", FeatureGeneratorUtil.tokenFeature("1,234,567"));
+    Assert.assertEquals("dp", FeatureGeneratorUtil.tokenFeature("12.34567"));
+    Assert.assertEquals("num", FeatureGeneratorUtil.tokenFeature("123(456)7890"));
+
+    // letters
+    Assert.assertEquals("lc", FeatureGeneratorUtil.tokenFeature("opennlp"));
+    Assert.assertEquals("sc", FeatureGeneratorUtil.tokenFeature("O"));
+    Assert.assertEquals("ac", FeatureGeneratorUtil.tokenFeature("OPENNLP"));
+    Assert.assertEquals("cp", FeatureGeneratorUtil.tokenFeature("A."));
+    Assert.assertEquals("ic", FeatureGeneratorUtil.tokenFeature("Mike"));
+    Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("somethingStupid"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
new file mode 100644
index 0000000..a5fd3d4
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.util.Span;
+
+public class InSpanGeneratorTest {
+
+  static class SimpleSpecificPersonFinder implements TokenNameFinder {
+
+    private final String theName;
+
+    public SimpleSpecificPersonFinder(String theName) {
+      this.theName = theName;
+    }
+
+    @Override
+    public Span[] find(String[] tokens) {
+      for (int i = 0; i < tokens.length; i++) {
+        if (theName.equals(tokens[i])) {
+          return new Span[]{ new Span(i, i + 1, "person") };
+        }
+      }
+
+      return new Span[]{};
+    }
+
+    @Override
+    public void clearAdaptiveData() {
+    }
+  }
+
+  @Test
+  public void test() {
+
+    List<String> features = new ArrayList<>();
+
+    String[] testSentence = new String[]{ "Every", "John", "has", "its", "day", "." };
+
+    AdaptiveFeatureGenerator generator = new InSpanGenerator("john", new SimpleSpecificPersonFinder("John"));
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    features.clear();
+    generator.createFeatures(features, testSentence, 1, null);
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("john:w=dic", features.get(0));
+    Assert.assertEquals("john:w=dic=John", features.get(1));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
new file mode 100644
index 0000000..b916fc9
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PosTaggerFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+  static String[] testTags = new String[] {"DT", "VBZ", "DT", "NN", "NN"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testBegin() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, testTags);
+
+    Assert.assertEquals(0, features.size());
+  }
+
+  @Test
+  public void testNext() {
+
+    final int testTokenIndex = 1;
+
+    AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, testTags);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("t=DT", features.get(0));
+  }
+
+  @Test
+  public void testMiddle() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator();
+
+    generator.createFeatures(features, testSentence, testTokenIndex, testTags);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("t=DT", features.get(0));
+    Assert.assertEquals("t2=VBZ,DT", features.get(1));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
new file mode 100644
index 0000000..2b66b50
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PreviousTwoMapFeatureGeneratorTest {
+
+  @Test
+  public void testFeatureGeneration() {
+
+    AdaptiveFeatureGenerator fg = new PreviousTwoMapFeatureGenerator();
+
+    String[] sentence = new String[] {"a", "b", "c"};
+
+    List<String> features = new ArrayList<>();
+
+    // this should generate no features
+    fg.createFeatures(features, sentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    // this should generate the ppd=null,null feature
+    fg.createFeatures(features, sentence, 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("ppd=null,null", features.get(0));
+
+    features.clear();
+
+    // this should generate the ppd=2,1 feature
+    fg.updateAdaptiveData(sentence, new String[] {"1", "2", "3"});
+    fg.createFeatures(features, sentence, 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("ppd=2,1", features.get(0));
+
+    features.clear();
+
+    // this should generate the ppd=null,null feature again after
+    // the adaptive data was cleared
+    fg.clearAdaptiveData();
+    fg.createFeatures(features, sentence, 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("ppd=null,null", features.get(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
new file mode 100644
index 0000000..a4cee75
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class SentenceFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+  static String[] testShort = new String[] {"word"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testTT() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, true);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=end", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+    Assert.assertEquals("S=end", features.get(1));
+  }
+
+  @Test
+  public void testTF() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, false);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(0, features.size());
+
+    features.clear();
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=begin", features.get(0));
+  }
+
+  @Test
+  public void testFT() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, true);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=end", features.get(0));
+
+    features.clear();
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("S=end", features.get(0));
+  }
+
+  @Test
+  public void testFF() {
+    AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, false);
+
+    generator.createFeatures(features, testSentence, 2, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, 0, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testSentence, testSentence.length - 1, null);
+    Assert.assertEquals(0, features.size());
+
+    generator.createFeatures(features, testShort, 0, null);
+    Assert.assertEquals(0, features.size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
new file mode 100644
index 0000000..b165f90
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TokenClassFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "Example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void testGenWAC() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(true);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("wc=ic", features.get(0));
+    Assert.assertEquals("w&c=example,ic", features.get(1));
+  }
+
+  @Test
+  public void testNoWAC() {
+
+    final int testTokenIndex = 3;
+
+    AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(false);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("wc=ic", features.get(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
new file mode 100644
index 0000000..4eae767
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TokenFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void test() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new TokenFeatureGenerator(false);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("w=This", features.get(0));
+  }
+
+  @Test
+  public void testLowerCase() {
+
+    final int testTokenIndex = 0;
+
+    AdaptiveFeatureGenerator generator = new TokenFeatureGenerator(true);
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("w=this", features.get(0));
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
new file mode 100644
index 0000000..1d905db
--- /dev/null
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/** Exercises {@link TokenPatternFeatureGenerator} on a plain token and on a multi-word token. */
+public class TokenPatternFeatureGeneratorTest {
+
+  private List<String> features;
+
+  /** Starts every test with an empty feature list. */
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  /** A one-word token is expected to produce a single {@code st=} feature. */
+  @Test
+  public void testSingleToken() {
+    String[] words = {"This", "is", "an", "example", "sentence"};
+    AdaptiveFeatureGenerator patternGenerator = new TokenPatternFeatureGenerator();
+
+    patternGenerator.createFeatures(features, words, 3, null);
+
+    Assert.assertEquals(1, features.size());
+    Assert.assertEquals("st=example", features.get(0));
+  }
+
+  /** A token that holds a whole sentence is expected to produce the pattern features below. */
+  @Test
+  public void testSentence() {
+    String[] singleLongToken = {"This is an example sentence"};
+    // Expected features, in the exact order they must appear in the list.
+    String[] expected = {
+        "stn=5",
+        "pt2=iclc", "pt3=iclclc", "st=this",
+        "pt2=lclc", "pt3=lclclc", "st=is",
+        "pt2=lclc", "pt3=lclclc", "st=an",
+        "pt2=lclc", "st=example",
+        "st=sentence",
+        "pta=iclclclclc"
+    };
+
+    AdaptiveFeatureGenerator patternGenerator = new TokenPatternFeatureGenerator();
+
+    patternGenerator.createFeatures(features, singleLongToken, 0, null);
+
+    Assert.assertEquals(14, features.size());
+    for (int position = 0; position < expected.length; position++) {
+      Assert.assertEquals(expected[position], features.get(position));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java
new file mode 100644
index 0000000..1e0cb18
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/** Verifies the features {@link TrigramNameFeatureGenerator} creates at various token positions. */
+public class TrigramNameFeatureGeneratorTest {
+
+  private List<String> features;
+  static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+  /** Starts every test with an empty feature list. */
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  /**
+   * First token: only the {@code w,nw,nnw} and {@code wc,nwc,nnwc} features are expected.
+   */
+  @Test
+  public void testBegin() {
+    AdaptiveFeatureGenerator trigrams = new TrigramNameFeatureGenerator();
+    trigrams.createFeatures(features, testSentence, 0, null);
+
+    // Both expected features start at the current token and cover the two tokens after it.
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("w,nw,nnw=This,is,an", features.get(0));
+    Assert.assertEquals("wc,nwc,nnwc=ic,lc,lc", features.get(1));
+  }
+
+  /**
+   * Second token: still only the {@code w,nw,nnw} and {@code wc,nwc,nnwc} features are expected.
+   */
+  @Test
+  public void testNextOfBegin() {
+    AdaptiveFeatureGenerator trigrams = new TrigramNameFeatureGenerator();
+    trigrams.createFeatures(features, testSentence, 1, null);
+
+    // Both expected features start at the current token and cover the two tokens after it.
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("w,nw,nnw=is,an,example", features.get(0));
+    Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(1));
+  }
+
+  /**
+   * Third of five tokens: the {@code ppw,pw,w} pair and the {@code w,nw,nnw} pair are expected.
+   */
+  @Test
+  public void testMiddle() {
+    AdaptiveFeatureGenerator trigrams = new TrigramNameFeatureGenerator();
+    trigrams.createFeatures(features, testSentence, 2, null);
+
+    // The features ending at the current token are expected before those starting at it.
+    Assert.assertEquals(4, features.size());
+    Assert.assertEquals("ppw,pw,w=This,is,an", features.get(0));
+    Assert.assertEquals("ppwc,pwc,wc=ic,lc,lc", features.get(1));
+    Assert.assertEquals("w,nw,nnw=an,example,sentence", features.get(2));
+    Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(3));
+  }
+
+  /**
+   * Last token: only the {@code ppw,pw,w} and {@code ppwc,pwc,wc} features are expected.
+   */
+  @Test
+  public void testEnd() {
+    AdaptiveFeatureGenerator trigrams = new TrigramNameFeatureGenerator();
+    trigrams.createFeatures(features, testSentence, 4, null);
+
+    // Both expected features end at the current token and cover the two tokens before it.
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("ppw,pw,w=an,example,sentence", features.get(0));
+    Assert.assertEquals("ppwc,pwc,wc=lc,lc,lc", features.get(1));
+  }
+
+  /**
+   * Middle token of a three-token sentence: no feature at all is expected.
+   */
+  @Test
+  public void testShort() {
+    String[] threeTokens = {"I", "know", "it"};
+    AdaptiveFeatureGenerator trigrams = new TrigramNameFeatureGenerator();
+    trigrams.createFeatures(features, threeTokens, 1, null);
+
+    Assert.assertEquals(0, features.size());
+  }
+}

Reply via email to