OPENNLP-1014: Add more tests for featuregen This closes #151
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/35f27899 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/35f27899 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/35f27899 Branch: refs/heads/parser_regression Commit: 35f2789979677642a88a93eaeadc9d41d7ae913f Parents: 4ba2a8b Author: koji <koji.sekigu...@rondhuit.com> Authored: Fri Apr 7 10:49:50 2017 +0900 Committer: Jörn Kottmann <jo...@apache.org> Committed: Thu Apr 20 12:40:25 2017 +0200 ---------------------------------------------------------------------- .../util/featuregen/FeatureGeneratorUtil.java | 10 +- .../BigramNameFeatureGeneratorTest.java | 94 ++++++++++++++ .../CharacterNgramFeatureGeneratorTest.java | 61 +++++++++ .../featuregen/FeatureGeneratorUtilTest.java | 45 +++++++ .../util/featuregen/InSpanGeneratorTest.java | 73 +++++++++++ .../PosTaggerFeatureGeneratorTest.java | 76 ++++++++++++ .../PreviousTwoMapFeatureGeneratorTest.java | 63 ++++++++++ .../SentenceFeatureGeneratorTest.java | 123 +++++++++++++++++++ .../TokenClassFeatureGeneratorTest.java | 63 ++++++++++ .../featuregen/TokenFeatureGeneratorTest.java | 62 ++++++++++ .../TokenPatternFeatureGeneratorTest.java | 74 +++++++++++ .../TrigramNameFeatureGeneratorTest.java | 108 ++++++++++++++++ 12 files changed, 848 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java index dfcf10d..79c2a50 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/FeatureGeneratorUtil.java @@ -85,11 +85,13 @@ public class FeatureGeneratorUtil { feat = "num"; } } - else if (pattern.isAllCapitalLetter() && token.length() == 1) { - feat = "sc"; - } else if (pattern.isAllCapitalLetter()) { - feat = "ac"; + if (token.length() == 1) { + feat = "sc"; + } + else { + feat = "ac"; + } } else if (capPeriod.matcher(token).find()) { feat = "cp"; http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java new file mode 100644 index 0000000..0e31059 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class BigramNameFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testBegin() { + + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("w,nw=This,is", features.get(0)); + Assert.assertEquals("wc,nc=ic,lc", features.get(1)); + } + + @Test + public void testMiddle() { + + final int testTokenIndex = 2; + + AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(4, features.size()); + Assert.assertEquals("pw,w=is,an", features.get(0)); + Assert.assertEquals("pwc,wc=lc,lc", features.get(1)); + Assert.assertEquals("w,nw=an,example", features.get(2)); + Assert.assertEquals("wc,nc=lc,lc", features.get(3)); + } + + @Test + public void testEnd() { + + final int testTokenIndex = 4; + + AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("pw,w=example,sentence", features.get(0)); + Assert.assertEquals("pwc,wc=lc,lc", features.get(1)); + } + + @Test + public void testShort() { + + String[] shortSentence = new String[] {"word"}; + + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new BigramNameFeatureGenerator(); + + generator.createFeatures(features, shortSentence, testTokenIndex, null); + + Assert.assertEquals(0, features.size()); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java new file mode 100644 index 0000000..a695e06 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class CharacterNgramFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testDefault() { + + final int testTokenIndex = 3; + + AdaptiveFeatureGenerator generator = new CharacterNgramFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + assertContainsNg(features, + "ex", "exa", "exam", "examp", + "xa", "xam", "xamp", "xampl", + "am", "amp", "ampl", "ample", + "mp", "mpl", "mple", + "pl", "ple", + "le"); + } + + private static void assertContainsNg(List<String> features, String... elements) { + Assert.assertEquals(elements.length, features.size()); + for (String e: elements) { + Assert.assertTrue(features.contains("ng=" + e)); + } + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java new file mode 100644 index 0000000..cca0d83 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGeneratorUtilTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import org.junit.Assert; +import org.junit.Test; + +public class FeatureGeneratorUtilTest { + + @Test + public void test() { + // digits + Assert.assertEquals("2d", FeatureGeneratorUtil.tokenFeature("12")); + Assert.assertEquals("4d", FeatureGeneratorUtil.tokenFeature("1234")); + Assert.assertEquals("an", FeatureGeneratorUtil.tokenFeature("abcd234")); + Assert.assertEquals("dd", FeatureGeneratorUtil.tokenFeature("1234-56")); + Assert.assertEquals("ds", FeatureGeneratorUtil.tokenFeature("4/6/2017")); + Assert.assertEquals("dc", FeatureGeneratorUtil.tokenFeature("1,234,567")); + Assert.assertEquals("dp", FeatureGeneratorUtil.tokenFeature("12.34567")); + Assert.assertEquals("num", FeatureGeneratorUtil.tokenFeature("123(456)7890")); + + // letters + Assert.assertEquals("lc", FeatureGeneratorUtil.tokenFeature("opennlp")); + Assert.assertEquals("sc", FeatureGeneratorUtil.tokenFeature("O")); + Assert.assertEquals("ac", FeatureGeneratorUtil.tokenFeature("OPENNLP")); + Assert.assertEquals("cp", FeatureGeneratorUtil.tokenFeature("A.")); + Assert.assertEquals("ic", FeatureGeneratorUtil.tokenFeature("Mike")); + Assert.assertEquals("other", FeatureGeneratorUtil.tokenFeature("somethingStupid")); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java new file mode 100644 index 0000000..a5fd3d4 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/InSpanGeneratorTest.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +import opennlp.tools.namefind.TokenNameFinder; +import opennlp.tools.util.Span; + +public class InSpanGeneratorTest { + + static class SimpleSpecificPersonFinder implements TokenNameFinder { + + private final String theName; + + public SimpleSpecificPersonFinder(String theName) { + this.theName = theName; + } + + @Override + public Span[] find(String[] tokens) { + for (int i = 0; i < tokens.length; i++) { + if (theName.equals(tokens[i])) { + return new Span[]{ new Span(i, i + 1, "person") }; + } + } + + return new Span[]{}; + } + + @Override + public void clearAdaptiveData() { + } + } + + @Test + public void test() { + + List<String> features = new ArrayList<>(); + + String[] testSentence = new String[]{ "Every", "John", "has", "its", "day", "." }; + + AdaptiveFeatureGenerator generator = new InSpanGenerator("john", new SimpleSpecificPersonFinder("John")); + + generator.createFeatures(features, testSentence, 0, null); + Assert.assertEquals(0, features.size()); + + features.clear(); + generator.createFeatures(features, testSentence, 1, null); + Assert.assertEquals(2, features.size()); + Assert.assertEquals("john:w=dic", features.get(0)); + Assert.assertEquals("john:w=dic=John", features.get(1)); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java new file mode 100644 index 0000000..b916fc9 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorTest.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class PosTaggerFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + static String[] testTags = new String[] {"DT", "VBZ", "DT", "NN", "NN"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testBegin() { + + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, testTags); + + Assert.assertEquals(0, features.size()); + } + + @Test + public void testNext() { + + final int testTokenIndex = 1; + + AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, testTags); + + Assert.assertEquals(1, features.size()); + Assert.assertEquals("t=DT", features.get(0)); + } + + @Test + public void testMiddle() { + + final int testTokenIndex = 3; + + AdaptiveFeatureGenerator generator = new PosTaggerFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, testTags); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("t=DT", features.get(0)); + Assert.assertEquals("t2=VBZ,DT", features.get(1)); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java new file mode 100644 index 0000000..2b66b50 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGeneratorTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +public class PreviousTwoMapFeatureGeneratorTest { + + @Test + public void testFeatureGeneration() { + + AdaptiveFeatureGenerator fg = new PreviousTwoMapFeatureGenerator(); + + String[] sentence = new String[] {"a", "b", "c"}; + + List<String> features = new ArrayList<>(); + + // this should generate the no features + fg.createFeatures(features, sentence, 0, null); + Assert.assertEquals(0, features.size()); + + // this should generate the pd=null feature + fg.createFeatures(features, sentence, 1, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("ppd=null,null", features.get(0)); + + features.clear(); + + // this should generate the pd=1 feature + fg.updateAdaptiveData(sentence, new String[] {"1", "2", "3"}); + fg.createFeatures(features, sentence, 1, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("ppd=2,1", features.get(0)); + + features.clear(); + + // this should generate the pd=null feature again after + // the adaptive data was cleared + fg.clearAdaptiveData(); + fg.createFeatures(features, sentence, 1, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("ppd=null,null", features.get(0)); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java new file mode 100644 index 0000000..a4cee75 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class SentenceFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + static String[] testShort = new String[] {"word"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testTT() { + AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, true); + + generator.createFeatures(features, testSentence, 2, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testSentence, 0, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("S=begin", features.get(0)); + + features.clear(); + + generator.createFeatures(features, testSentence, testSentence.length - 1, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("S=end", features.get(0)); + + features.clear(); + + generator.createFeatures(features, testShort, 0, null); + Assert.assertEquals(2, features.size()); + Assert.assertEquals("S=begin", features.get(0)); + Assert.assertEquals("S=end", features.get(1)); + } + + @Test + public void testTF() { + AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(true, false); + + generator.createFeatures(features, testSentence, 2, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testSentence, 0, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("S=begin", features.get(0)); + + features.clear(); + + generator.createFeatures(features, testSentence, testSentence.length - 1, null); + Assert.assertEquals(0, features.size()); + + features.clear(); + + generator.createFeatures(features, testShort, 0, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("S=begin", features.get(0)); + } + + @Test + public void testFT() { + AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, true); + + generator.createFeatures(features, testSentence, 2, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testSentence, 0, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testSentence, testSentence.length - 1, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("S=end", features.get(0)); + + features.clear(); + + generator.createFeatures(features, testShort, 0, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("S=end", features.get(0)); + } + + @Test + public void testFF() { + AdaptiveFeatureGenerator generator = new SentenceFeatureGenerator(false, false); + + generator.createFeatures(features, testSentence, 2, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testSentence, 0, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testSentence, testSentence.length - 1, null); + Assert.assertEquals(0, features.size()); + + generator.createFeatures(features, testShort, 0, null); + Assert.assertEquals(0, features.size()); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java new file mode 100644 index 0000000..b165f90 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TokenClassFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "Example", "sentence"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testGenWAC() { + + final int testTokenIndex = 3; + + AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(true); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("wc=ic", features.get(0)); + Assert.assertEquals("w&c=example,ic", features.get(1)); + } + + @Test + public void testNoWAC() { + + final int testTokenIndex = 3; + + AdaptiveFeatureGenerator generator = new TokenClassFeatureGenerator(false); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(1, features.size()); + Assert.assertEquals("wc=ic", features.get(0)); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java new file mode 100644 index 0000000..4eae767 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorTest.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TokenFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void test() { + + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new TokenFeatureGenerator(false); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(1, features.size()); + Assert.assertEquals("w=This", features.get(0)); + } + + @Test + public void testLowerCase() { + + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new TokenFeatureGenerator(true); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(1, features.size()); + Assert.assertEquals("w=this", features.get(0)); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java new file mode 100644 index 0000000..1d905db --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TokenPatternFeatureGeneratorTest { + + private List<String> features; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testSingleToken() { + + String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + final int testTokenIndex = 3; + + AdaptiveFeatureGenerator generator = new TokenPatternFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + Assert.assertEquals(1, features.size()); + Assert.assertEquals("st=example", features.get(0)); + } + + @Test + public void testSentence() { + + String[] testSentence = new String[] {"This is an example sentence"}; + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new TokenPatternFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + Assert.assertEquals(14, features.size()); + Assert.assertEquals("stn=5", features.get(0)); + Assert.assertEquals("pt2=iclc", features.get(1)); + Assert.assertEquals("pt3=iclclc", features.get(2)); + Assert.assertEquals("st=this", features.get(3)); + Assert.assertEquals("pt2=lclc", features.get(4)); + Assert.assertEquals("pt3=lclclc", features.get(5)); + Assert.assertEquals("st=is", features.get(6)); + Assert.assertEquals("pt2=lclc", features.get(7)); + Assert.assertEquals("pt3=lclclc", features.get(8)); + Assert.assertEquals("st=an", features.get(9)); + Assert.assertEquals("pt2=lclc", features.get(10)); + Assert.assertEquals("st=example", features.get(11)); + Assert.assertEquals("st=sentence", features.get(12)); + Assert.assertEquals("pta=iclclclclc", features.get(13)); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/35f27899/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java new file mode 100644 index 0000000..1e0cb18 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/TrigramNameFeatureGeneratorTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.featuregen; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TrigramNameFeatureGeneratorTest { + + private List<String> features; + static String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"}; + + @Before + public void setUp() throws Exception { + features = new ArrayList<>(); + } + + @Test + public void testBegin() { + + final int testTokenIndex = 0; + + AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("w,nw,nnw=This,is,an", features.get(0)); + Assert.assertEquals("wc,nwc,nnwc=ic,lc,lc", features.get(1)); + } + + @Test + public void testNextOfBegin() { + + final int testTokenIndex = 1; + + AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("w,nw,nnw=is,an,example", features.get(0)); + Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(1)); + } + + @Test + public void testMiddle() { + + final int testTokenIndex = 2; + + AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(4, features.size()); + Assert.assertEquals("ppw,pw,w=This,is,an", features.get(0)); + Assert.assertEquals("ppwc,pwc,wc=ic,lc,lc", features.get(1)); + Assert.assertEquals("w,nw,nnw=an,example,sentence", features.get(2)); + Assert.assertEquals("wc,nwc,nnwc=lc,lc,lc", features.get(3)); + } + + @Test + public void testEnd() { + + final int testTokenIndex = 4; + + AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); + + generator.createFeatures(features, testSentence, testTokenIndex, null); + + Assert.assertEquals(2, features.size()); + Assert.assertEquals("ppw,pw,w=an,example,sentence", features.get(0)); + Assert.assertEquals("ppwc,pwc,wc=lc,lc,lc", features.get(1)); + } + + @Test + public void testShort() { + + String[] shortSentence = new String[] {"I", "know", "it"}; + + final int testTokenIndex = 1; + + AdaptiveFeatureGenerator generator = new TrigramNameFeatureGenerator(); + + generator.createFeatures(features, shortSentence, testTokenIndex, null); + + Assert.assertEquals(0, features.size()); + } +}