This is an automated email from the ASF dual-hosted git repository.
koji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new c9685cd OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
c9685cd is described below
commit c9685cd5e462926aa401fca7321ea23927492006
Author: Koji Sekiguchi <[email protected]>
AuthorDate: Fri Jan 12 11:50:19 2018 +0900
OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
---
.../tools/util/featuregen/GeneratorFactory.java | 16 ++++++-
.../tools/namefind/ner-default-features.xml | 38 ++++++++--------
.../opennlp/tools/postag/pos-default-features.xml | 38 ++++++++--------
.../util/featuregen/GeneratorFactoryTest.java | 27 ++++++++++-
.../opennlp/tools/eval/ner-en_pos-features.xml | 39 +++++++++-------
.../opennlp/tools/namefind/ner-pos-features.xml | 52 ++++++++++------------
...callyInsertAggregatedFeatureGeneratorCache.xml} | 10 ++---
...ld.xml => TestInsertCachedFeatureGenerator.xml} | 8 ++--
...callyInsertAggregatedFeatureGeneratorCache.xml} | 6 +--
9 files changed, 131 insertions(+), 103 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index 867c1e0..bf55abf 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -469,13 +469,25 @@ public class GeneratorFactory {
}
}
+ AdaptiveFeatureGenerator featureGenerator = null;
if (generators.size() == 1)
- return generators.get(0);
+ featureGenerator = generators.get(0);
else if (generators.size() > 1)
- return new AggregatedFeatureGenerator(generators.toArray(
+ featureGenerator = new AggregatedFeatureGenerator(generators.toArray(
new AdaptiveFeatureGenerator[generators.size()]));
else
throw new InvalidFormatException("featureGenerators must have one or
more generators");
+
+ // disallow manually specifying CachedFeatureGenerator
+ if (featureGenerator instanceof CachedFeatureGenerator)
+ throw new InvalidFormatException("CachedFeatureGeneratorFactory cannot
be specified manually." +
+ "Use cache=\"true\" attribute in featureGenerators element
instead.");
+
+ // check cache usage
+ if (Boolean.parseBoolean(generatorElement.getAttribute("cache")))
+ return new CachedFeatureGenerator(featureGenerator);
+ else
+ return featureGenerator;
}
else {
// support classic format
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
index 32887cf..1f60ad1 100644
---
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
+++
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
@@ -18,24 +18,22 @@
-->
<!-- Default name finder feature generator configuration -->
-<featureGenerators name="nameFinder">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
+<featureGenerators cache="true" name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
</featureGenerators>
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
index c1be8ee..2137511 100644
---
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
+++
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -18,25 +18,23 @@
-->
<!-- Default pos tagger feature generator configuration -->
-<featureGenerators name="posTagger">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
+<featureGenerators cache="true" name="posTagger">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
</generator>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
index 35a58bb..4e95b20 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
@@ -208,7 +208,7 @@ public class GeneratorFactoryTest {
@Test
public void testNotAutomaticallyInsertAggregatedFeatureGeneratorChild()
throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml");
+
"/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -225,7 +225,7 @@ public class GeneratorFactoryTest {
@Test
public void testAutomaticallyInsertAggregatedFeatureGeneratorChildren()
throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml");
+
"/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -244,4 +244,27 @@ public class GeneratorFactoryTest {
Assert.assertTrue(afgen instanceof OutcomePriorFeatureGenerator);
}
}
+
+ @Test
+ public void testInsertCachedFeatureGenerator() throws Exception {
+ InputStream generatorDescriptorIn = getClass().getResourceAsStream(
+ "/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml");
+
+ // If this fails the generator descriptor could not be found
+ // at the expected location
+ Assert.assertNotNull(generatorDescriptorIn);
+
+ AdaptiveFeatureGenerator featureGenerator =
GeneratorFactory.create(generatorDescriptorIn, null);
+ Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
+ CachedFeatureGenerator cachedFeatureGenerator =
(CachedFeatureGenerator)featureGenerator;
+
+ Assert.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
+ instanceof AggregatedFeatureGenerator);
+ AggregatedFeatureGenerator aggregatedFeatureGenerator =
+
(AggregatedFeatureGenerator)cachedFeatureGenerator.getCachedFeatureGenerator();
+ Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
+ for (AdaptiveFeatureGenerator afg:
aggregatedFeatureGenerator.getGenerators()) {
+ Assert.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
+ }
+ }
}
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
index b850904..06c73df 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
@@ -18,20 +18,25 @@
-->
<!-- Default name finder feature generator configuration -->
-<generators>
- <cache>
- <generators>
- <window prevLength = "2" nextLength = "2">
- <tokenclass/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <token/>
- </window>
- <definition/>
- <prevmap/>
- <bigram/>
- <sentence begin="true" end="false"/>
- <tokenpos model="en-pos-perceptron.bin"/>
- </generators>
- </cache>
-</generators>
\ No newline at end of file
+<featureGenerators cache="true" name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">en-pos-perceptron.bin</str>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
index 7464627..c8b5887 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
@@ -15,33 +15,29 @@
~ limitations under the License.
-->
-<featureGenerators name="nameFinder">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
- <str name="model">pos-model.bin</str>
- </generator>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
+<featureGenerators cache="true" name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">pos-model.bin</str>
</generator>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 67%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index 7dbed59..08f1400 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,10 +19,8 @@
under the License.
-->
-<featureGenerators name="test">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
similarity index 73%
copy from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
index ed7f2f6..08f1400 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
@@ -19,8 +19,8 @@
under the License.
-->
-<featureGenerators name="test">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 79%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index ed7f2f6..801adad 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,8 +19,6 @@
under the License.
-->
-<featureGenerators name="test">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].