[
https://issues.apache.org/jira/browse/OPENNLP-1160?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16323467#comment-16323467
]
ASF GitHub Bot commented on OPENNLP-1160:
-----------------------------------------
kojisekig closed pull request #305: OPENNLP-1160: avoid letting users specify
CachedFeatureGeneratorFacto…
URL: https://github.com/apache/opennlp/pull/305
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index 867c1e043..bf55abfc3 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -469,13 +469,25 @@ static AdaptiveFeatureGenerator createGenerator(Element
generatorElement,
}
}
+ AdaptiveFeatureGenerator featureGenerator = null;
if (generators.size() == 1)
- return generators.get(0);
+ featureGenerator = generators.get(0);
else if (generators.size() > 1)
- return new AggregatedFeatureGenerator(generators.toArray(
+ featureGenerator = new AggregatedFeatureGenerator(generators.toArray(
new AdaptiveFeatureGenerator[generators.size()]));
else
throw new InvalidFormatException("featureGenerators must have one or
more generators");
+
+ // disallow manually specifying CachedFeatureGenerator
+ if (featureGenerator instanceof CachedFeatureGenerator)
+ throw new InvalidFormatException("CachedFeatureGeneratorFactory cannot
be specified manually." +
+ "Use cache=\"true\" attribute in featureGenerators element
instead.");
+
+ // check cache usage
+ if (Boolean.parseBoolean(generatorElement.getAttribute("cache")))
+ return new CachedFeatureGenerator(featureGenerator);
+ else
+ return featureGenerator;
}
else {
// support classic format
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
index 32887cf78..1f60ad18b 100644
---
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
+++
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
@@ -18,24 +18,22 @@
-->
<!-- Default name finder feature generator configuration -->
-<featureGenerators name="nameFinder">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
+<featureGenerators cache="true" name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
</featureGenerators>
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
index c1be8ee6e..2137511a6 100644
---
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
+++
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -18,25 +18,23 @@
-->
<!-- Default pos tagger feature generator configuration -->
-<featureGenerators name="posTagger">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
+<featureGenerators cache="true" name="posTagger">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
</generator>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
index 35a58bbe9..4e95b203e 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
@@ -208,7 +208,7 @@ public void
testAutomaticallyInsertAggregatedFeatureGenerator() throws Exception
@Test
public void testNotAutomaticallyInsertAggregatedFeatureGeneratorChild()
throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml");
+
"/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -225,7 +225,7 @@ public void
testNotAutomaticallyInsertAggregatedFeatureGeneratorChild() throws E
@Test
public void testAutomaticallyInsertAggregatedFeatureGeneratorChildren()
throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml");
+
"/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -244,4 +244,27 @@ public void
testAutomaticallyInsertAggregatedFeatureGeneratorChildren() throws E
Assert.assertTrue(afgen instanceof OutcomePriorFeatureGenerator);
}
}
+
+ @Test
+ public void testInsertCachedFeatureGenerator() throws Exception {
+ InputStream generatorDescriptorIn = getClass().getResourceAsStream(
+ "/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml");
+
+ // If this fails the generator descriptor could not be found
+ // at the expected location
+ Assert.assertNotNull(generatorDescriptorIn);
+
+ AdaptiveFeatureGenerator featureGenerator =
GeneratorFactory.create(generatorDescriptorIn, null);
+ Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
+ CachedFeatureGenerator cachedFeatureGenerator =
(CachedFeatureGenerator)featureGenerator;
+
+ Assert.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
+ instanceof AggregatedFeatureGenerator);
+ AggregatedFeatureGenerator aggregatedFeatureGenerator =
+
(AggregatedFeatureGenerator)cachedFeatureGenerator.getCachedFeatureGenerator();
+ Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
+ for (AdaptiveFeatureGenerator afg:
aggregatedFeatureGenerator.getGenerators()) {
+ Assert.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
+ }
+ }
}
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
index b85090482..06c73df8a 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
@@ -18,20 +18,25 @@
-->
<!-- Default name finder feature generator configuration -->
-<generators>
- <cache>
- <generators>
- <window prevLength = "2" nextLength = "2">
- <tokenclass/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <token/>
- </window>
- <definition/>
- <prevmap/>
- <bigram/>
- <sentence begin="true" end="false"/>
- <tokenpos model="en-pos-perceptron.bin"/>
- </generators>
- </cache>
-</generators>
\ No newline at end of file
+<featureGenerators cache="true" name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">en-pos-perceptron.bin</str>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
index 7464627e3..c8b5887af 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
@@ -15,33 +15,29 @@
~ limitations under the License.
-->
-<featureGenerators name="nameFinder">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
- <str name="model">pos-model.bin</str>
- </generator>
- </generator>
- <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
+<featureGenerators cache="true" name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">pos-model.bin</str>
</generator>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 67%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index 7dbed59a0..08f1400c9 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,10 +19,8 @@
under the License.
-->
-<featureGenerators name="test">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
new file mode 100644
index 000000000..08f1400c9
--- /dev/null
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<featureGenerators cache="true" name="test">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 79%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index ed7f2f698..801adad9c 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,8 +19,6 @@
under the License.
-->
-<featureGenerators name="test">
- <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> avoid letting users specify CachedFeatureGeneratorFactory in XML config
> -----------------------------------------------------------------------
>
> Key: OPENNLP-1160
> URL: https://issues.apache.org/jira/browse/OPENNLP-1160
> Project: OpenNLP
> Issue Type: Improvement
> Components: Formats, Name Finder
> Affects Versions: 1.8.3
> Reporter: Koji Sekiguchi
> Assignee: Koji Sekiguchi
> Priority: Minor
>
> This is similar to OPENNLP-1159. When I'm working on OPENNLP-1154, I think we
> should do it for better use.
> I'd like to implement this as an independent ticket from OPENNLP-1154 and
> OPENNLP-1159 to make patch easy to read.
> And this ticket is somewhat different from OPENNLP-1159 as users must be able
> to control the framework uses CachedFeatureGeneratorFactory or not.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)