This is an automated email from the ASF dual-hosted git repository.
koji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new 0752419 OPENNLP-1154: change the XML format for feature generator
config (#286)
0752419 is described below
commit 07524191a42c58356915077020fad06b9939906a
Author: Koji Sekiguchi <[email protected]>
AuthorDate: Wed Dec 27 12:49:57 2017 +0900
OPENNLP-1154: change the XML format for feature generator config (#286)
---
.../AggregatedFeatureGeneratorFactory.java | 80 ++
.../featuregen/ArtifactToSerializerMapper.java | 1 +
.../BigramNameFeatureGeneratorFactory.java | 50 ++
.../BrownClusterBigramFeatureGeneratorFactory.java | 81 ++
...wnClusterTokenClassFeatureGeneratorFactory.java | 81 ++
.../BrownClusterTokenFeatureGeneratorFactory.java | 81 ++
.../featuregen/CachedFeatureGeneratorFactory.java | 80 ++
.../CharacterNgramFeatureGeneratorFactory.java | 73 ++
.../util/featuregen/CustomFeatureGenerator.java | 1 +
.../DefinitionFeatureGeneratorFactory.java | 54 ++
.../DictionaryFeatureGeneratorFactory.java | 84 ++
.../DocumentBeginFeatureGeneratorFactory.java | 49 ++
.../tools/util/featuregen/GeneratorFactory.java | 851 +++++++++------------
.../POSTaggerNameFeatureGeneratorFactory.java | 76 ++
.../PosTaggerFeatureGeneratorFactory.java | 49 ++
.../featuregen/PrefixFeatureGeneratorFactory.java | 63 ++
.../PreviousMapFeatureGeneratorFactory.java | 53 ++
.../SentenceFeatureGeneratorFactory.java | 65 ++
.../featuregen/SuffixFeatureGeneratorFactory.java | 63 ++
.../TokenClassFeatureGeneratorFactory.java | 66 ++
...ator.java => TokenFeatureGeneratorFactory.java} | 33 +-
.../TokenPatternFeatureGeneratorFactory.java | 52 ++
.../featuregen/WindowFeatureGeneratorFactory.java | 104 +++
.../WordClusterFeatureGeneratorFactory.java | 89 +++
.../tools/namefind/ner-default-features.xml | 41 +-
...atures.xml => ner-default-features_classic.xml} | 0
.../opennlp/tools/postag/pos-default-features.xml | 76 +-
...atures.xml => pos-default-features_classic.xml} | 0
.../FeatureGenWithSerializerMapping.java | 1 +
...java => GeneratorFactoryClassicFormatTest.java} | 19 +-
.../util/featuregen/GeneratorFactoryTest.java | 101 ++-
.../opennlp/tools/namefind/ner-pos-features.xml | 49 +-
...s-features.xml => ner-pos-features_classic.xml} | 2 +-
... CustomClassLoadingWithSerializers_classic.xml} | 0
...sLoading.xml => CustomClassLoading_classic.xml} | 0
.../FeatureGeneratorConfigWithUnkownElement.xml | 5 +-
...reGeneratorConfigWithUnkownElement_classic.xml} | 0
... TestDictionarySerializerMappingExtraction.xml} | 8 +-
...tionarySerializerMappingExtraction_classic.xml} | 0
....xml => TestFeatureGeneratorConfig_classic.xml} | 1 -
...hUnkownElement.xml => TestParametersConfig.xml} | 19 +-
...stTokenClassFeatureGeneratorConfig_classic.xml} | 4 +-
42 files changed, 1970 insertions(+), 635 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGeneratorFactory.java
new file mode 100644
index 0000000..e47dee5
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AggregatedFeatureGeneratorFactory.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * @see AggregatedFeatureGenerator
+ */
+public class AggregatedFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public AggregatedFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ Collection<AdaptiveFeatureGenerator> aggregatedGenerators = new
LinkedList<>();
+
+ NodeList childNodes = generatorElement.getChildNodes();
+
+ for (int i = 0; i < childNodes.getLength(); i++) {
+ Node childNode = childNodes.item(i);
+ if (childNode instanceof Element) {
+ Element aggregatedGeneratorElement = (Element) childNode;
+ aggregatedGenerators.add(
+ GeneratorFactory.createGenerator(aggregatedGeneratorElement,
resourceManager));
+ }
+ }
+
+ return new AggregatedFeatureGenerator(aggregatedGenerators.toArray(
+ new AdaptiveFeatureGenerator[aggregatedGenerators.size()]));
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("generators", new AggregatedFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ List<AdaptiveFeatureGenerator> aggregatedGenerators = new ArrayList<>();
+ for (Map.Entry<String, Object> arg: args.entrySet()) {
+ if (arg.getKey().startsWith("generator#")) {
+ aggregatedGenerators.add((AdaptiveFeatureGenerator)arg.getValue());
+ }
+ }
+ return new AggregatedFeatureGenerator(aggregatedGenerators.toArray(
+ new AdaptiveFeatureGenerator[aggregatedGenerators.size()]));
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java
index 1a0b68b..f472318 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java
@@ -21,6 +21,7 @@ import java.util.Map;
import opennlp.tools.util.model.ArtifactSerializer;
+@Deprecated // TODO: (OPENNLP-1174) remove back-compat support when it is
unnecessary
public interface ArtifactToSerializerMapper {
Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping();
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorFactory.java
new file mode 100644
index 0000000..c1f666a
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGeneratorFactory.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+public class BigramNameFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public BigramNameFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) {
+
+ return new BigramNameFeatureGenerator();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("bigram", new BigramNameFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ return new BigramNameFeatureGenerator();
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterBigramFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterBigramFeatureGeneratorFactory.java
new file mode 100644
index 0000000..668a2b9
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterBigramFeatureGeneratorFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+
+/**
+ * Generates Brown clustering features for token bigrams.
+ */
+public class BrownClusterBigramFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public BrownClusterBigramFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ String dictResourceKey = generatorElement.getAttribute("dict");
+
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+
+
+ if (!(dictResource instanceof BrownCluster)) {
+ throw new InvalidFormatException("Not a BrownLexicon resource for key: "
+ dictResourceKey);
+ }
+
+ return new BrownBigramFeatureGenerator((BrownCluster) dictResource);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("brownclusterbigram", new
BrownClusterBigramFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ // if resourceManager is null, we don't instantiate
+ if (resourceManager == null)
+ return null;
+
+ String dictResourceKey = getStr("dict");
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+ if (!(dictResource instanceof BrownCluster)) {
+ throw new InvalidFormatException("Not a BrownLexicon resource for key: "
+ dictResourceKey);
+ }
+
+ return new BrownBigramFeatureGenerator((BrownCluster) dictResource);
+ }
+
+ @Override
+ public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping()
throws InvalidFormatException {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+ mapping.put(getStr("dict"), new BrownCluster.BrownClusterSerializer());
+ return mapping;
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterTokenClassFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterTokenClassFeatureGeneratorFactory.java
new file mode 100644
index 0000000..905762b
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterTokenClassFeatureGeneratorFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+
+/**
+ * Generates Brown clustering features for token classes.
+ */
+public class BrownClusterTokenClassFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public BrownClusterTokenClassFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ String dictResourceKey = generatorElement.getAttribute("dict");
+
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+
+
+ if (!(dictResource instanceof BrownCluster)) {
+ throw new InvalidFormatException("Not a BrownLexicon resource for key: "
+ dictResourceKey);
+ }
+
+ return new BrownTokenClassFeatureGenerator((BrownCluster) dictResource);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("brownclustertokenclass", new
BrownClusterTokenClassFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ // if resourceManager is null, we don't instantiate
+ if (resourceManager == null)
+ return null;
+
+ String dictResourceKey = getStr("dict");
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+ if (!(dictResource instanceof BrownCluster)) {
+ throw new InvalidFormatException("Not a BrownLexicon resource for key: "
+ dictResourceKey);
+ }
+
+ return new BrownTokenClassFeatureGenerator((BrownCluster) dictResource);
+ }
+
+ @Override
+ public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping()
throws InvalidFormatException {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+ mapping.put(getStr("dict"), new BrownCluster.BrownClusterSerializer());
+ return mapping;
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterTokenFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterTokenFeatureGeneratorFactory.java
new file mode 100644
index 0000000..3d866ac
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownClusterTokenFeatureGeneratorFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+
+/**
+ * Generates Brown clustering features for current token.
+ */
+public class BrownClusterTokenFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public BrownClusterTokenFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ String dictResourceKey = generatorElement.getAttribute("dict");
+
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+
+
+ if (!(dictResource instanceof BrownCluster)) {
+ throw new InvalidFormatException("Not a BrownLexicon resource for key: "
+ dictResourceKey);
+ }
+
+ return new BrownTokenFeatureGenerator((BrownCluster) dictResource);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("brownclustertoken", new
BrownClusterTokenFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ // if resourceManager is null, we don't instantiate
+ if (resourceManager == null)
+ return null;
+
+ String dictResourceKey = getStr("dict");
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+ if (!(dictResource instanceof BrownCluster)) {
+ throw new InvalidFormatException("Not a BrownLexicon resource for key: "
+ dictResourceKey);
+ }
+
+ return new BrownTokenFeatureGenerator((BrownCluster) dictResource);
+ }
+
+ @Override
+ public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping()
throws InvalidFormatException {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+ mapping.put(getStr("dict"), new BrownCluster.BrownClusterSerializer());
+ return mapping;
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorFactory.java
new file mode 100644
index 0000000..e59b449
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGeneratorFactory.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * @see CachedFeatureGenerator
+ */
+public class CachedFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public CachedFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ Element cachedGeneratorElement = null;
+
+ NodeList kids = generatorElement.getChildNodes();
+
+ for (int i = 0; i < kids.getLength(); i++) {
+ Node childNode = kids.item(i);
+
+ if (childNode instanceof Element) {
+ cachedGeneratorElement = (Element) childNode;
+ break;
+ }
+ }
+
+ if (cachedGeneratorElement == null) {
+ throw new InvalidFormatException("Could not find containing generator
element!");
+ }
+
+ AdaptiveFeatureGenerator cachedGenerator =
+ GeneratorFactory.createGenerator(cachedGeneratorElement,
resourceManager);
+
+ return new CachedFeatureGenerator(cachedGenerator);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("cache", new CachedFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ AdaptiveFeatureGenerator generator =
(AdaptiveFeatureGenerator)args.get("generator#0");
+ if (generator == null) {
+ throw new InvalidFormatException("Could not find containing generator
element!");
+ }
+ return new CachedFeatureGenerator(generator);
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorFactory.java
new file mode 100644
index 0000000..b375626
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CharacterNgramFeatureGeneratorFactory.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * @see CharacterNgramFeatureGenerator
+ */
+public class CharacterNgramFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public CharacterNgramFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ String minString = generatorElement.getAttribute("min");
+
+ int min;
+
+ try {
+ min = Integer.parseInt(minString);
+ } catch (NumberFormatException e) {
+ throw new InvalidFormatException("min attribute '" + minString + "' is
not a number!", e);
+ }
+
+ String maxString = generatorElement.getAttribute("max");
+
+ int max;
+
+ try {
+ max = Integer.parseInt(maxString);
+ } catch (NumberFormatException e) {
+ throw new InvalidFormatException("max attribute '" + maxString + "' is
not a number!", e);
+ }
+
+ return new CharacterNgramFeatureGenerator(min, max);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("charngram", new CharacterNgramFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ return new CharacterNgramFeatureGenerator(getInt("min"), getInt("max"));
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
index 55d6332..81245fc 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
@@ -21,6 +21,7 @@ import java.util.Map;
import opennlp.tools.util.InvalidFormatException;
+@Deprecated // TODO: (OPENNLP-1174) remove back-compat support when it is
unnecessary
public abstract class CustomFeatureGenerator implements
AdaptiveFeatureGenerator {
/**
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DefinitionFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DefinitionFeatureGeneratorFactory.java
new file mode 100644
index 0000000..e33bacb
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DefinitionFeatureGeneratorFactory.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * @see OutcomePriorFeatureGenerator
+ */
+public class DefinitionFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ private static final String ELEMENT_NAME = "definition";
+
+ public DefinitionFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+ return new OutcomePriorFeatureGenerator();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put(ELEMENT_NAME, new DefinitionFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ return new OutcomePriorFeatureGenerator();
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGeneratorFactory.java
new file mode 100644
index 0000000..50b70e8
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGeneratorFactory.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+import opennlp.tools.util.model.DictionarySerializer;
+
+/**
+ * @see DictionaryFeatureGenerator
+ */
+public class DictionaryFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public DictionaryFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ String dictResourceKey = generatorElement.getAttribute("dict");
+
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+
+ if (!(dictResource instanceof Dictionary)) {
+ throw new InvalidFormatException("No dictionary resource for key: " +
dictResourceKey);
+ }
+
+ String prefix = generatorElement.getAttribute("prefix");
+
+ return new DictionaryFeatureGenerator(prefix, (Dictionary) dictResource);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("dictionary", new DictionaryFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ // if resourceManager is null, we don't instantiate
+ if (resourceManager == null)
+ return null;
+
+ String dictResourceKey = getStr("dict");
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+ if (!(dictResource instanceof Dictionary)) {
+ throw new InvalidFormatException("No dictionary resource for key: " +
dictResourceKey);
+ }
+
+ return new DictionaryFeatureGenerator(getStr("prefix"), (Dictionary)
dictResource);
+ }
+
+ @Override
+ public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping()
throws InvalidFormatException {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+ mapping.put(getStr("dict"), new DictionarySerializer());
+ return mapping;
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGeneratorFactory.java
new file mode 100644
index 0000000..8256f8e
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DocumentBeginFeatureGeneratorFactory.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+public class DocumentBeginFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public DocumentBeginFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) {
+ return new DocumentBeginFeatureGenerator();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("docbegin", new DocumentBeginFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ return new DocumentBeginFeatureGenerator();
+ }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index 94bca02..26de2f9 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -19,13 +19,13 @@ package opennlp.tools.util.featuregen;
import java.io.IOException;
import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
-import java.util.LinkedList;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.xpath.XPath;
@@ -38,10 +38,9 @@ import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
+import org.w3c.dom.Text;
import org.xml.sax.SAXException;
-import opennlp.tools.dictionary.Dictionary;
-import opennlp.tools.postag.POSModel;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.XmlUtil;
import opennlp.tools.util.ext.ExtensionLoader;
@@ -54,20 +53,29 @@ import opennlp.tools.util.model.POSModelSerializer;
*
* Example of an XML descriptor:
*<p>
- * <generators>
- * <charngram min = "2" max = "5"/>
- * <definition/>
- * <cache>
- * <window prevLength = "3" nextLength = "3">
- * <generators>
- * <prevmap/>
- * <sentence/>
- * <tokenclass/>
- * <tokenpattern/>
- * </generators>
- * </window>
- * </cache>
- * </generators>
+ * <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ * <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
+ * <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ * <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ * <int name="prevLength">2</int>
+ * <int name="nextLength">2</int>
+ * <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ * </generator>
+ * <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ * <int name="prevLength">2</int>
+ * <int name="nextLength">2</int>
+ * <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ * </generator>
+ * <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ * <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ * <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ * <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ * <bool name="begin">true</bool>
+ * <bool name="end">false</bool>
+ * </generator>
+ * </generator>
+ * </generator>
+ * </generator>
* </p>
*
* Each XML element is mapped to a {@link
GeneratorFactory.XmlFeatureGeneratorFactory} which
@@ -80,7 +88,7 @@ import opennlp.tools.util.model.POSModelSerializer;
* method.
*
* In the example above the generators element is mapped to the
- * {@link GeneratorFactory.AggregatedFeatureGeneratorFactory} which then
+ * {@link AggregatedFeatureGeneratorFactory} which then
* creates all the aggregated {@link AdaptiveFeatureGenerator}s to
* accomplish this it evaluates the mapping with the same mechanism
* and gives the child element to the corresponding factories. All
@@ -94,6 +102,7 @@ public class GeneratorFactory {
* an {@link AdaptiveFeatureGenerator} from an given XML {@link Element}
* which contains all necessary configuration if any.
*/
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
interface XmlFeatureGeneratorFactory {
/**
@@ -110,527 +119,236 @@ public class GeneratorFactory {
FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException;
}
- /**
- * @see AggregatedFeatureGenerator
- */
- static class AggregatedFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
+ public static abstract class AbstractXmlFeatureGeneratorFactory {
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+ protected Element generatorElement;
+ protected FeatureGeneratorResourceProvider resourceManager;
- Collection<AdaptiveFeatureGenerator> aggregatedGenerators = new
LinkedList<>();
+ // to respect the order <generator/> in AggregatedFeatureGenerator, let's
use LinkedHashMap
+ protected LinkedHashMap<String, Object> args;
- NodeList childNodes = generatorElement.getChildNodes();
+ public AbstractXmlFeatureGeneratorFactory() {
+ args = new LinkedHashMap<>();
+ }
+ public Map<String, ArtifactSerializer<?>>
+ getArtifactSerializerMapping() throws InvalidFormatException {
+ return null;
+ }
+
+ /**
+ * Binds this factory to its XML element and collects the element's children into
+ * {@link #args}. Nested {@code <generator>} elements are built immediately and stored
+ * under the keys {@code generator#0}, {@code generator#1}, ... (generators that come
+ * back as {@code null} are skipped); every other child element is parsed as a typed
+ * parameter and stored under the value of its {@code name} attribute.
+ *
+ * @param element the {@code <generator>} element that configures this factory
+ * @param resourceManager the resource provider handed on to nested generators
+ * @throws InvalidFormatException if a child element has an unknown tag, has no text
+ * value, or its text cannot be parsed as the declared numeric type
+ */
+ final void init(Element element, FeatureGeneratorResourceProvider resourceManager)
+ throws InvalidFormatException {
+ this.generatorElement = element;
+ this.resourceManager = resourceManager;
+ int generators = 0;
+ NodeList childNodes = generatorElement.getChildNodes();
for (int i = 0; i < childNodes.getLength(); i++) {
Node childNode = childNodes.item(i);
if (childNode instanceof Element) {
- Element aggregatedGeneratorElement = (Element) childNode;
- aggregatedGenerators.add(
- GeneratorFactory.createGenerator(aggregatedGeneratorElement,
resourceManager));
+ Element elem = (Element)childNode;
+ String type = elem.getTagName();
+ if (type.equals("generator")) {
+ String key = "generator#" + Integer.toString(generators++);
+ AdaptiveFeatureGenerator afg = buildGenerator(elem, resourceManager);
+ if (afg != null)
+ args.put(key, afg);
+ }
+ else {
+ String name = elem.getAttribute("name");
+ try {
+ switch (type) {
+ case "int" :
+ args.put(name, Integer.parseInt(textValue(elem)));
+ break;
+ case "long" :
+ args.put(name, Long.parseLong(textValue(elem)));
+ break;
+ case "float" :
+ args.put(name, Float.parseFloat(textValue(elem)));
+ break;
+ case "double" :
+ args.put(name, Double.parseDouble(textValue(elem)));
+ break;
+ case "str" :
+ args.put(name, textValue(elem));
+ break;
+ case "bool" :
+ args.put(name, Boolean.parseBoolean(textValue(elem)));
+ break;
+ default:
+ throw new InvalidFormatException(
+ "child element must be one of generator, int, long, float, double," +
+ " str or bool");
+ }
+ } catch (NumberFormatException e) {
+ // report a malformed number as a descriptor error, not as an unchecked exception
+ throw new InvalidFormatException("parameter " + name + " has an invalid " + type +
+ " value!", e);
+ }
+ }
}
}
-
- return new AggregatedFeatureGenerator(aggregatedGenerators.toArray(
- new AdaptiveFeatureGenerator[aggregatedGenerators.size()]));
}
+
+ // Returns the text of a parameter element; rejects empty elements and non-text content
+ // instead of failing with a NullPointerException or ClassCastException.
+ private static String textValue(Element elem) throws InvalidFormatException {
+ Node first = elem.getFirstChild();
+ if (!(first instanceof Text)) {
+ throw new InvalidFormatException("parameter " + elem.getAttribute("name") +
+ " must have a text value!");
+ }
+ return ((Text)first).getWholeText();
+ }
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("generators", new AggregatedFeatureGeneratorFactory());
- }
- }
-
- /**
- * @see CachedFeatureGenerator
- */
- static class CachedFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- private CachedFeatureGeneratorFactory() {
- }
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- Element cachedGeneratorElement = null;
-
- NodeList kids = generatorElement.getChildNodes();
-
- for (int i = 0; i < kids.getLength(); i++) {
- Node childNode = kids.item(i);
-
- if (childNode instanceof Element) {
- cachedGeneratorElement = (Element) childNode;
- break;
- }
+ public int getInt(String name) throws InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ throw new InvalidFormatException("parameter " + name + " must be
set!");
}
-
- if (cachedGeneratorElement == null) {
- throw new InvalidFormatException("Could not find containing generator
element!");
+ else if (value instanceof Integer) {
+ return (Integer)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
integer!");
}
-
- AdaptiveFeatureGenerator cachedGenerator =
- GeneratorFactory.createGenerator(cachedGeneratorElement,
resourceManager);
-
- return new CachedFeatureGenerator(cachedGenerator);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("cache", new CachedFeatureGeneratorFactory());
}
- }
-
- /**
- * @see CharacterNgramFeatureGenerator
- */
- static class CharacterNgramFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- String minString = generatorElement.getAttribute("min");
- int min;
-
- try {
- min = Integer.parseInt(minString);
- } catch (NumberFormatException e) {
- throw new InvalidFormatException("min attribute '" + minString + "' is
not a number!", e);
+ public int getInt(String name, int defValue) throws InvalidFormatException
{
+ Object value = args.get(name);
+ if (value == null) {
+ return defValue;
}
-
- String maxString = generatorElement.getAttribute("max");
-
- int max;
-
- try {
- max = Integer.parseInt(maxString);
- } catch (NumberFormatException e) {
- throw new InvalidFormatException("max attribute '" + maxString + "' is
not a number!", e);
+ else if (value instanceof Integer) {
+ return (Integer)value;
}
-
- return new CharacterNgramFeatureGenerator(min, max);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("charngram", new CharacterNgramFeatureGeneratorFactory());
- }
- }
-
- /**
- * @see DefinitionFeatureGeneratorFactory
- */
- static class DefinitionFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- private static final String ELEMENT_NAME = "definition";
-
- private DefinitionFeatureGeneratorFactory() {
- }
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
- return new OutcomePriorFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put(ELEMENT_NAME, new DefinitionFeatureGeneratorFactory());
- }
- }
-
- /**
- * @see DictionaryFeatureGenerator
- */
- static class DictionaryFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- String dictResourceKey = generatorElement.getAttribute("dict");
-
- Object dictResource = resourceManager.getResource(dictResourceKey);
-
- if (!(dictResource instanceof Dictionary)) {
- throw new InvalidFormatException("No dictionary resource for key: " +
dictResourceKey);
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
integer!");
}
-
- String prefix = generatorElement.getAttribute("prefix");
-
- return new DictionaryFeatureGenerator(prefix, (Dictionary) dictResource);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("dictionary", new DictionaryFeatureGeneratorFactory());
- }
- }
-
- static class DocumentBeginFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
- return new DocumentBeginFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("docbegin", new DocumentBeginFeatureGeneratorFactory());
}
- }
-
- /**
- * Defines a word cluster generator factory; it reads an element containing
- * 'w2vwordcluster' as a tag name; these clusters are typically produced by
- * word2vec or clark pos induction systems.
- */
- static class WordClusterFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- String dictResourceKey = generatorElement.getAttribute("dict");
- boolean lowerCaseDictionary =
"true".equals(generatorElement.getAttribute("lowerCase"));
- Object dictResource = resourceManager.getResource(dictResourceKey);
-
-
- if (!(dictResource instanceof WordClusterDictionary)) {
- throw new InvalidFormatException("Not a WordClusterDictionary resource
for key: "
- + dictResourceKey);
+ public long getLong(String name) throws InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ throw new InvalidFormatException("parameter " + name + " must be
set!");
}
-
- return new WordClusterFeatureGenerator((WordClusterDictionary)
dictResource,
- dictResourceKey, lowerCaseDictionary);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("wordcluster", new WordClusterFeatureGeneratorFactory());
- }
- }
-
- /**
- * Generates Brown clustering features for current token.
- */
- static class BrownClusterTokenFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- String dictResourceKey = generatorElement.getAttribute("dict");
-
- Object dictResource = resourceManager.getResource(dictResourceKey);
-
-
- if (!(dictResource instanceof BrownCluster)) {
- throw new InvalidFormatException("Not a BrownLexicon resource for key:
" + dictResourceKey);
+ else if (value instanceof Long) {
+ return (Long)value;
}
-
- return new BrownTokenFeatureGenerator((BrownCluster) dictResource);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("brownclustertoken", new
BrownClusterTokenFeatureGeneratorFactory());
- }
- }
-
- /**
- * Generates Brown clustering features for token classes.
- */
- static class BrownClusterTokenClassFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- String dictResourceKey = generatorElement.getAttribute("dict");
-
- Object dictResource = resourceManager.getResource(dictResourceKey);
-
-
- if (!(dictResource instanceof BrownCluster)) {
- throw new InvalidFormatException("Not a BrownLexicon resource for key:
" + dictResourceKey);
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
long!");
}
-
- return new BrownTokenClassFeatureGenerator((BrownCluster) dictResource);
}
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("brownclustertokenclass", new
BrownClusterTokenClassFeatureGeneratorFactory());
- }
- }
-
- /**
- * Generates Brown clustering features for token bigrams.
- */
- static class BrownClusterBigramFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- String dictResourceKey = generatorElement.getAttribute("dict");
-
- Object dictResource = resourceManager.getResource(dictResourceKey);
-
-
- if (!(dictResource instanceof BrownCluster)) {
- throw new InvalidFormatException("Not a BrownLexicon resource for key:
" + dictResourceKey);
+ public long getLong(String name, long defValue) throws
InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ return defValue;
}
-
- return new BrownBigramFeatureGenerator((BrownCluster) dictResource);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("brownclusterbigram", new
BrownClusterBigramFeatureGeneratorFactory());
- }
- }
-
- /**
- * @see PreviousMapFeatureGenerator
- */
- static class PreviousMapFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
- return new PreviousMapFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("prevmap", new PreviousMapFeatureGeneratorFactory());
- }
- }
-
- // TODO: Add parameters ...
-
- /**
- * @see SentenceFeatureGenerator
- */
- static class SentenceFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
-
- String beginFeatureString = generatorElement.getAttribute("begin");
-
- boolean beginFeature = true;
- if (beginFeatureString.length() != 0)
- beginFeature = Boolean.parseBoolean(beginFeatureString);
-
- String endFeatureString = generatorElement.getAttribute("end");
- boolean endFeature = true;
- if (endFeatureString.length() != 0)
- endFeature = Boolean.parseBoolean(endFeatureString);
-
- return new SentenceFeatureGenerator(beginFeature, endFeature);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("sentence", new SentenceFeatureGeneratorFactory());
- }
- }
-
- /**
- * @see TokenClassFeatureGenerator
- */
- static class TokenClassFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
-
- String attribute = generatorElement.getAttribute("wordAndClass");
-
- // Default to true.
- boolean generateWordAndClassFeature = true;
-
- if (!Objects.equals(attribute, "")) {
- // Anything other than "true" sets it to false.
- if (!"true".equalsIgnoreCase(attribute)) {
- generateWordAndClassFeature = false;
- }
+ else if (value instanceof Long) {
+ return (Long)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
long!");
}
-
- return new TokenClassFeatureGenerator(generateWordAndClassFeature);
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("tokenclass", new TokenClassFeatureGeneratorFactory());
- }
- }
-
- static class TokenFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
-
- return new TokenFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("token", new TokenFeatureGeneratorFactory());
- }
- }
-
- static class BigramNameFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
-
- return new BigramNameFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("bigram", new BigramNameFeatureGeneratorFactory());
- }
- }
-
- /**
- * @see TokenPatternFeatureGenerator
- */
- static class TokenPatternFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
- return new TokenPatternFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("tokenpattern", new
TokenPatternFeatureGeneratorFactory());
- }
- }
-
- static class PosTaggerFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider
resourceManager) {
- return new PosTaggerFeatureGenerator();
- }
-
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("postagger", new PosTaggerFeatureGeneratorFactory());
}
- }
-
- /**
- * @see WindowFeatureGenerator
- */
- static class WindowFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
-
- Element nestedGeneratorElement = null;
- NodeList kids = generatorElement.getChildNodes();
-
- for (int i = 0; i < kids.getLength(); i++) {
- Node childNode = kids.item(i);
-
- if (childNode instanceof Element) {
- nestedGeneratorElement = (Element) childNode;
- break;
- }
+ public float getFloat(String name) throws InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ throw new InvalidFormatException("parameter " + name + " must be
set!");
}
-
- if (nestedGeneratorElement == null) {
- throw new InvalidFormatException("window feature generator must
contain" +
- " an aggregator element");
+ else if (value instanceof Float) {
+ return (Float)value;
}
-
- AdaptiveFeatureGenerator nestedGenerator =
- GeneratorFactory.createGenerator(nestedGeneratorElement,
resourceManager);
-
- String prevLengthString = generatorElement.getAttribute("prevLength");
-
- int prevLength;
-
- try {
- prevLength = Integer.parseInt(prevLengthString);
- } catch (NumberFormatException e) {
- throw new InvalidFormatException("prevLength attribute '" +
prevLengthString
- + "' is not a number!", e);
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
float!");
}
+ }
- String nextLengthString = generatorElement.getAttribute("nextLength");
-
- int nextLength;
-
- try {
- nextLength = Integer.parseInt(nextLengthString);
- } catch (NumberFormatException e) {
- throw new InvalidFormatException("nextLength attribute '" +
nextLengthString
- + "' is not a number!", e);
+ public float getFloat(String name, float defValue) throws
InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ return defValue;
+ }
+ else if (value instanceof Float) {
+ return (Float)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
float!");
}
-
- return new WindowFeatureGenerator(nestedGenerator, prevLength,
nextLength);
}
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("window", new WindowFeatureGeneratorFactory());
+ public double getDouble(String name) throws InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ throw new InvalidFormatException("parameter " + name + " must be
set!");
+ }
+ else if (value instanceof Double) {
+ return (Double)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
double!");
+ }
}
- }
-
- /**
- * @see TokenPatternFeatureGenerator
- */
- static class PrefixFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
-
- String attribute = generatorElement.getAttribute("length");
-
- int prefixLength = PrefixFeatureGenerator.DEFAULT_MAX_LENGTH;
-
- if (!Objects.equals(attribute, "")) {
- prefixLength = Integer.parseInt(attribute);
+ public double getDouble(String name, double defValue) throws
InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ return defValue;
+ }
+ else if (value instanceof Double) {
+ return (Double)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
double!");
}
-
- return new PrefixFeatureGenerator(prefixLength);
}
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("prefix", new PrefixFeatureGeneratorFactory());
+ /**
+ * Returns the mandatory string parameter {@code name}.
+ *
+ * @param name the value of the parameter element's {@code name} attribute
+ * @return the parameter value
+ * @throws InvalidFormatException if the parameter is not set or is not a string
+ */
+ public String getStr(String name) throws InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ throw new InvalidFormatException("parameter " + name + " must be set!");
+ }
+ else if (value instanceof String) {
+ return (String)value;
+ }
+ else {
+ // name the type this getter expects, as getStr(String, String) does
+ throw new InvalidFormatException("parameter " + name + " must be String!");
+ }
+ }
- }
-
- /**
- * @see TokenPatternFeatureGenerator
- */
- static class SuffixFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider resourceManager) {
-
- String attribute = generatorElement.getAttribute("length");
-
- int suffixLength = SuffixFeatureGenerator.DEFAULT_MAX_LENGTH;
-
- if (!Objects.equals(attribute, "")) {
- suffixLength = Integer.parseInt(attribute);
+ public String getStr(String name, String defValue) throws
InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ return defValue;
+ }
+ else if (value instanceof String) {
+ return (String)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
String!");
}
-
- return new SuffixFeatureGenerator(suffixLength);
}
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("suffix", new SuffixFeatureGeneratorFactory());
+ public boolean getBool(String name) throws InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ throw new InvalidFormatException("parameter " + name + " must be
set!");
+ }
+ else if (value instanceof Boolean) {
+ return (Boolean)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
boolean!");
+ }
}
- }
-
-
-
- /**
- * @see TokenPatternFeatureGenerator
- */
- static class POSTaggerNameFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
-
- public AdaptiveFeatureGenerator create(Element generatorElement,
- FeatureGeneratorResourceProvider
resourceManager)
- throws InvalidFormatException {
-
- String modelResourceKey = generatorElement.getAttribute("model");
-
- POSModel model = (POSModel)resourceManager.getResource(modelResourceKey);
-
- return new POSTaggerNameFeatureGenerator(model);
+ public boolean getBool(String name, boolean defValue) throws
InvalidFormatException {
+ Object value = args.get(name);
+ if (value == null) {
+ return defValue;
+ }
+ else if (value instanceof Boolean) {
+ return (Boolean)value;
+ }
+ else {
+ throw new InvalidFormatException("parameter " + name + " must be
boolean!");
+ }
}
- static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
- factoryMap.put("tokenpos", new POSTaggerNameFeatureGeneratorFactory());
- }
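+ /**
+ * Creates the feature generator configured by the parameters collected in {@link #args}.
+ *
+ * @return the generator, or {@code null} if the subclass needs {@link #resourceManager}
+ * to instantiate it and no resource manager was supplied
+ * @throws InvalidFormatException if a required parameter is missing or has the wrong type
+ */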
+ public abstract AdaptiveFeatureGenerator create() throws
InvalidFormatException;
}
// TODO: We have to support custom resources here. How does it work ?!
@@ -642,6 +360,7 @@ public class GeneratorFactory {
// When training, the descriptor could be consulted first to register the
serializers, and afterwards
// they are stored in the model.
+ // TODO: (OPENNLP-1174) just remove this class when back-compat is no longer
needed
static class CustomFeatureGeneratorFactory implements
XmlFeatureGeneratorFactory {
public AdaptiveFeatureGenerator create(Element generatorElement,
@@ -680,8 +399,10 @@ public class GeneratorFactory {
}
}
+ // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
private static Map<String, XmlFeatureGeneratorFactory> factories = new
HashMap<>();
+ // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
static {
AggregatedFeatureGeneratorFactory.register(factories);
CachedFeatureGeneratorFactory.register(factories);
@@ -718,18 +439,85 @@ public class GeneratorFactory {
*
* @return
*/
+ @Deprecated // TODO: (OPENNLP-1174) remove back-compat support when it is unnecessary
static AdaptiveFeatureGenerator createGenerator(Element generatorElement,
FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
String elementName = generatorElement.getTagName();
- XmlFeatureGeneratorFactory generatorFactory = factories.get(elementName);
+ // A <featureGenerators> root marks the new format, where each <generator> names its
+ // factory through the class attribute.
+ if (elementName.equals("featureGenerators")) {
+ Element firstElem = getFirstChild(generatorElement);
+ if (firstElem == null) {
+ throw new InvalidFormatException("featureGenerators must have one or more generators");
+ }
+ if (!firstElem.getTagName().equals("generator")) {
+ // name the offending child, not the enclosing <featureGenerators> root
+ throw new InvalidFormatException("Unexpected element: " + firstElem.getTagName());
+ }
+ return buildGenerator(firstElem, resourceManager);
+ }
+ // Any other root is the classic format: dispatch on the tag name.
+ XmlFeatureGeneratorFactory generatorFactory = factories.get(elementName);
+ if (generatorFactory == null) {
+ throw new InvalidFormatException("Unexpected element: " + elementName);
+ }
+ return generatorFactory.create(generatorElement, resourceManager);
+ }
- if (generatorFactory == null) {
- throw new InvalidFormatException("Unexpected element: " + elementName);
+ static Element getFirstChild(Element elem) {
+ NodeList nodes = elem.getChildNodes();
+ for (int i = 0; i < nodes.getLength(); i++) {
+ if (nodes.item(i) instanceof Element) {
+ return (Element)nodes.item(i);
+ }
}
+ return null;
+ }
- return generatorFactory.create(generatorElement, resourceManager);
+ /**
+ * Creates a {@link AdaptiveFeatureGenerator} for the provided {@code <generator>} element.
+ * The factory is not looked up by tag name: the element's {@code class} attribute names an
+ * {@link AbstractXmlFeatureGeneratorFactory} subclass, which is instantiated through its
+ * public no-argument constructor, initialized with the element and then asked to create
+ * the generator.
+ *
+ * @param generatorElement the {@code <generator>} element to build from
+ * @param resourceManager the resource provider passed on to the factory
+ *
+ * @return the result of the factory's {@code create()}, which may be {@code null}
+ * @throws InvalidFormatException if the {@code class} attribute is missing or empty, or
+ * if the factory rejects the element's configuration
+ * @throws RuntimeException if the factory class cannot be loaded or instantiated
+ */
+ static AdaptiveFeatureGenerator buildGenerator(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws InvalidFormatException {
+ String className = generatorElement.getAttribute("class");
+ // Element#getAttribute yields "" rather than null when the attribute is absent
+ if (className == null || className.isEmpty()) {
+ throw new InvalidFormatException("generator must have class attribute");
+ }
+ try {
+ Class<?> factoryClass = Class.forName(className);
+ Constructor<?> constructor = factoryClass.getConstructor();
+ AbstractXmlFeatureGeneratorFactory factory =
+ (AbstractXmlFeatureGeneratorFactory)constructor.newInstance();
+ factory.init(generatorElement, resourceManager);
+ return factory.create();
+ } catch (ClassNotFoundException | NoSuchMethodException | InvocationTargetException
+ | InstantiationException | IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
private static org.w3c.dom.Document createDOM(InputStream xmlDescriptorIn)
@@ -772,22 +560,87 @@ public class GeneratorFactory {
Element generatorElement = xmlDescriptorDOM.getDocumentElement();
+ // TODO: (OPENNLP-1174) use #buildGenerator() after back-compat support is gone
return createGenerator(generatorElement, resourceManager);
}
+ /**
+ * Parses the XML feature generator descriptor and collects the artifact
+ * serializer mappings it declares.
+ * <p>
+ * If the root element is named {@code featureGenerators} the descriptor is
+ * treated as the new format and the mappings are gathered from every direct
+ * child {@code generator} element. Any other root element is handed to
+ * {@link #extractArtifactSerializerMappingsClassicFormat(Element)}.
+ *
+ * @param xmlDescriptorIn the stream the XML descriptor is read from
+ *
+ * @return the collected mapping
+ *
+ * @throws IOException declared by this method; see {@code createDOM} and the
+ * classic-format extraction for the actual sources
+ */
public static Map<String, ArtifactSerializer<?>>
extractArtifactSerializerMappings(
InputStream xmlDescriptorIn) throws IOException {
- Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
-
org.w3c.dom.Document xmlDescriptorDOM = createDOM(xmlDescriptorIn);
+ Element element = xmlDescriptorDOM.getDocumentElement();
+
+ String elementName = element.getTagName();
+
+ // The new format is recognized by its root element name.
+ if (elementName.equals("featureGenerators")) {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+ NodeList nodes = element.getChildNodes();
+ for (int i = 0; i < nodes.getLength(); i++) {
+ if (nodes.item(i) instanceof Element) {
+ Element childElem = (Element)nodes.item(i);
+ // only <generator> children contribute mappings; other elements are ignored
+ if (childElem.getTagName().equals("generator")) {
+ extractArtifactSerializerMappings(mapping, childElem);
+ }
+ }
+ }
+ return mapping;
+ }
+ else {
+ return extractArtifactSerializerMappingsClassicFormat(element);
+ }
+ }
+
+ /**
+  * Adds the artifact serializer mappings declared by the given element, and
+  * recursively by all of its nested {@code generator} elements, to
+  * {@code mapping}.
+  * <p>
+  * When the element has a non-empty {@code class} attribute, the named
+  * factory is instantiated through its public no-argument constructor,
+  * initialized with the element and a {@code null} resource manager, and
+  * asked for its serializer mapping. Elements without a {@code class}
+  * attribute contribute nothing themselves, but their {@code generator}
+  * children are still visited.
+  *
+  * @param mapping the map the discovered mappings are added to
+  * @param element the element to inspect
+  *
+  * @throws RuntimeException if a factory class cannot be loaded or
+  *     instantiated reflectively
+  */
+ static void extractArtifactSerializerMappings(
+     Map<String, ArtifactSerializer<?>> mapping, Element element) {
+   // Element#getAttribute returns an empty string (not null) when the
+   // attribute is absent, so the empty case has to be skipped explicitly.
+   String className = element.getAttribute("class");
+   if (className != null && !className.isEmpty()) {
+     try {
+       Class<?> factoryClass = Class.forName(className);
+       Constructor<?> constructor = factoryClass.getConstructor();
+       AbstractXmlFeatureGeneratorFactory factory =
+           (AbstractXmlFeatureGeneratorFactory) constructor.newInstance();
+       factory.init(element, null);
+       Map<String, ArtifactSerializer<?>> map = factory.getArtifactSerializerMapping();
+       if (map != null) {
+         mapping.putAll(map);
+       }
+     } catch (ClassNotFoundException | NoSuchMethodException | InvocationTargetException
+         | InstantiationException | IllegalAccessException e) {
+       throw new RuntimeException(e);
+     } catch (InvalidFormatException ignored) {
+       // Best effort by design: a factory that reports an invalid format
+       // contributes no mapping, and the nested generators are still scanned.
+       // NOTE(review): presumably format errors are reported later, when the
+       // generators are actually built - confirm.
+     }
+   }
+
+   NodeList nodes = element.getChildNodes();
+   for (int i = 0; i < nodes.getLength(); i++) {
+     if (nodes.item(i) instanceof Element) {
+       Element childElem = (Element) nodes.item(i);
+       if (childElem.getTagName().equals("generator")) {
+         extractArtifactSerializerMappings(mapping, childElem);
+       }
+     }
+   }
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) remove back-compat support when it is
unnecessary
+ static Map<String, ArtifactSerializer<?>>
extractArtifactSerializerMappingsClassicFormat(
+ Element elem) throws IOException {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
XPath xPath = XPathFactory.newInstance().newXPath();
NodeList customElements;
try {
XPathExpression exp = xPath.compile("//custom");
- customElements = (NodeList)
exp.evaluate(xmlDescriptorDOM.getDocumentElement(), XPathConstants.NODESET);
+ customElements = (NodeList) exp.evaluate(elem, XPathConstants.NODESET);
} catch (XPathExpressionException e) {
throw new IllegalStateException("The hard coded XPath expression should
always be valid!");
}
@@ -810,7 +663,7 @@ public class GeneratorFactory {
NodeList allElements;
try {
XPathExpression exp = xPath.compile("//*");
- allElements = (NodeList)
exp.evaluate(xmlDescriptorDOM.getDocumentElement(), XPathConstants.NODESET);
+ allElements = (NodeList) exp.evaluate(elem, XPathConstants.NODESET);
} catch (XPathExpressionException e) {
throw new IllegalStateException("The hard coded XPath expression should
always be valid!");
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/POSTaggerNameFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/POSTaggerNameFeatureGeneratorFactory.java
new file mode 100644
index 0000000..627f932
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/POSTaggerNameFeatureGeneratorFactory.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+import opennlp.tools.util.model.POSModelSerializer;
+
+/**
+ * Factory for {@link POSTaggerNameFeatureGenerator}. The {@code model}
+ * attribute of the generator element is used as the resource key under
+ * which the {@link POSModel} is looked up.
+ *
+ * @see POSTaggerNameFeatureGenerator
+ */
+public class POSTaggerNameFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public POSTaggerNameFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point: looks up the model named by the element's
+   * {@code model} attribute and wraps it in a generator.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(Element generatorElement,
+      FeatureGeneratorResourceProvider resourceManager)
+      throws InvalidFormatException {
+    Object resource = resourceManager.getResource(generatorElement.getAttribute("model"));
+    return new POSTaggerNameFeatureGenerator((POSModel) resource);
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code tokenpos}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("tokenpos", new POSTaggerNameFeatureGeneratorFactory());
+  }
+
+  /**
+   * Creates the generator from the {@code model} resource.
+   *
+   * @return the generator, or {@code null} when no resource manager is set
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    if (resourceManager != null) {
+      POSModel posModel = (POSModel) resourceManager.getResource(getStr("model"));
+      return new POSTaggerNameFeatureGenerator(posModel);
+    }
+    // without a resource manager nothing is instantiated
+    return null;
+  }
+
+  /**
+   * Maps the value of the {@code model} attribute to a
+   * {@link POSModelSerializer}.
+   */
+  @Override
+  public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping()
+      throws InvalidFormatException {
+    Map<String, ArtifactSerializer<?>> serializers = new HashMap<>();
+    serializers.put(getStr("model"), new POSModelSerializer());
+    return serializers;
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorFactory.java
new file mode 100644
index 0000000..db4a09a
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PosTaggerFeatureGeneratorFactory.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link PosTaggerFeatureGenerator}; the generator takes no
+ * configuration from the XML element.
+ *
+ * @see PosTaggerFeatureGenerator
+ */
+public class PosTaggerFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public PosTaggerFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point; both arguments are ignored.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(
+      Element generatorElement, FeatureGeneratorResourceProvider resourceManager) {
+    return new PosTaggerFeatureGenerator();
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code postagger}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("postagger", new PosTaggerFeatureGeneratorFactory());
+  }
+
+  /**
+   * @return a new {@link PosTaggerFeatureGenerator}
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new PosTaggerFeatureGenerator();
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorFactory.java
new file mode 100644
index 0000000..4bd73dd
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorFactory.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+import java.util.Objects;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link PrefixFeatureGenerator}. The optional {@code length}
+ * attribute overrides {@link PrefixFeatureGenerator#DEFAULT_MAX_LENGTH}.
+ *
+ * @see PrefixFeatureGenerator
+ */
+public class PrefixFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public PrefixFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point: reads the {@code length} attribute directly
+   * from the element; the resource manager is not used.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(Element generatorElement,
+      FeatureGeneratorResourceProvider resourceManager) {
+    String lengthValue = generatorElement.getAttribute("length");
+
+    // an empty attribute value selects the default length
+    int maxLength = Objects.equals(lengthValue, "")
+        ? PrefixFeatureGenerator.DEFAULT_MAX_LENGTH
+        : Integer.parseInt(lengthValue);
+
+    return new PrefixFeatureGenerator(maxLength);
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code prefix}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("prefix", new PrefixFeatureGeneratorFactory());
+  }
+
+  /**
+   * Creates the generator with the configured {@code length}, falling back
+   * to {@link PrefixFeatureGenerator#DEFAULT_MAX_LENGTH}.
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new PrefixFeatureGenerator(
+        getInt("length", PrefixFeatureGenerator.DEFAULT_MAX_LENGTH));
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorFactory.java
new file mode 100644
index 0000000..855197d
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousMapFeatureGeneratorFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link PreviousMapFeatureGenerator}; the generator takes no
+ * configuration from the XML element.
+ *
+ * @see PreviousMapFeatureGenerator
+ */
+public class PreviousMapFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public PreviousMapFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point; both arguments are ignored.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(
+      Element generatorElement, FeatureGeneratorResourceProvider resourceManager) {
+    return new PreviousMapFeatureGenerator();
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code prevmap}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("prevmap", new PreviousMapFeatureGeneratorFactory());
+  }
+
+  /**
+   * @return a new {@link PreviousMapFeatureGenerator}
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new PreviousMapFeatureGenerator();
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
new file mode 100644
index 0000000..9977852
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SentenceFeatureGeneratorFactory.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link SentenceFeatureGenerator}. The optional boolean
+ * attributes {@code begin} and {@code end} both default to {@code true}.
+ *
+ * @see SentenceFeatureGenerator
+ */
+public class SentenceFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public SentenceFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point: reads the {@code begin} and {@code end}
+   * attributes directly from the element; the resource manager is not used.
+   * An empty attribute value leaves the corresponding flag at {@code true}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(Element generatorElement,
+      FeatureGeneratorResourceProvider resourceManager) {
+
+    String beginFeatureString = generatorElement.getAttribute("begin");
+
+    boolean beginFeature = true;
+    if (beginFeatureString.length() != 0) {
+      beginFeature = Boolean.parseBoolean(beginFeatureString);
+    }
+
+    String endFeatureString = generatorElement.getAttribute("end");
+    boolean endFeature = true;
+    if (endFeatureString.length() != 0) {
+      endFeature = Boolean.parseBoolean(endFeatureString);
+    }
+
+    return new SentenceFeatureGenerator(beginFeature, endFeature);
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code sentence}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("sentence", new SentenceFeatureGeneratorFactory());
+  }
+
+  /**
+   * Creates the generator from the {@code begin} and {@code end} settings,
+   * each defaulting to {@code true}.
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new SentenceFeatureGenerator(getBool("begin", true), getBool("end", true));
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorFactory.java
new file mode 100644
index 0000000..99c2454
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorFactory.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+import java.util.Objects;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link SuffixFeatureGenerator}. The optional {@code length}
+ * attribute overrides {@link SuffixFeatureGenerator#DEFAULT_MAX_LENGTH}.
+ *
+ * @see SuffixFeatureGenerator
+ */
+public class SuffixFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public SuffixFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point: reads the {@code length} attribute directly
+   * from the element; the resource manager is not used.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(Element generatorElement,
+      FeatureGeneratorResourceProvider resourceManager) {
+    String lengthValue = generatorElement.getAttribute("length");
+
+    // an empty attribute value selects the default length
+    int maxLength = Objects.equals(lengthValue, "")
+        ? SuffixFeatureGenerator.DEFAULT_MAX_LENGTH
+        : Integer.parseInt(lengthValue);
+
+    return new SuffixFeatureGenerator(maxLength);
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code suffix}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("suffix", new SuffixFeatureGeneratorFactory());
+  }
+
+  /**
+   * Creates the generator with the configured {@code length}, falling back
+   * to {@link SuffixFeatureGenerator#DEFAULT_MAX_LENGTH}.
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new SuffixFeatureGenerator(
+        getInt("length", SuffixFeatureGenerator.DEFAULT_MAX_LENGTH));
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorFactory.java
new file mode 100644
index 0000000..bcedf7f
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenClassFeatureGeneratorFactory.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+import java.util.Objects;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link TokenClassFeatureGenerator}. The optional
+ * {@code wordAndClass} attribute defaults to {@code true}.
+ *
+ * @see TokenClassFeatureGenerator
+ */
+public class TokenClassFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public TokenClassFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point: reads the {@code wordAndClass} attribute
+   * directly from the element; the resource manager is not used.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(Element generatorElement,
+      FeatureGeneratorResourceProvider resourceManager) {
+    String wordAndClassValue = generatorElement.getAttribute("wordAndClass");
+
+    // An empty value keeps the default (true); otherwise only a
+    // case-insensitive "true" enables the feature.
+    boolean wordAndClass = Objects.equals(wordAndClassValue, "")
+        || "true".equalsIgnoreCase(wordAndClassValue);
+
+    return new TokenClassFeatureGenerator(wordAndClass);
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code tokenclass}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("tokenclass", new TokenClassFeatureGeneratorFactory());
+  }
+
+  /**
+   * Creates the generator with the configured {@code wordAndClass} setting,
+   * defaulting to {@code true}.
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new TokenClassFeatureGenerator(getBool("wordAndClass", true));
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorFactory.java
similarity index 50%
copy from
opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
copy to
opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorFactory.java
index 55d6332..20612f5 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenFeatureGeneratorFactory.java
@@ -19,16 +19,31 @@ package opennlp.tools.util.featuregen;
import java.util.Map;
+import org.w3c.dom.Element;
+
import opennlp.tools.util.InvalidFormatException;
-public abstract class CustomFeatureGenerator implements
AdaptiveFeatureGenerator {
+/**
+ * Factory for {@link TokenFeatureGenerator}; the generator is created
+ * without reading any configuration from the XML element.
+ */
+public class TokenFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public TokenFeatureGeneratorFactory() {
+ super();
+ }
+
+ /**
+ * Classic-format entry point; both arguments are ignored.
+ */
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) {
+ return new TokenFeatureGenerator();
+ }
+
+ /**
+ * Registers an instance of this factory under the classic element name
+ * {@code token}.
+ */
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("token", new TokenFeatureGeneratorFactory());
+ }
- /**
- * Initialized the Custom Feature Generator with defined properties and
loaded resources.
- *
- * @param properties
- * @param resourceProvider
- */
- public abstract void init(Map<String, String> properties,
FeatureGeneratorResourceProvider resourceProvider)
- throws InvalidFormatException;
+ /**
+ * @return a new {@link TokenFeatureGenerator}
+ */
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ return new TokenFeatureGenerator();
+ }
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorFactory.java
new file mode 100644
index 0000000..92a1d5a
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGeneratorFactory.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link TokenPatternFeatureGenerator}; the generator takes no
+ * configuration from the XML element.
+ *
+ * @see TokenPatternFeatureGenerator
+ */
+public class TokenPatternFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public TokenPatternFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point; both arguments are ignored.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(
+      Element generatorElement, FeatureGeneratorResourceProvider resourceManager) {
+    return new TokenPatternFeatureGenerator();
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code tokenpattern}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("tokenpattern", new TokenPatternFeatureGeneratorFactory());
+  }
+
+  /**
+   * @return a new {@link TokenPatternFeatureGenerator}
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    return new TokenPatternFeatureGenerator();
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorFactory.java
new file mode 100644
index 0000000..21d7657
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGeneratorFactory.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Factory for {@link WindowFeatureGenerator}. It wraps one nested generator
+ * and reads the window size from the {@code prevLength} and
+ * {@code nextLength} attributes.
+ *
+ * @see WindowFeatureGenerator
+ */
+public class WindowFeatureGeneratorFactory
+    extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+    implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+  public WindowFeatureGeneratorFactory() {
+    super();
+  }
+
+  /**
+   * Classic-format entry point: builds the generator described by the first
+   * child element and wraps it in a window of the configured size.
+   *
+   * @throws InvalidFormatException if there is no child element, or if
+   *     {@code prevLength} or {@code nextLength} is not an integer
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  public AdaptiveFeatureGenerator create(Element generatorElement,
+      FeatureGeneratorResourceProvider resourceManager) throws InvalidFormatException {
+
+    // locate the first child that is an element; it describes the nested generator
+    Element nestedElement = null;
+    NodeList children = generatorElement.getChildNodes();
+    for (int i = 0; nestedElement == null && i < children.getLength(); i++) {
+      Node child = children.item(i);
+      if (child instanceof Element) {
+        nestedElement = (Element) child;
+      }
+    }
+
+    if (nestedElement == null) {
+      throw new InvalidFormatException("window feature generator must contain" +
+          " an aggregator element");
+    }
+
+    AdaptiveFeatureGenerator wrapped =
+        GeneratorFactory.createGenerator(nestedElement, resourceManager);
+
+    final String prevLengthString = generatorElement.getAttribute("prevLength");
+    final int prevLength;
+    try {
+      prevLength = Integer.parseInt(prevLengthString);
+    } catch (NumberFormatException e) {
+      throw new InvalidFormatException("prevLength attribute '" + prevLengthString
+          + "' is not a number!", e);
+    }
+
+    final String nextLengthString = generatorElement.getAttribute("nextLength");
+    final int nextLength;
+    try {
+      nextLength = Integer.parseInt(nextLengthString);
+    } catch (NumberFormatException e) {
+      throw new InvalidFormatException("nextLength attribute '" + nextLengthString
+          + "' is not a number!", e);
+    }
+
+    return new WindowFeatureGenerator(wrapped, prevLength, nextLength);
+  }
+
+  /**
+   * Registers an instance of this factory under the classic element name
+   * {@code window}.
+   */
+  @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer needed
+  static void register(Map<String, GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+    factoryMap.put("window", new WindowFeatureGeneratorFactory());
+  }
+
+  /**
+   * Wraps the generator stored under the {@code generator#0} argument in a
+   * window sized by {@code prevLength} and {@code nextLength}.
+   *
+   * @throws InvalidFormatException if no nested generator is present
+   */
+  @Override
+  public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+    AdaptiveFeatureGenerator wrapped = (AdaptiveFeatureGenerator) args.get("generator#0");
+    if (wrapped != null) {
+      return new WindowFeatureGenerator(wrapped,
+          getInt("prevLength"), getInt("nextLength"));
+    }
+    throw new InvalidFormatException("window feature generator must contain" +
+        " an aggregator element");
+  }
+}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
new file mode 100644
index 0000000..5ad99de
--- /dev/null
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.w3c.dom.Element;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+
+/**
+ * Defines a word cluster generator factory; it reads an element containing
+ * 'w2vwordcluster' as a tag name; these clusters are typically produced by
+ * word2vec or clark pos induction systems.
+ */
+public class WordClusterFeatureGeneratorFactory
+ extends GeneratorFactory.AbstractXmlFeatureGeneratorFactory
+ implements GeneratorFactory.XmlFeatureGeneratorFactory {
+
+ public WordClusterFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ public AdaptiveFeatureGenerator create(Element generatorElement,
+ FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
+
+ String dictResourceKey = generatorElement.getAttribute("dict");
+ boolean lowerCaseDictionary =
"true".equals(generatorElement.getAttribute("lowerCase"));
+
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+
+
+ if (!(dictResource instanceof WordClusterDictionary)) {
+ throw new InvalidFormatException("Not a WordClusterDictionary resource
for key: "
+ + dictResourceKey);
+ }
+
+ return new WordClusterFeatureGenerator((WordClusterDictionary)
dictResource,
+ dictResourceKey, lowerCaseDictionary);
+ }
+
+ @Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no
longer needed
+ static void register(Map<String,
GeneratorFactory.XmlFeatureGeneratorFactory> factoryMap) {
+ factoryMap.put("wordcluster", new WordClusterFeatureGeneratorFactory());
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ // if resourceManager is null, we don't instantiate
+ if (resourceManager == null)
+ return null;
+
+ String dictResourceKey = getStr("dict");
+ boolean lowerCaseDictionary = getBool("lowerCase");
+ Object dictResource = resourceManager.getResource(dictResourceKey);
+ if (!(dictResource instanceof WordClusterDictionary)) {
+ throw new InvalidFormatException("Not a WordClusterDictionary resource
for key: "
+ + dictResourceKey);
+ }
+
+ return new WordClusterFeatureGenerator((WordClusterDictionary)
dictResource,
+ dictResourceKey, lowerCaseDictionary);
+ }
+
+ @Override
+ public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping()
throws InvalidFormatException {
+ Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+ mapping.put(getStr("dict"), new
WordClusterDictionary.WordClusterDictionarySerializer());
+ return mapping;
+ }
+}
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
index f5b91ee..58288e1 100644
---
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
+++
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
@@ -18,19 +18,28 @@
-->
<!-- Default name finder feature generator configuration -->
-<generators>
- <cache>
- <generators>
- <window prevLength = "2" nextLength = "2">
- <tokenclass/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <token/>
- </window>
- <definition/>
- <prevmap/>
- <bigram/>
- <sentence begin="true" end="false"/>
- </generators>
- </cache>
-</generators>
\ No newline at end of file
+<featureGenerators name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
+ </generator>
+ </generator>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features_classic.xml
similarity index 100%
copy from
opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
copy to
opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features_classic.xml
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
index 0be1fc8..466cba7 100644
---
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
+++
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -1,38 +1,46 @@
<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
-->
-
<!-- Default pos tagger feature generator configuration -->
-<generators>
- <cache>
- <generators>
- <definition/>
- <suffix/>
- <prefix/>
- <window prevLength = "2" nextLength = "2">
- <token/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <sentence begin="true" end="false"/>
- </window>
- <tokenclass/>
- <postagger/>
- </generators>
- </cache>
-</generators>
+<featureGenerators name="posTagger">
+ <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
+ </generator>
+ </generator>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features_classic.xml
similarity index 100%
copy from
opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
copy to
opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features_classic.xml
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
index 37a92e7..816f308 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
@@ -25,6 +25,7 @@ import java.util.Map;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.ArtifactSerializer;
+@Deprecated // TODO: (OPENNLP-1174) remove back-compat support when it is
unnecessary
public class FeatureGenWithSerializerMapping extends CustomFeatureGenerator
implements ArtifactToSerializerMapper {
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryClassicFormatTest.java
similarity index 86%
copy from
opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
copy to
opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryClassicFormatTest.java
index 7405537..cdf1404 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryClassicFormatTest.java
@@ -31,12 +31,13 @@ import
opennlp.tools.util.featuregen.WordClusterDictionary.WordClusterDictionary
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.DictionarySerializer;
-public class GeneratorFactoryTest {
+@Deprecated // TODO: (OPENNLP-1174) just remove when back-compat is no longer
needed
+public class GeneratorFactoryClassicFormatTest {
@Test
public void testCreationWithTokenClassFeatureGenerator() throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig.xml");
+
"/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig_classic.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -83,7 +84,7 @@ public class GeneratorFactoryTest {
@Test
public void testCreationWithCustomGenerator() throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
- "/opennlp/tools/util/featuregen/CustomClassLoading.xml");
+ "/opennlp/tools/util/featuregen/CustomClassLoading_classic.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -109,7 +110,7 @@ public class GeneratorFactoryTest {
public void testCreationWithUnkownElement() throws IOException {
try (InputStream descIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml")) {
+
"/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement_classic.xml"))
{
GeneratorFactory.create(descIn, null);
}
}
@@ -118,7 +119,7 @@ public class GeneratorFactoryTest {
public void testArtifactToSerializerMappingExtraction() throws IOException {
// TODO: Define a new one here with custom elements ...
InputStream descIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers.xml");
+
"/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers_classic.xml");
Map<String, ArtifactSerializer<?>> mapping =
GeneratorFactory.extractArtifactSerializerMappings(descIn);
@@ -130,11 +131,15 @@ public class GeneratorFactoryTest {
public void testDictionaryArtifactToSerializerMappingExtraction() throws
IOException {
InputStream descIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtractionxml");
+
"/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction_classic.xml");
Map<String, ArtifactSerializer<?>> mapping =
- GeneratorFactory.extractArtifactSerializerMappings(descIn);
+ GeneratorFactory.extractArtifactSerializerMappings(descIn);
Assert.assertTrue(mapping.get("test.dictionary") instanceof
DictionarySerializer);
+ // TODO: enabling the following assertion makes the test fail.
+ // This is strange because DictionaryFeatureGeneratorFactory casts
dictResource to Dictionary...
+ //Assert.assertTrue(mapping.get("test.dictionary") instanceof
+ // opennlp.tools.dictionary.Dictionary);
}
}
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
index 7405537..f974011 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
@@ -21,18 +21,61 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
import java.util.Map;
import org.junit.Assert;
import org.junit.Test;
import opennlp.tools.util.InvalidFormatException;
-import
opennlp.tools.util.featuregen.WordClusterDictionary.WordClusterDictionarySerializer;
import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.DictionarySerializer;
public class GeneratorFactoryTest {
+ /**
+ * Test factory whose {@code create()} builds a
+ * {@link TestParametersFeatureGenerator} from the "intParam",
+ * "floatParam", "longParam", "doubleParam", "boolParam" and "strParam"
+ * parameters, read with the matching typed getter.
+ */
+ static class TestParametersFeatureGeneratorFactory extends
+ GeneratorFactory.AbstractXmlFeatureGeneratorFactory {
+
+ public TestParametersFeatureGeneratorFactory() {
+ super();
+ }
+
+ @Override
+ public AdaptiveFeatureGenerator create() throws InvalidFormatException {
+ return new TestParametersFeatureGenerator(
+ getInt("intParam"),
+ getFloat("floatParam"),
+ getLong("longParam"),
+ getDouble("doubleParam"),
+ getBool("boolParam"),
+ getStr("strParam"));
+ }
+ }
+
+ /**
+ * Feature generator stub that only stores the values passed to its
+ * constructor in public final fields so a test can inspect them;
+ * {@code createFeatures} is intentionally empty.
+ */
+ static class TestParametersFeatureGenerator implements
AdaptiveFeatureGenerator {
+
+ public final int ip;
+ public final float fp;
+ public final long lp;
+ public final double dp;
+ public final boolean bp;
+ public final String sp;
+
+ TestParametersFeatureGenerator(int ip, float fp, long lp, double dp,
boolean bp, String sp) {
+ this.ip = ip;
+ this.fp = fp;
+ this.lp = lp;
+ this.dp = dp;
+ this.bp = bp;
+ this.sp = sp;
+ }
+
+ @Override
+ public void createFeatures(List<String> features, String[] tokens, int
index,
+ String[] previousOutcomes) {
+ }
+ }
+
@Test
public void testCreationWithTokenClassFeatureGenerator() throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
@@ -80,27 +123,6 @@ public class GeneratorFactoryTest {
Assert.assertEquals(0, expectedGenerators.size());
}
- @Test
- public void testCreationWithCustomGenerator() throws Exception {
- InputStream generatorDescriptorIn = getClass().getResourceAsStream(
- "/opennlp/tools/util/featuregen/CustomClassLoading.xml");
-
- // If this fails the generator descriptor could not be found
- // at the expected location
- Assert.assertNotNull(generatorDescriptorIn);
-
- AggregatedFeatureGenerator aggregatedGenerator =
- (AggregatedFeatureGenerator)
GeneratorFactory.create(generatorDescriptorIn, null);
-
- Collection<AdaptiveFeatureGenerator> embeddedGenerator =
aggregatedGenerator.getGenerators();
-
- Assert.assertEquals(1, embeddedGenerator.size());
-
- for (AdaptiveFeatureGenerator generator : embeddedGenerator) {
- Assert.assertEquals(TokenFeatureGenerator.class.getName(),
generator.getClass().getName());
- }
- }
-
/**
* Tests the creation from a descriptor which contains an unkown element.
* The creation should fail with an {@link InvalidFormatException}
@@ -115,26 +137,39 @@ public class GeneratorFactoryTest {
}
@Test
- public void testArtifactToSerializerMappingExtraction() throws IOException {
- // TODO: Define a new one here with custom elements ...
+ public void testDictionaryArtifactToSerializerMappingExtraction() throws
IOException {
+
InputStream descIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers.xml");
+
"/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction.xml");
Map<String, ArtifactSerializer<?>> mapping =
- GeneratorFactory.extractArtifactSerializerMappings(descIn);
+ GeneratorFactory.extractArtifactSerializerMappings(descIn);
- Assert.assertTrue(mapping.get("test.resource") instanceof
WordClusterDictionarySerializer);
+ Assert.assertTrue(mapping.get("test.dictionary") instanceof
DictionarySerializer);
+ // TODO: enabling the following assertion makes the test fail.
+ // This is strange because DictionaryFeatureGeneratorFactory casts
dictResource to Dictionary...
+ //Assert.assertTrue(mapping.get("test.dictionary") instanceof
+ // opennlp.tools.dictionary.Dictionary);
}
@Test
- public void testDictionaryArtifactToSerializerMappingExtraction() throws
IOException {
+ public void testParameters() throws Exception {
+ InputStream generatorDescriptorIn = getClass().getResourceAsStream(
+ "/opennlp/tools/util/featuregen/TestParametersConfig.xml");
- InputStream descIn = getClass().getResourceAsStream(
-
"/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtractionxml");
+ // If this fails the generator descriptor could not be found
+ // at the expected location
+ Assert.assertNotNull(generatorDescriptorIn);
- Map<String, ArtifactSerializer<?>> mapping =
- GeneratorFactory.extractArtifactSerializerMappings(descIn);
+ AdaptiveFeatureGenerator generator =
GeneratorFactory.create(generatorDescriptorIn, null);
+ Assert.assertTrue(generator instanceof TestParametersFeatureGenerator);
- Assert.assertTrue(mapping.get("test.dictionary") instanceof
DictionarySerializer);
+ TestParametersFeatureGenerator featureGenerator =
(TestParametersFeatureGenerator)generator;
+ Assert.assertEquals(123, featureGenerator.ip);
+ Assert.assertEquals(45, featureGenerator.fp, 0.1);
+ Assert.assertEquals(67890, featureGenerator.lp);
+ Assert.assertEquals(123456.789, featureGenerator.dp, 0.1);
+ Assert.assertTrue(featureGenerator.bp);
+ Assert.assertEquals("HELLO", featureGenerator.sp);
}
}
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
index 7600e38..7464627 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
@@ -15,22 +15,33 @@
~ limitations under the License.
-->
-<generators>
- <cache>
- <generators>
- <window prevLength = "2" nextLength = "2">
- <tokenclass/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <token/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <tokenpos model="pos-model.bin"/>
- </window>
- <definition/>
- <prevmap/>
- <bigram/>
- <sentence begin="true" end="false"/>
- </generators>
- </cache>
-</generators>
\ No newline at end of file
+<featureGenerators name="nameFinder">
+ <generator
class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator
class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">pos-model.bin</str>
+ </generator>
+ </generator>
+ <generator
class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator
class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
+ </generator>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features_classic.xml
similarity index 98%
copy from
opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features_classic.xml
index 7600e38..71c3bae 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features_classic.xml
@@ -33,4 +33,4 @@
<sentence begin="true" end="false"/>
</generators>
</cache>
-</generators>
\ No newline at end of file
+</generators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers_classic.xml
similarity index 100%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers.xml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers_classic.xml
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading_classic.xml
similarity index 100%
copy from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading_classic.xml
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
index 7a67ea8..d8207f4 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
@@ -19,8 +19,7 @@
under the License.
-->
-<generators>
- <definition/>
+<featureGenerators>
<unkown-element/>
-</generators>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement_classic.xml
similarity index 100%
copy from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement_classic.xml
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction.xml
similarity index 79%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading.xml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction.xml
index d22556a..8c68c8b 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/CustomClassLoading.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction.xml
@@ -17,6 +17,8 @@
under the License.
-->
-<generators>
- <custom class="opennlp.tools.util.featuregen.TokenFeatureGenerator"/>
-</generators>
\ No newline at end of file
+<featureGenerators name="test">
+ <generator
class="opennlp.tools.util.featuregen.DictionaryFeatureGeneratorFactory">
+ <str name="dict">test.dictionary</str>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtractionxml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction_classic.xml
similarity index 100%
rename from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtractionxml
rename to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestDictionarySerializerMappingExtraction_classic.xml
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestFeatureGeneratorConfig_classic.xml
similarity index 97%
copy from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestFeatureGeneratorConfig_classic.xml
index 7a67ea8..6518948 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestFeatureGeneratorConfig_classic.xml
@@ -21,6 +21,5 @@
<generators>
<definition/>
- <unkown-element/>
</generators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestParametersConfig.xml
similarity index 65%
copy from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestParametersConfig.xml
index 7a67ea8..b03459e 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestParametersConfig.xml
@@ -8,9 +8,9 @@
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
-
+
http://www.apache.org/licenses/LICENSE-2.0
-
+
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -19,8 +19,13 @@
under the License.
-->
-<generators>
- <definition/>
- <unkown-element/>
-</generators>
-
+<featureGenerators name="testParameters">
+ <generator
class="opennlp.tools.util.featuregen.GeneratorFactoryTest$TestParametersFeatureGeneratorFactory">
+ <int name="intParam">123</int>
+ <float name="floatParam">45</float>
+ <long name="longParam">67890</long>
+ <double name="doubleParam">123456.789</double>
+ <bool name="boolParam">true</bool>
+ <str name="strParam">HELLO</str>
+ </generator>
+</featureGenerators>
diff --git
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig_classic.xml
similarity index 96%
copy from
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
copy to
opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig_classic.xml
index 7a67ea8..c680248 100644
---
a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/FeatureGeneratorConfigWithUnkownElement.xml
+++
b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestTokenClassFeatureGeneratorConfig_classic.xml
@@ -20,7 +20,5 @@
-->
<generators>
- <definition/>
- <unkown-element/>
+ <tokenclass wordAndClass="true"/>
</generators>
-
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].