Author: ragerri
Date: Fri May 22 12:38:24 2015
New Revision: 1681091
URL: http://svn.apache.org/r1681091
Log:
OPENNLP-775 add support for lowercased word cluster dictionaries
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1681091&r1=1681090&r2=1681091&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java Fri May 22 12:38:24 2015
@@ -284,6 +284,7 @@ public class GeneratorFactory {
FeatureGeneratorResourceProvider resourceManager) throws
InvalidFormatException {
String dictResourceKey = generatorElement.getAttribute("dict");
+ boolean lowerCaseDictionary = "true".equals(generatorElement.getAttribute("lowerCase"));
Object dictResource = resourceManager.getResource(dictResourceKey);
@@ -292,7 +293,7 @@ public class GeneratorFactory {
throw new InvalidFormatException("Not a WordClusterDictionary resource for key: " + dictResourceKey);
}
- return new WordClusterFeatureGenerator((WordClusterDictionary) dictResource, dictResourceKey);
+ return new WordClusterFeatureGenerator((WordClusterDictionary) dictResource, dictResourceKey, lowerCaseDictionary);
}
static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java?rev=1681091&r1=1681090&r2=1681091&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java Fri May 22 12:38:24 2015
@@ -19,21 +19,29 @@ package opennlp.tools.util.featuregen;
import java.util.List;
+import opennlp.tools.util.StringUtil;
+
public class WordClusterFeatureGenerator extends FeatureGeneratorAdapter {
private WordClusterDictionary tokenDictionary;
private String resourceName;
+ private boolean lowerCaseDictionary;
- public WordClusterFeatureGenerator(WordClusterDictionary dict, String dictResourceKey) {
+ public WordClusterFeatureGenerator(WordClusterDictionary dict, String dictResourceKey, boolean lowerCaseDictionary) {
tokenDictionary = dict;
resourceName = dictResourceKey;
+ this.lowerCaseDictionary = lowerCaseDictionary;
}
public void createFeatures(List<String> features, String[] tokens, int index,
String[] previousOutcomes) {
- String clusterId = tokenDictionary.lookupToken(tokens[index]);
-
+ String clusterId;
+ if (lowerCaseDictionary) {
+ clusterId = tokenDictionary.lookupToken(StringUtil.toLowerCase(tokens[index]));
+ } else {
+ clusterId = tokenDictionary.lookupToken(tokens[index]);
+ }
if (clusterId != null) {
features.add(resourceName + clusterId);
}