Author: ragerri
Date: Fri May 22 12:38:24 2015
New Revision: 1681091

URL: http://svn.apache.org/r1681091
Log:
OPENNLP-775 add support for lowercased word cluster dictionaries

Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1681091&r1=1681090&r2=1681091&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
 Fri May 22 12:38:24 2015
@@ -284,6 +284,7 @@ public class GeneratorFactory {
         FeatureGeneratorResourceProvider resourceManager) throws 
InvalidFormatException {
 
       String dictResourceKey = generatorElement.getAttribute("dict");
+      boolean lowerCaseDictionary = 
"true".equals(generatorElement.getAttribute("lowerCase"));
 
       Object dictResource = resourceManager.getResource(dictResourceKey);
 
@@ -292,7 +293,7 @@ public class GeneratorFactory {
         throw new InvalidFormatException("Not a WordClusterDictionary resource 
for key: " + dictResourceKey);
       }
 
-      return new WordClusterFeatureGenerator((WordClusterDictionary) 
dictResource, dictResourceKey);
+      return new WordClusterFeatureGenerator((WordClusterDictionary) 
dictResource, dictResourceKey, lowerCaseDictionary);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java?rev=1681091&r1=1681090&r2=1681091&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
 Fri May 22 12:38:24 2015
@@ -19,21 +19,29 @@ package opennlp.tools.util.featuregen;
 
 import java.util.List;
 
+import opennlp.tools.util.StringUtil;
+
 public class WordClusterFeatureGenerator extends FeatureGeneratorAdapter {
 
   private WordClusterDictionary tokenDictionary;
   private String resourceName;
+  private boolean lowerCaseDictionary;
 
-  public WordClusterFeatureGenerator(WordClusterDictionary dict, String 
dictResourceKey) {
+  public WordClusterFeatureGenerator(WordClusterDictionary dict, String 
dictResourceKey, boolean lowerCaseDictionary) {
       tokenDictionary = dict;
       resourceName = dictResourceKey;
+      this.lowerCaseDictionary = lowerCaseDictionary;
   }
 
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
 
-    String clusterId = tokenDictionary.lookupToken(tokens[index]);
-
+    String clusterId;
+    if (lowerCaseDictionary) {
+      clusterId = 
tokenDictionary.lookupToken(StringUtil.toLowerCase(tokens[index]));
+    } else {
+      clusterId = tokenDictionary.lookupToken(tokens[index]);
+    }
     if (clusterId != null) {
       features.add(resourceName + clusterId);
     }


Reply via email to