Author: ragerri
Date: Mon Mar  9 21:16:12 2015
New Revision: 1665360

URL: http://svn.apache.org/r1665360
Log:
OPENNLP-715 refactoring from specific word2vec naming to wordcluster namings

Added:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1665360&r1=1665359&r2=1665360&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
 Mon Mar  9 21:16:12 2015
@@ -36,7 +36,7 @@ import opennlp.tools.util.SequenceCodec;
 import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
 import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;
 import opennlp.tools.util.featuregen.BrownCluster;
-import opennlp.tools.util.featuregen.W2VClassesDictionary;
+import opennlp.tools.util.featuregen.WordClusterDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.BaseModel;
 import opennlp.tools.util.model.ModelUtil;
@@ -253,7 +253,7 @@ public class TokenNameFinderModel extend
    * objects, the convention is to add its element tag name as key of the serializer map.
    * For example, the element tag name for the {@code WordClusterFeatureGenerator} which
    * uses {@code W2VClassesDictionary} objects serialized by the {@code W2VClassesDictionarySerializer}
-   * is 'w2vwordcluster', which is the key used to add the serializer to the map.
+   * is 'wordcluster', which is the key used to add the serializer to the map.
    * @return the map containing the added serializers
    */
   public static Map<String, ArtifactSerializer> createArtifactSerializers()  {
@@ -268,7 +268,7 @@ public class TokenNameFinderModel extend
     Map<String, ArtifactSerializer> serializers = BaseModel.createArtifactSerializers();
 
     serializers.put("featuregen", new ByteArraySerializer());
-    serializers.put("w2vwordcluster", new W2VClassesDictionary.W2VClassesDictionarySerializer());
+    serializers.put("wordcluster", new WordClusterDictionary.WordClusterDictionarySerializer());
     serializers.put("brownclustertoken", new BrownCluster.BrownClusterSerializer());
     serializers.put("brownclustertokenclass", new BrownCluster.BrownClusterSerializer());
     serializers.put("brownclusterbigram", new BrownCluster.BrownClusterSerializer());

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1665360&r1=1665359&r2=1665360&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
 Mon Mar  9 21:16:12 2015
@@ -278,7 +278,7 @@ public class GeneratorFactory {
    * 'w2vwordcluster' as a tag name; these clusters are typically produced by
    * word2vec or clark pos induction systems.
    */
-  static class W2VClassesFeatureGeneratorFactory implements XmlFeatureGeneratorFactory {
+  static class WordClusterFeatureGeneratorFactory implements XmlFeatureGeneratorFactory {
 
     public AdaptiveFeatureGenerator create(Element generatorElement,
         FeatureGeneratorResourceProvider resourceManager) throws InvalidFormatException {
@@ -288,15 +288,15 @@ public class GeneratorFactory {
       Object dictResource = resourceManager.getResource(dictResourceKey);
 
 
-      if (!(dictResource instanceof W2VClassesDictionary)) {
-        throw new InvalidFormatException("Not a W2VClassesDictionary resource for key: " + dictResourceKey);
+      if (!(dictResource instanceof WordClusterDictionary)) {
+        throw new InvalidFormatException("Not a WordClusterDictionary resource for key: " + dictResourceKey);
       }
 
-      return new WordClusterFeatureGenerator((W2VClassesDictionary) dictResource, dictResourceKey);
+      return new WordClusterFeatureGenerator((WordClusterDictionary) dictResource, dictResourceKey);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
-      factoryMap.put("w2vwordcluster", new W2VClassesFeatureGeneratorFactory());
+      factoryMap.put("wordcluster", new WordClusterFeatureGeneratorFactory());
     }
   }
   
@@ -628,7 +628,7 @@ public class GeneratorFactory {
     PrefixFeatureGeneratorFactory.register(factories);
     SuffixFeatureGeneratorFactory.register(factories);
     WindowFeatureGeneratorFactory.register(factories);
-    W2VClassesFeatureGeneratorFactory.register(factories);
+    WordClusterFeatureGeneratorFactory.register(factories);
     BrownClusterTokenFeatureGeneratorFactory.register(factories);
     BrownClusterTokenClassFeatureGeneratorFactory.register(factories);
     BrownClusterBigramFeatureGeneratorFactory.register(factories);

Added: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java?rev=1665360&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
 Mon Mar  9 21:16:12 2015
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
+import opennlp.tools.util.model.SerializableArtifact;
+
+public class WordClusterDictionary implements SerializableArtifact {
+
+  public static class WordClusterDictionarySerializer implements ArtifactSerializer<WordClusterDictionary> {
+
+    public WordClusterDictionary create(InputStream in) throws IOException,
+        InvalidFormatException {
+      return new WordClusterDictionary(in);
+    }
+
+    public void serialize(WordClusterDictionary artifact, OutputStream out)
+        throws IOException {
+      artifact.serialize(out);
+    }
+  }
+
+  private Map<String, String> tokenToClusterMap = new HashMap<String, String>();
+
+  /**
+   * Read word2vec and clark clustering style lexicons.
+   * @param in the inputstream
+   * @throws IOException the io exception
+   */
+  public WordClusterDictionary(InputStream in) throws IOException {
+
+    BufferedReader reader = new BufferedReader(new InputStreamReader(in, Charset.forName("UTF-8")));
+
+    String line;
+    while ((line = reader.readLine()) != null) {
+      String parts[] = line.split(" ");
+      if (parts.length == 3) {
+        tokenToClusterMap.put(parts[0], parts[1]);
+      } else if (parts.length == 2) {
+        tokenToClusterMap.put(parts[0], parts[1]);
+      }
+    }
+  }
+
+  public String lookupToken(String string) {
+    return tokenToClusterMap.get(string);
+  }
+
+  public void serialize(OutputStream out) throws IOException {
+    Writer writer = new BufferedWriter(new OutputStreamWriter(out));
+
+    for (Map.Entry<String, String> entry : tokenToClusterMap.entrySet()) {
+      writer.write(entry.getKey() + " " + entry.getValue() + "\n");
+    }
+
+    writer.flush();
+  }
+
+  public Class<?> getArtifactSerializerClass() {
+    return WordClusterDictionarySerializer.class;
+  }
+}

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java?rev=1665360&r1=1665359&r2=1665360&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
 Mon Mar  9 21:16:12 2015
@@ -21,10 +21,10 @@ import java.util.List;
 
 public class WordClusterFeatureGenerator extends FeatureGeneratorAdapter {
 
-  private W2VClassesDictionary tokenDictionary;
+  private WordClusterDictionary tokenDictionary;
   private String resourceName;
 
-  public WordClusterFeatureGenerator(W2VClassesDictionary dict, String dictResourceKey) {
+  public WordClusterFeatureGenerator(WordClusterDictionary dict, String dictResourceKey) {
       tokenDictionary = dict;
       resourceName = dictResourceKey;
   }

Modified: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java?rev=1665360&r1=1665359&r2=1665360&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
 Mon Mar  9 21:16:12 2015
@@ -44,7 +44,7 @@ public class FeatureGenWithSerializerMap
   @Override
   public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping() {
     Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
-    mapping.put("test.resource", new W2VClassesDictionary.W2VClassesDictionarySerializer());
+    mapping.put("test.resource", new WordClusterDictionary.WordClusterDictionarySerializer());
     return Collections.unmodifiableMap(mapping);
   }
 

Modified: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java?rev=1665360&r1=1665359&r2=1665360&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
 Mon Mar  9 21:16:12 2015
@@ -28,7 +28,7 @@ import java.util.Collection;
 import java.util.Map;
 
 import opennlp.tools.util.InvalidFormatException;
-import opennlp.tools.util.featuregen.W2VClassesDictionary.W2VClassesDictionarySerializer;
+import opennlp.tools.util.featuregen.WordClusterDictionary.WordClusterDictionarySerializer;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.SerializableArtifact;
 
@@ -113,6 +113,6 @@ public class GeneratorFactoryTest {
     Map<String, ArtifactSerializer<?>> mapping =
         GeneratorFactory.extractCustomArtifactSerializerMappings(descIn);
     
-    assertTrue(mapping.get("test.resource") instanceof W2VClassesDictionarySerializer);
+    assertTrue(mapping.get("test.resource") instanceof WordClusterDictionarySerializer);
   }
 }
\ No newline at end of file


Reply via email to