NameFinderME.java

Jörn Kottmann Mon, 06 Jun 2011 01:27:55 -0700

Hi,

I might be mistaken, but the train method you added also needs
to place the descriptor in the model, just as the train method
which takes the descriptor, cutoff and iterations does.


Jörn

On 6/3/11 7:34 AM, [email protected] wrote:

Author: colen
Date: Fri Jun  3 05:34:34 2011
New Revision: 1130898

URL: http://svn.apache.org/viewvc?rev=1130898&view=rev
Log:
OPENNLP-195 Added train method that takes params argument and the 
generatorDescriptor and resourceMap

Modified:
     
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
     
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java

Modified: 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: 
http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1130898&r1=1130897&r2=1130898&view=diff
==============================================================================
--- 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
 (original)
+++ 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
 Fri Jun  3 05:34:34 2011
@@ -22,11 +22,9 @@ import java.io.FileInputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.nio.charset.Charset;
-import java.util.Collections;
  import java.util.HashMap;
  import java.util.Map;

-import opennlp.model.TrainUtil;
  import opennlp.tools.cmdline.CLI;
  import opennlp.tools.cmdline.CmdLineTool;
  import opennlp.tools.cmdline.CmdLineUtil;
@@ -187,8 +185,9 @@ public final class TokenNameFinderTraine
             parameters.getCutoff());
        }
        else {
-        model = 
opennlp.tools.namefind.NameFinderME.train(parameters.getLanguage(), 
parameters.getType(), sampleStream, mlParams, null,
-            Collections.<String, Object>emptyMap());
+        model = opennlp.tools.namefind.NameFinderME.train(
+            parameters.getLanguage(), parameters.getType(), sampleStream,
+            mlParams, featureGeneratorBytes, resources);
        }
      }
      catch (IOException e) {

Modified: 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
URL: 
http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1130898&r1=1130897&r2=1130898&view=diff
==============================================================================
--- 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
 (original)
+++ 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
 Fri Jun  3 05:34:34 2011
@@ -19,10 +19,7 @@
  package opennlp.tools.namefind;

  import java.io.ByteArrayInputStream;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
  import java.io.IOException;
-import java.io.InputStreamReader;
  import java.io.ObjectStreamException;
  import java.util.ArrayList;
  import java.util.Collections;
@@ -40,11 +37,8 @@ import opennlp.model.EventStream;
  import opennlp.model.MaxentModel;
  import opennlp.model.TrainUtil;
  import opennlp.model.TwoPassDataIndexer;
-import opennlp.tools.postag.POSSampleSequenceStream;
  import opennlp.tools.util.BeamSearch;
-import opennlp.tools.util.HashSumEventStream;
  import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
  import opennlp.tools.util.Sequence;
  import opennlp.tools.util.SequenceValidator;
  import opennlp.tools.util.Span;
@@ -61,8 +55,6 @@ import opennlp.tools.util.featuregen.Sen
  import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
  import opennlp.tools.util.featuregen.TokenFeatureGenerator;
  import opennlp.tools.util.featuregen.WindowFeatureGenerator;
-import opennlp.tools.util.model.BaseModel;
-import opennlp.tools.util.model.ModelUtil;

  /**
   * Class for creating a maximum-entropy-based name finder.
@@ -210,6 +202,26 @@ public class NameFinderME implements Tok
             });
    }

+  private static AdaptiveFeatureGenerator createFeatureGenerator(
+      byte[] generatorDescriptor, final Map<String, Object>  resources)
+      throws IOException {
+    AdaptiveFeatureGenerator featureGenerator;
+
+    if (generatorDescriptor != null) {
+      featureGenerator = GeneratorFactory.create(new ByteArrayInputStream(
+          generatorDescriptor), new FeatureGeneratorResourceProvider() {
+
+        public Object getResource(String key) {
+          return resources.get(key);
+        }
+      });
+    } else {
+      featureGenerator = null;
+    }
+
+    return featureGenerator;
+  }
+
    public Span[] find(String[] tokens) {
      return find(tokens, EMPTY);
    }
@@ -328,6 +340,26 @@ public class NameFinderME implements Tok
       return sprobs;
     }

+   /**
+    * Trains a name finder model.
+    *
+    * @param languageCode
+    *          the language of the training data
+    * @param type
+    *          null or an override type for all types in the training data
+    * @param samples
+    *          the training data
+    * @param trainParams
+    *          machine learning train parameters
+    * @param generator
+    *          null or the feature generator
+    * @param resources
+    *          the resources for the name finder or null if none
+    *
+    * @return the newly trained model
+    *
+    * @throws IOException
+    */
     public static TokenNameFinderModel train(String languageCode, String type, 
ObjectStream<NameSample>  samples,
         TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final 
Map<String, Object>  resources) throws IOException {

@@ -358,6 +390,34 @@ public class NameFinderME implements Tok
           resources, manifestInfoEntries);
     }

+  /**
+   * Trains a name finder model.
+   *
+   * @param languageCode
+   *          the language of the training data
+   * @param type
+   *          null or an override type for all types in the training data
+   * @param samples
+   *          the training data
+   * @param trainParams
+   *          machine learning train parameters
+   * @param featureGeneratorBytes
+   *          descriptor to configure the feature generation or null
+   * @param resources
+   *          the resources for the name finder or null if none
+   *
+   * @return the newly trained model
+   *
+   * @throws IOException
+   */
+  public static TokenNameFinderModel train(String languageCode, String type,
+      ObjectStream<NameSample>  samples, TrainingParameters trainParams,
+      byte[] featureGeneratorBytes, final Map<String, Object>  resources)
+      throws IOException {
+    return train(languageCode, type, samples, trainParams,
+        createFeatureGenerator(featureGeneratorBytes, resources), resources);
+  }
+
     /**
      * Trains a name finder model.
      *
@@ -403,19 +463,7 @@ public class NameFinderME implements Tok

       // TODO: Pass in resource manager ...

-     AdaptiveFeatureGenerator featureGenerator;
-
-     if (generatorDescriptor != null) {
-       featureGenerator = GeneratorFactory.create(new 
ByteArrayInputStream(generatorDescriptor), new 
FeatureGeneratorResourceProvider() {
-
-        public Object getResource(String key) {
-          return resources.get(key);
-        }
-      });
-     }
-     else {
-       featureGenerator = null;
-     }
+     AdaptiveFeatureGenerator featureGenerator = 
createFeatureGenerator(generatorDescriptor, resources);

       TokenNameFinderModel model = train(languageCode, type, samples, 
featureGenerator,
           resources, iterations, cutoff);
@@ -427,7 +475,6 @@ public class NameFinderME implements Tok
       return model;
     }

-
    @Deprecated
    public static GISModel train(EventStream es, int iterations, int cut) 
throws IOException {
      return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));

Re: svn commit: r1130898 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/namefind/TokenNameFinderTrainerTool.java namefind/NameFinderME.java

Reply via email to