Hi, I might be mistaken, but the train method you added also needs to place the descriptor in the model, much like the existing train method that takes the descriptor, cutoff and iterations does.
Jörn On 6/3/11 7:34 AM, [email protected] wrote:
Author: colen Date: Fri Jun 3 05:34:34 2011 New Revision: 1130898 URL: http://svn.apache.org/viewvc?rev=1130898&view=rev Log: OPENNLP-195 Added train method that takes params argument and the generatorDescriptor and resourceMap Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1130898&r1=1130897&r2=1130898&view=diff ============================================================================== --- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original) +++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Fri Jun 3 05:34:34 2011 @@ -22,11 +22,9 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; -import java.util.Collections; import java.util.HashMap; import java.util.Map; -import opennlp.model.TrainUtil; import opennlp.tools.cmdline.CLI; import opennlp.tools.cmdline.CmdLineTool; import opennlp.tools.cmdline.CmdLineUtil; @@ -187,8 +185,9 @@ public final class TokenNameFinderTraine parameters.getCutoff()); } else { - model = opennlp.tools.namefind.NameFinderME.train(parameters.getLanguage(), parameters.getType(), sampleStream, mlParams, null, - Collections.<String, Object>emptyMap()); + model = opennlp.tools.namefind.NameFinderME.train( + parameters.getLanguage(), parameters.getType(), sampleStream, + mlParams, featureGeneratorBytes, resources); } } catch (IOException e) { Modified: 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1130898&r1=1130897&r2=1130898&view=diff ============================================================================== --- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original) +++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Fri Jun 3 05:34:34 2011 @@ -19,10 +19,7 @@ package opennlp.tools.namefind; import java.io.ByteArrayInputStream; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStreamReader; import java.io.ObjectStreamException; import java.util.ArrayList; import java.util.Collections; @@ -40,11 +37,8 @@ import opennlp.model.EventStream; import opennlp.model.MaxentModel; import opennlp.model.TrainUtil; import opennlp.model.TwoPassDataIndexer; -import opennlp.tools.postag.POSSampleSequenceStream; import opennlp.tools.util.BeamSearch; -import opennlp.tools.util.HashSumEventStream; import opennlp.tools.util.ObjectStream; -import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Sequence; import opennlp.tools.util.SequenceValidator; import opennlp.tools.util.Span; @@ -61,8 +55,6 @@ import opennlp.tools.util.featuregen.Sen import opennlp.tools.util.featuregen.TokenClassFeatureGenerator; import opennlp.tools.util.featuregen.TokenFeatureGenerator; import opennlp.tools.util.featuregen.WindowFeatureGenerator; -import opennlp.tools.util.model.BaseModel; -import opennlp.tools.util.model.ModelUtil; /** * Class for creating a maximum-entropy-based name finder. 
@@ -210,6 +202,26 @@ public class NameFinderME implements Tok }); } + private static AdaptiveFeatureGenerator createFeatureGenerator( + byte[] generatorDescriptor, final Map<String, Object> resources) + throws IOException { + AdaptiveFeatureGenerator featureGenerator; + + if (generatorDescriptor != null) { + featureGenerator = GeneratorFactory.create(new ByteArrayInputStream( + generatorDescriptor), new FeatureGeneratorResourceProvider() { + + public Object getResource(String key) { + return resources.get(key); + } + }); + } else { + featureGenerator = null; + } + + return featureGenerator; + } + public Span[] find(String[] tokens) { return find(tokens, EMPTY); } @@ -328,6 +340,26 @@ public class NameFinderME implements Tok return sprobs; } + /** + * Trains a name finder model. + * + * @param languageCode + * the language of the training data + * @param type + * null or an override type for all types in the training data + * @param samples + * the training data + * @param trainParams + * machine learning train parameters + * @param generator + * null or the feature generator + * @param resources + * the resources for the name finder or null if none + * + * @return the newly trained model + * + * @throws IOException + */ public static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample> samples, TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map<String, Object> resources) throws IOException { @@ -358,6 +390,34 @@ public class NameFinderME implements Tok resources, manifestInfoEntries); } + /** + * Trains a name finder model. 
+ * + * @param languageCode + * the language of the training data + * @param type + * null or an override type for all types in the training data + * @param samples + * the training data + * @param trainParams + * machine learning train parameters + * @param featureGeneratorBytes + * descriptor to configure the feature generation or null + * @param resources + * the resources for the name finder or null if none + * + * @return the newly trained model + * + * @throws IOException + */ + public static TokenNameFinderModel train(String languageCode, String type, + ObjectStream<NameSample> samples, TrainingParameters trainParams, + byte[] featureGeneratorBytes, final Map<String, Object> resources) + throws IOException { + return train(languageCode, type, samples, trainParams, + createFeatureGenerator(featureGeneratorBytes, resources), resources); + } + /** * Trains a name finder model. * @@ -403,19 +463,7 @@ public class NameFinderME implements Tok // TODO: Pass in resource manager ... - AdaptiveFeatureGenerator featureGenerator; - - if (generatorDescriptor != null) { - featureGenerator = GeneratorFactory.create(new ByteArrayInputStream(generatorDescriptor), new FeatureGeneratorResourceProvider() { - - public Object getResource(String key) { - return resources.get(key); - } - }); - } - else { - featureGenerator = null; - } + AdaptiveFeatureGenerator featureGenerator = createFeatureGenerator(generatorDescriptor, resources); TokenNameFinderModel model = train(languageCode, type, samples, featureGenerator, resources, iterations, cutoff); @@ -427,7 +475,6 @@ public class NameFinderME implements Tok return model; } - @Deprecated public static GISModel train(EventStream es, int iterations, int cut) throws IOException { return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
