I have a lot of data lying around. How do I train the coref component on it?
On Mon, Nov 25, 2013 at 3:02 PM, Jörn Kottmann <[email protected]> wrote: > Actually that code should have compiled just fine against maxent 3.0.3. > > Anyway, the reason for the seperation from opennlp-tools is that we need > to first build/finish the tooling > to train the coref component. In my opinion this will be easier if we just > let the code continue to use the old > maxent library. After that is accomplished we could start updating and > refactoring it and re-integrate it into opennlp-tools. > > Do you have some data sets you could train it on? I am happy to provide > assitance and point out issues I encountered. > > Jörn > > > On 11/24/2013 04:08 AM, [email protected] wrote: > >> Author: markg >> Date: Sun Nov 24 03:08:54 2013 >> New Revision: 1544904 >> >> URL: http://svn.apache.org/r1544904 >> Log: >> OPENNLP-621 >> Fixed errors and changed all approprate imports to opennlp.tools.ml. >> Builds but no testing done yet. >> >> Modified: >> opennlp/sandbox/opennlp-coref/ (props changed) >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/CorefModel.java >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/DefaultNonReferentialResolver.java >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/MaxentResolver.java >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/sim/GenderModel.java >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/sim/NumberModel.java >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/sim/SimilarityModel.java >> >> Propchange: opennlp/sandbox/opennlp-coref/ >> ------------------------------------------------------------ >> ------------------ >> --- svn:ignore (added) >> +++ svn:ignore Sun Nov 24 03:08:54 2013 >> @@ -0,0 +1 @@ >> +target >> >> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/CorefModel.java >> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/ >> 
src/main/java/opennlp/tools/coref/CorefModel.java?rev= >> 1544904&r1=1544903&r2=1544904&view=diff >> ============================================================ >> ================== >> --- >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java >> (original) >> +++ >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java >> Sun Nov 24 03:08:54 2013 >> @@ -26,9 +26,10 @@ import java.io.FileOutputStream; >> import java.io.FileReader; >> import java.io.IOException; >> import java.util.zip.GZIPInputStream; >> - >> -import opennlp.maxent.io.BinaryGISModelReader; >> -import opennlp.model.AbstractModel; >> +import opennlp.tools.ml.maxent.io.BinaryGISModelReader; >> +//import opennlp.maxent.io.BinaryGISModelReader; >> +//import opennlp.model.AbstractModel; >> +import opennlp.tools.ml.model.AbstractModel; >> import opennlp.tools.dictionary.Dictionary; >> import opennlp.tools.util.StringList; >> import opennlp.tools.util.model.BaseModel; >> >> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/DefaultNonReferentialResolver.java >> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/ >> src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver. 
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff >> ============================================================ >> ================== >> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/DefaultNonReferentialResolver.java (original) >> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/DefaultNonReferentialResolver.java Sun Nov 24 03:08:54 >> 2013 >> @@ -25,14 +25,26 @@ import java.util.ArrayList; >> import java.util.Iterator; >> import java.util.List; >> -import opennlp.maxent.GIS; >> -import opennlp.maxent.io.BinaryGISModelReader; >> -import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> -import opennlp.model.Event; >> -import opennlp.model.MaxentModel; >> +//import opennlp.maxent.GIS; >> +//import opennlp.maxent.io.BinaryGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +//import opennlp.maxent.GIS; >> +import opennlp.tools.ml.maxent.io.BinaryGISModelReader; >> +import opennlp.tools.ml.maxent.GIS; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +//import opennlp.model.Event; >> +import opennlp.tools.ml.model.MaxentModel; >> +//import opennlp.model.MaxentModel; >> + >> +import opennlp.tools.ml.model.EventStream; >> +//import opennlp.model.MaxentModel; >> import opennlp.tools.coref.mention.MentionContext; >> import opennlp.tools.coref.mention.Parse; >> +import opennlp.tools.ml.model.Event; >> import opennlp.tools.util.CollectionEventStream; >> /** >> @@ -124,7 +136,7 @@ public class DefaultNonReferentialResolv >> } >> writer.close(); >> } >> - (new SuffixSensitiveGISModelWriter(GIS.trainModel(new >> 
CollectionEventStream(events),100,10),new File(modelName+modelExtension) >> )).persist(); >> + (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new >> CollectionEventStream(events),100,10),new File(modelName+modelExtension) >> )).persist(); >> } >> } >> } >> >> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/MaxentResolver.java >> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/ >> src/main/java/opennlp/tools/coref/resolver/MaxentResolver. >> java?rev=1544904&r1=1544903&r2=1544904&view=diff >> ============================================================ >> ================== >> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/MaxentResolver.java (original) >> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/resolver/MaxentResolver.java Sun Nov 24 03:08:54 2013 >> @@ -24,15 +24,28 @@ import java.util.ArrayList; >> import java.util.Iterator; >> import java.util.List; >> -import opennlp.maxent.GIS; >> -import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> -import opennlp.model.Event; >> -import opennlp.model.MaxentModel; >> +//import opennlp.maxent.GIS; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +//import opennlp.model.EventStream; >> +//import opennlp.model.MaxentModel; >> + >> + >> +import opennlp.tools.ml.maxent.GIS; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.GIS; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +//import opennlp.model.Event; >> +//import opennlp.model.MaxentModel; >> +import opennlp.tools.ml.model.MaxentModel; >> +import opennlp.tools.ml.model.EventStream; >> import 
opennlp.tools.coref.DiscourseEntity; >> import opennlp.tools.coref.DiscourseModel; >> import opennlp.tools.coref.mention.MentionContext; >> import opennlp.tools.coref.sim.TestSimilarityModel; >> +import opennlp.tools.ml.model.Event; >> import opennlp.tools.util.CollectionEventStream; >> /** >> @@ -55,7 +68,7 @@ public abstract class MaxentResolver ext >> private double[] candProbs; >> private int sameIndex; >> private ResolverMode mode; >> - private List<Event> events; >> + private List<opennlp.tools.ml.model.Event> events; >> /** When true, this designates that the resolver should use the >> first referent encountered which it >> * more preferable than non-reference. When false all non-excluded >> referents within this resolvers range >> @@ -314,7 +327,7 @@ public abstract class MaxentResolver ext >> } >> writer.close(); >> } >> - (new SuffixSensitiveGISModelWriter(GIS.trainModel(new >> CollectionEventStream(events),100,10),new File(modelName+modelExtension) >> )).persist(); >> + (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new >> CollectionEventStream(events),100,10),new File(modelName+modelExtension) >> )).persist(); >> nonReferentialResolver.train(); >> } >> } >> >> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/sim/GenderModel.java >> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/ >> src/main/java/opennlp/tools/coref/sim/GenderModel.java? 
>> rev=1544904&r1=1544903&r2=1544904&view=diff >> ============================================================ >> ================== >> --- >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java >> (original) >> +++ >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java >> Sun Nov 24 03:08:54 2013 >> @@ -25,17 +25,26 @@ import java.io.FileWriter; >> import java.io.IOException; >> import java.io.InputStreamReader; >> import java.util.ArrayList; >> +import java.util.Collection; >> import java.util.HashSet; >> import java.util.Iterator; >> import java.util.List; >> import java.util.Set; >> -import opennlp.maxent.GIS; >> -import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> -import opennlp.model.Event; >> -import opennlp.model.MaxentModel; >> +//import opennlp.maxent.GIS; >> +import opennlp.tools.ml.maxent.GIS; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +//import opennlp.model.Event; >> +import opennlp.tools.ml.model.Event; >> +import opennlp.tools.ml.model.MaxentModel; >> +//import opennlp.model.MaxentModel; >> import opennlp.tools.coref.resolver.ResolverUtils; >> +import opennlp.tools.ml.model.AbstractModel; >> + >> +import opennlp.tools.ml.model.EventStream; >> import opennlp.tools.util.CollectionEventStream; >> import opennlp.tools.util.HashList; >> @@ -51,7 +60,7 @@ public class GenderModel implements Test >> private String modelName; >> private String modelExtension = ".bin.gz"; >> private MaxentModel testModel; >> - private List<Event> events; >> + private Collection<Event> events; >> private boolean debugOn = true; >> private Set<String> maleNames; >> @@ -267,9 +276,8 @@ public class GenderModel implements Test >> 
writer.close(); >> } >> new SuffixSensitiveGISModelWriter( >> - GIS.trainModel( >> - new CollectionEventStream(events), true), >> - new File(modelName+modelExtension)).persist(); >> + // GIS.trainModel((EventStream)new >> CollectionEventStream(events), true)).persist(); >> + (AbstractModel) GIS.trainModel((EventStream)new >> CollectionEventStream(events), true), new File(modelName+modelExtension) >> ).persist(); >> } >> public int getFemaleIndex() { >> >> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/sim/NumberModel.java >> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/ >> src/main/java/opennlp/tools/coref/sim/NumberModel.java? >> rev=1544904&r1=1544903&r2=1544904&view=diff >> ============================================================ >> ================== >> --- >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java >> (original) >> +++ >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java >> Sun Nov 24 03:08:54 2013 >> @@ -22,12 +22,16 @@ import java.io.IOException; >> import java.util.ArrayList; >> import java.util.Iterator; >> import java.util.List; >> - >> -import opennlp.maxent.GIS; >> -import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> -import opennlp.model.Event; >> -import opennlp.model.MaxentModel; >> +import opennlp.tools.ml.maxent.GIS; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.GIS; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +//import opennlp.model.Event; >> +import opennlp.tools.ml.model.Event; >> +//import opennlp.model.MaxentModel; >> +import opennlp.tools.ml.model.MaxentModel; >> import opennlp.tools.coref.resolver.ResolverUtils; >> import 
opennlp.tools.util.CollectionEventStream; >> import opennlp.tools.util.HashList; >> >> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/ >> coref/sim/SimilarityModel.java >> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/ >> src/main/java/opennlp/tools/coref/sim/SimilarityModel. >> java?rev=1544904&r1=1544903&r2=1544904&view=diff >> ============================================================ >> ================== >> --- >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java >> (original) >> +++ >> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java >> Sun Nov 24 03:08:54 2013 >> @@ -29,12 +29,17 @@ import java.util.Iterator; >> import java.util.List; >> import java.util.Map; >> import java.util.Set; >> - >> -import opennlp.maxent.GIS; >> -import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> -import opennlp.model.Event; >> -import opennlp.model.MaxentModel; >> +import opennlp.tools.ml.maxent.GIS; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter; >> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.GIS; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader; >> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter; >> +import opennlp.tools.ml.model.Event; >> +//import opennlp.model.MaxentModel; >> +import opennlp.tools.ml.model.MaxentModel; >> +//import opennlp.model.Event; >> +//import opennlp.model.MaxentModel; >> import opennlp.tools.coref.resolver.ResolverUtils; >> import opennlp.tools.util.CollectionEventStream; >> import opennlp.tools.util.HashList; >> >> >> >
