Typo in the file name: DummyPOSTaggerFactoy.java -> DummyPOSTaggerFactoRy.java

Aliaksandr
On Sun, Feb 12, 2012 at 2:17 AM, <[email protected]> wrote: > Author: colen > Date: Sun Feb 12 01:17:01 2012 > New Revision: 1243188 > > URL: http://svn.apache.org/viewvc?rev=1243188&view=rev > Log: > OPENNLP-429: Modified the BaseModel behavior to allow serializers provided > by tool factories. > > Changed BaseModel to allow loading artifacts and serializers in two steps. > The first will load basic artifacts and serializers, so we can load the > manifest. Latter we can load information from manifest (factory name), get > more serializers using this information, and finally loading more artifacts > and serializers. To do that I had to change the BaseModel constructor, > moving some of its code two methods that can be called by the sub-class at > the right time. > All Model implementations had to change to add the post constructor > actions; > > Added: > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java > (with props) > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java > (with props) > > > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java > (with props) > > > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java > (with props) > Modified: > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java > > > 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java > > > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java > Sun Feb 12 01:17:01 2012 > @@ -47,7 +47,8 @@ public class ChunkerModel extends BaseMo > super(COMPONENT_NAME, languageCode, manifestInfoEntries); > > artifactMap.put(CHUNKER_MODEL_ENTRY_NAME, chunkerModel); > - > + > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -57,6 +58,9 @@ public class ChunkerModel extends BaseMo > > public ChunkerModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > @Override > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java > 
Sun Feb 12 01:17:01 2012 > @@ -110,6 +110,9 @@ public class CorefModel extends BaseMode > > artifactMap.put(PLURAL_PRONOUN_RESOLVER_MODEL_ENTRY_NAME, > createModel(project + File.separator + "tmodel.bin.gz")); > + > + loadArtifactSerializers(); > + checkArtifactMap(); > } > > private AbstractModel createModel(String fileName) throws IOException { > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > Sun Feb 12 01:17:01 2012 > @@ -35,7 +35,7 @@ public class DoccatModel extends BaseMod > super(COMPONENT_NAME, languageCode, manifestInfoEntries); > > artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel); > - > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -45,6 +45,9 @@ public class DoccatModel extends BaseMod > > public DoccatModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > @Override > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java > (original) > +++ > 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java > Sun Feb 12 01:17:01 2012 > @@ -95,7 +95,7 @@ public class TokenNameFinderModel extend > // TODO: Add checks to not put resources where no serializer exists, > // make that case fail here, should be done in the BaseModel > artifactMap.putAll(resources); > - > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -106,6 +106,9 @@ public class TokenNameFinderModel extend > > public TokenNameFinderModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > /** > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java > Sun Feb 12 01:17:01 2012 > @@ -127,7 +127,7 @@ public class ParserModel extends BaseMod > artifactMap.put(CHUNKER_TAGGER_MODEL_ENTRY_NAME, chunkerTagger); > > artifactMap.put(HEAD_RULES_MODEL_ENTRY_NAME, headRules); > - > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -149,6 +149,9 @@ public class ParserModel extends BaseMod > > public ParserModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > @Override > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java > URL: > 
http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java > Sun Feb 12 01:17:01 2012 > @@ -30,6 +30,7 @@ import java.util.Set; > import opennlp.model.AbstractModel; > import opennlp.tools.dictionary.Dictionary; > import opennlp.tools.util.InvalidFormatException; > +import opennlp.tools.util.model.ArtifactProvider; > import opennlp.tools.util.model.ArtifactSerializer; > import opennlp.tools.util.model.BaseModel; > import opennlp.tools.util.model.UncloseableInputStream; > @@ -67,6 +68,8 @@ public final class POSModel extends Base > private static final String NGRAM_DICTIONARY_ENTRY_NAME = > "ngram.dictionary"; > private static final String FACTORY_NAME = "pos.factory"; > > + private POSTaggerFactory posTaggerFactory = null; > + > public POSModel(String languageCode, AbstractModel posModel, > POSDictionary tagDictionary, Dictionary ngramDict, Map<String, > String> manifestInfoEntries) { > > @@ -95,9 +98,12 @@ public final class POSModel extends Base > artifactMap.put(NGRAM_DICTIONARY_ENTRY_NAME, ngramDict); > > // The factory is optional > - if (posFactory!=null) > - setManifestProperty(FACTORY_NAME, > posFactory.getClass().getCanonicalName()); > + if (posFactory!=null) { > + setManifestProperty(FACTORY_NAME, > posFactory.getClass().getCanonicalName()); > + artifactMap.putAll(posFactory.createArtifactMap()); > + } > > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -108,6 +114,9 @@ public final class POSModel extends Base > > public POSModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + 
finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > @Override > @@ -118,6 +127,9 @@ public final class POSModel extends Base > super.createArtifactSerializers(serializers); > > POSDictionarySerializer.register(serializers); > + > + if(getFactory() != null) > + serializers.putAll(getFactory().createArtifactSerializersMap()); > } > > @Override > @@ -192,10 +204,14 @@ public final class POSModel extends Base > * @return tag dictionary or null if not used > */ > public POSDictionary getTagDictionary() { > + if(getFactory() != null) > + return getFactory().getPOSDictionary(); > return (POSDictionary) artifactMap.get(TAG_DICTIONARY_ENTRY_NAME); > } > > public POSTaggerFactory getFactory() { > + if(this.posTaggerFactory != null) > + return this.posTaggerFactory; > String factoryName = getManifestProperty(FACTORY_NAME); > POSTaggerFactory theFactory = null; > Class<?> factoryClass = null; > @@ -211,8 +227,8 @@ public final class POSModel extends Base > Constructor<?> constructor = null; > if(factoryClass != null) { > try { > - constructor = factoryClass.getConstructor(Dictionary.class, > POSDictionary.class); > - theFactory = (POSTaggerFactory) > constructor.newInstance(getNgramDictionary(), getTagDictionary()); > + constructor = factoryClass.getConstructor(ArtifactProvider.class); > + theFactory = (POSTaggerFactory) constructor.newInstance(this); > } catch (NoSuchMethodException e) { > // ignore, will try another constructor > } catch (Exception e) { > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java > (original) > +++ > 
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java > Sun Feb 12 01:17:01 2012 > @@ -18,33 +18,61 @@ > package opennlp.tools.postag; > > import opennlp.tools.dictionary.Dictionary; > +import opennlp.tools.util.BaseToolFactory; > import opennlp.tools.util.SequenceValidator; > +import opennlp.tools.util.model.ArtifactProvider; > > -public class POSTaggerFactory { > +/** > + * > + */ > +public class POSTaggerFactory extends BaseToolFactory { > > protected Dictionary ngramDictionary; > protected POSDictionary posDictionary; > - > - public POSTaggerFactory() { > + > + /** > + * Creates a {@link POSTaggerFactory} that provides the default > implementation > + * of the resources. > + */ > + public POSTaggerFactory() { > } > > - public POSTaggerFactory(POSModel model) { > - if(model != null) { > - this.ngramDictionary = model.getNgramDictionary(); > - this.posDictionary = model.getTagDictionary(); > - } > + /** > + * Creates a {@link POSTaggerFactory} with an {@link ArtifactProvider} > that > + * will be used to retrieve artifacts. > + * <p> > + * Sub-classes should implement a constructor with this signatures and > call > + * this constructor. > + * <p> > + * This will be used to load the factory from a serialized POSModel. > + */ > + public POSTaggerFactory(ArtifactProvider artifactProvider) { > + super(artifactProvider); > } > > - public POSTaggerFactory(Dictionary ngramDictionary, POSDictionary > posDictionary) { > + /** > + * Creates a {@link POSTaggerFactory}. Use this constructor to > + * programmatically create a factory. 
> + * > + * @param ngramDictionary > + * @param posDictionary > + */ > + public POSTaggerFactory(Dictionary ngramDictionary, > + POSDictionary posDictionary) { > this.ngramDictionary = ngramDictionary; > this.posDictionary = posDictionary; > } > > + public POSDictionary getPOSDictionary() { > + return this.posDictionary; > + } > + > public POSContextGenerator getPOSContextGenerator() { > return new DefaultPOSContextGenerator(0, ngramDictionary); > } > > public SequenceValidator<String> getSequenceValidator() { > - return new DefaultPOSSequenceValidator(posDictionary); > + return new DefaultPOSSequenceValidator(getPOSDictionary()); > } > + > } > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java > Sun Feb 12 01:17:01 2012 > @@ -67,7 +67,7 @@ public class SentenceModel extends BaseM > // EOS characters are optional > if (eosCharacters!=null) > setManifestProperty(EOS_CHARACTERS_PROPERTY, > eosCharArrayToString(eosCharacters)); > - > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -80,6 +80,9 @@ public class SentenceModel extends BaseM > > public SentenceModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > @Override > > Modified: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java > URL: > 
http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java > (original) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java > Sun Feb 12 01:17:01 2012 > @@ -69,7 +69,7 @@ public final class TokenizerModel extend > // Abbreviations are optional > if (abbreviations != null) > artifactMap.put(ABBREVIATIONS_ENTRY_NAME, abbreviations); > - > + loadArtifactSerializers(); > checkArtifactMap(); > } > > @@ -108,6 +108,9 @@ public final class TokenizerModel extend > */ > public TokenizerModel(InputStream in) throws IOException, > InvalidFormatException { > super(COMPONENT_NAME, in); > + loadArtifactSerializers(); > + finishLoadingArtifacts(in); > + checkArtifactMap(); > } > > /** > > Added: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java?rev=1243188&view=auto > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java > (added) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java > Sun Feb 12 01:17:01 2012 > @@ -0,0 +1,79 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreemnets. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. 
> + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +package opennlp.tools.util; > + > +import java.util.HashMap; > +import java.util.Map; > + > +import opennlp.tools.util.model.ArtifactProvider; > +import opennlp.tools.util.model.ArtifactSerializer; > +import opennlp.tools.util.model.BaseModel; > + > +/** > + * Base class for all tool factories. > + * > + * Extensions of this class should: <li>implement an empty constructor > (TODO is > + * it necessary?) <li>implement a constructor that takes the > + * {@link ArtifactProvider} and calls {@link #BaseToolFactory(Map)} > <li>override > + * {@link #createArtifactMap()} and {@link > #createArtifactSerializersMap()} > + * methods if necessary. > + */ > +public abstract class BaseToolFactory { > + > + protected final ArtifactProvider artifactProvider; > + > + /** > + * All sub-classes should have an empty constructor > + */ > + public BaseToolFactory() { > + this.artifactProvider = null; > + } > + > + /** > + * All sub-classes should have a constructor whith this signature > + */ > + public BaseToolFactory(ArtifactProvider artifactProvider) { > + this.artifactProvider = artifactProvider; > + } > + > + /** > + * Creates a {@link Map} with pairs of keys and {@link > ArtifactSerializer}. 
> + * The models implementation should call this method from > + * {@link BaseModel#createArtifactSerializersMap} > + * <p> > + * The base implementation will return a {@link HashMap} that should be > + * populated by sub-classes. > + */ > + @SuppressWarnings("rawtypes") > + public Map<String, ArtifactSerializer> createArtifactSerializersMap() { > + return new HashMap<String, ArtifactSerializer>(); > + } > + > + /** > + * Creates a {@link Map} with pairs of keys and objects. The models > + * implementation should call this constructor that creates a model > + * programmatically. > + * <p> > + * The base implementation will return a {@link HashMap} that should be > + * populated by sub-classes. > + */ > + public Map<String, Object> createArtifactMap() { > + return new HashMap<String, Object>(); > + } > + > +} > > Propchange: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java > > ------------------------------------------------------------------------------ > svn:mime-type = text/plain > > Added: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java?rev=1243188&view=auto > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java > (added) > +++ > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java > Sun Feb 12 01:17:01 2012 > @@ -0,0 +1,30 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreemnets. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. 
> + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +package opennlp.tools.util.model; > + > +/** > + * Provides access to model persisted artifacts. > + */ > +public interface ArtifactProvider { > + > + /** > + * Gets an artifact by name > + */ > + public <T> T getArtifact(String key); > + > +} > > Propchange: > incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java > > ------------------------------------------------------------------------------ > svn:mime-type = text/plain > > Added: > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java?rev=1243188&view=auto > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java > (added) > +++ > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java > Sun Feb 12 01:17:01 2012 > @@ -0,0 +1,131 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. 
> + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +package opennlp.tools.postag; > + > +import java.io.IOException; > +import java.io.InputStream; > +import java.io.OutputStream; > +import java.util.Map; > + > +import opennlp.tools.dictionary.Dictionary; > +import opennlp.tools.util.InvalidFormatException; > +import opennlp.tools.util.SequenceValidator; > +import opennlp.tools.util.model.ArtifactProvider; > +import opennlp.tools.util.model.ArtifactSerializer; > +import opennlp.tools.util.model.UncloseableInputStream; > + > +public class DummyPOSTaggerFactoy extends POSTaggerFactory { > + > + > + private static final String DUMMY_POSDICT = "DUMMY_POSDICT"; > + private DummyPOSDictionary dict; > + > + public DummyPOSTaggerFactoy(Dictionary ngramDictionary, > DummyPOSDictionary posDictionary) { > + super(ngramDictionary, null); > + this.dict = posDictionary; > + } > + > + public DummyPOSTaggerFactoy(ArtifactProvider artifactProvider) { > + super(artifactProvider); > + } > + > + @Override > + public SequenceValidator<String> getSequenceValidator() { > + return new DummyPOSSequenceValidator(); > + } > + > + public POSDictionary getPOSDictionary() { > + return (POSDictionary) artifactProvider.getArtifact(DUMMY_POSDICT); > + } > + > + @Override > + public POSContextGenerator getPOSContextGenerator() { > + return new DummyPOSContextGenerator(this.ngramDictionary); > + } > + > + @Override > + 
@SuppressWarnings("rawtypes") > + public Map<String, ArtifactSerializer> createArtifactSerializersMap() { > + Map<String, ArtifactSerializer> serializers = > super.createArtifactSerializersMap(); > + > + serializers.put(DUMMY_POSDICT, new DummyPOSDictionarySerializer()); > + return serializers; > + } > + > + @Override > + public Map<String, Object> createArtifactMap() { > + Map<String, Object> artifactMap = super.createArtifactMap(); > + if(this.dict != null) > + artifactMap.put(DUMMY_POSDICT, this.dict); > + return artifactMap; > + } > + > + static class DummyPOSContextGenerator extends > DefaultPOSContextGenerator { > + > + public DummyPOSContextGenerator(Dictionary dict) { > + super(dict); > + } > + > + } > + > + static class DummyPOSDictionarySerializer implements > ArtifactSerializer<DummyPOSDictionary> { > + > + public DummyPOSDictionary create(InputStream in) throws IOException, > + InvalidFormatException { > + return DummyPOSDictionary.create(new UncloseableInputStream(in)); > + } > + > + public void serialize(DummyPOSDictionary artifact, OutputStream out) > + throws IOException { > + artifact.serialize(out); > + } > + } > + > + static class DummyPOSSequenceValidator implements > SequenceValidator<String> { > + > + public boolean validSequence(int i, String[] inputSequence, > + String[] outcomesSequence, String outcome) { > + return true; > + } > + > + } > + > + static class DummyPOSDictionary extends POSDictionary { > + > + private POSDictionary dict; > + > + public DummyPOSDictionary(POSDictionary dict) { > + this.dict = dict; > + } > + > + public static DummyPOSDictionary create( > + UncloseableInputStream uncloseableInputStream) throws > InvalidFormatException, IOException { > + return new > DummyPOSDictionary(POSDictionary.create(uncloseableInputStream)); > + } > + > + public void serialize(OutputStream out) throws IOException { > + dict.serialize(out); > + } > + > + public String[] getTags(String word) { > + return dict.getTags(word); > + } > + > + } 
> + > +} > \ No newline at end of file > > Propchange: > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java > > ------------------------------------------------------------------------------ > svn:mime-type = text/plain > > Added: > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java > URL: > http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java?rev=1243188&view=auto > > ============================================================================== > --- > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java > (added) > +++ > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java > Sun Feb 12 01:17:01 2012 > @@ -0,0 +1,88 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. 
> + */ > + > +package opennlp.tools.postag; > + > +import static org.junit.Assert.*; > + > +import java.io.ByteArrayInputStream; > +import java.io.ByteArrayOutputStream; > +import java.io.IOException; > +import java.io.InputStream; > +import java.io.InputStreamReader; > + > +import opennlp.tools.postag.DummyPOSTaggerFactoy.DummyPOSContextGenerator; > +import opennlp.tools.postag.DummyPOSTaggerFactoy.DummyPOSDictionary; > +import > opennlp.tools.postag.DummyPOSTaggerFactoy.DummyPOSSequenceValidator; > +import opennlp.tools.util.ObjectStream; > +import opennlp.tools.util.TrainingParameters; > +import opennlp.tools.util.model.ModelType; > + > +import org.junit.Test; > + > +/** > + * Tests for the {@link POSTaggerFactory} class. > + */ > +public class POSTaggerFactoryTest { > + > + private static ObjectStream<POSSample> createSampleStream() > + throws IOException { > + InputStream in = POSTaggerFactoryTest.class.getClassLoader() > + > .getResourceAsStream("opennlp/tools/postag/AnnotatedSentences.txt"); > + > + return new WordTagSampleStream((new InputStreamReader(in))); > + } > + > + static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory) > + throws IOException { > + return POSTaggerME.train("en", createSampleStream(), > + TrainingParameters.defaultParams(), factory, null, null); > + } > + > + @Test > + public void testPOSTaggerWithCustomFactory() throws IOException { > + DummyPOSDictionary posDict = new DummyPOSDictionary( > + POSDictionary.create(POSDictionaryTest.class > + .getResourceAsStream("TagDictionaryCaseSensitive.xml"))); > + > + POSModel posModel = trainPOSModel(ModelType.MAXENT, > + new DummyPOSTaggerFactoy(null, posDict)); > + > + POSTaggerFactory factory = posModel.getFactory(); > + assertTrue(factory.getPOSDictionary() instanceof DummyPOSDictionary); > + assertTrue(factory.getPOSContextGenerator() instanceof > DummyPOSContextGenerator); > + assertTrue(factory.getSequenceValidator() instanceof > DummyPOSSequenceValidator); > + > + 
ByteArrayOutputStream out = new ByteArrayOutputStream(); > + posModel.serialize(out); > + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); > + > + POSModel fromSerialized = new POSModel(in); > + > + factory = fromSerialized.getFactory(); > + assertTrue(factory.getPOSDictionary() instanceof DummyPOSDictionary); > + assertTrue(factory.getPOSContextGenerator() instanceof > DummyPOSContextGenerator); > + assertTrue(factory.getSequenceValidator() instanceof > DummyPOSSequenceValidator); > + } > + > + @Test > + public void testBuildNGramDictionary() throws IOException { > + ObjectStream<POSSample> samples = createSampleStream(); > + > + POSTaggerME.buildNGramDictionary(samples, 0); > + } > +} > \ No newline at end of file > > Propchange: > incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java > > ------------------------------------------------------------------------------ > svn:mime-type = text/plain > > >
