There is a typo in the name of the new test class file: DummyPOSTaggerFactoy.java  ->  DummyPOSTaggerFactoRy.java

Aliaksandr

On Sun, Feb 12, 2012 at 2:17 AM, <[email protected]> wrote:

> Author: colen
> Date: Sun Feb 12 01:17:01 2012
> New Revision: 1243188
>
> URL: http://svn.apache.org/viewvc?rev=1243188&view=rev
> Log:
> OPENNLP-429: Modified the BaseModel behavior to allow serializers provided
> by tool factories.
>
> Changed BaseModel to allow loading artifacts and serializers in two steps.
> The first will load basic artifacts and serializers, so we can load the
> manifest. Later we can load information from the manifest (factory name), get
> more serializers using this information, and finally load more artifacts
> and serializers. To do that I had to change the BaseModel constructor,
> moving some of its code into two methods that can be called by the sub-class at
> the right time.
> All Model implementations had to change to add the post constructor
> actions;
>
> Added:
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
>   (with props)
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java
>   (with props)
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java
>   (with props)
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
>   (with props)
> Modified:
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
>
>  
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
> Sun Feb 12 01:17:01 2012
> @@ -47,7 +47,8 @@ public class ChunkerModel extends BaseMo
>     super(COMPONENT_NAME, languageCode, manifestInfoEntries);
>
>     artifactMap.put(CHUNKER_MODEL_ENTRY_NAME, chunkerModel);
> -
> +
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -57,6 +58,9 @@ public class ChunkerModel extends BaseMo
>
>   public ChunkerModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   @Override
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/coref/CorefModel.java
> Sun Feb 12 01:17:01 2012
> @@ -110,6 +110,9 @@ public class CorefModel extends BaseMode
>
>     artifactMap.put(PLURAL_PRONOUN_RESOLVER_MODEL_ENTRY_NAME,
>         createModel(project + File.separator + "tmodel.bin.gz"));
> +
> +    loadArtifactSerializers();
> +    checkArtifactMap();
>   }
>
>   private AbstractModel createModel(String fileName) throws IOException {
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
> Sun Feb 12 01:17:01 2012
> @@ -35,7 +35,7 @@ public class DoccatModel extends BaseMod
>     super(COMPONENT_NAME, languageCode, manifestInfoEntries);
>
>     artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel);
> -
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -45,6 +45,9 @@ public class DoccatModel extends BaseMod
>
>   public DoccatModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   @Override
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
> Sun Feb 12 01:17:01 2012
> @@ -95,7 +95,7 @@ public class TokenNameFinderModel extend
>     // TODO: Add checks to not put resources where no serializer exists,
>     // make that case fail here, should be done in the BaseModel
>     artifactMap.putAll(resources);
> -
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -106,6 +106,9 @@ public class TokenNameFinderModel extend
>
>   public TokenNameFinderModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   /**
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
> Sun Feb 12 01:17:01 2012
> @@ -127,7 +127,7 @@ public class ParserModel extends BaseMod
>     artifactMap.put(CHUNKER_TAGGER_MODEL_ENTRY_NAME, chunkerTagger);
>
>     artifactMap.put(HEAD_RULES_MODEL_ENTRY_NAME, headRules);
> -
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -149,6 +149,9 @@ public class ParserModel extends BaseMod
>
>   public ParserModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   @Override
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
> Sun Feb 12 01:17:01 2012
> @@ -30,6 +30,7 @@ import java.util.Set;
>  import opennlp.model.AbstractModel;
>  import opennlp.tools.dictionary.Dictionary;
>  import opennlp.tools.util.InvalidFormatException;
> +import opennlp.tools.util.model.ArtifactProvider;
>  import opennlp.tools.util.model.ArtifactSerializer;
>  import opennlp.tools.util.model.BaseModel;
>  import opennlp.tools.util.model.UncloseableInputStream;
> @@ -67,6 +68,8 @@ public final class POSModel extends Base
>   private static final String NGRAM_DICTIONARY_ENTRY_NAME =
> "ngram.dictionary";
>   private static final String FACTORY_NAME = "pos.factory";
>
> +  private POSTaggerFactory posTaggerFactory = null;
> +
>   public POSModel(String languageCode, AbstractModel posModel,
>       POSDictionary tagDictionary, Dictionary ngramDict, Map<String,
> String> manifestInfoEntries) {
>
> @@ -95,9 +98,12 @@ public final class POSModel extends Base
>       artifactMap.put(NGRAM_DICTIONARY_ENTRY_NAME, ngramDict);
>
>     // The factory is optional
> -    if (posFactory!=null)
> -        setManifestProperty(FACTORY_NAME,
> posFactory.getClass().getCanonicalName());
> +    if (posFactory!=null) {
> +      setManifestProperty(FACTORY_NAME,
> posFactory.getClass().getCanonicalName());
> +      artifactMap.putAll(posFactory.createArtifactMap());
> +    }
>
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -108,6 +114,9 @@ public final class POSModel extends Base
>
>   public POSModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   @Override
> @@ -118,6 +127,9 @@ public final class POSModel extends Base
>     super.createArtifactSerializers(serializers);
>
>     POSDictionarySerializer.register(serializers);
> +
> +    if(getFactory() != null)
> +      serializers.putAll(getFactory().createArtifactSerializersMap());
>   }
>
>   @Override
> @@ -192,10 +204,14 @@ public final class POSModel extends Base
>    * @return tag dictionary or null if not used
>    */
>   public POSDictionary getTagDictionary() {
> +    if(getFactory() != null)
> +      return getFactory().getPOSDictionary();
>     return (POSDictionary) artifactMap.get(TAG_DICTIONARY_ENTRY_NAME);
>   }
>
>   public POSTaggerFactory getFactory() {
> +    if(this.posTaggerFactory != null)
> +      return this.posTaggerFactory;
>     String factoryName = getManifestProperty(FACTORY_NAME);
>     POSTaggerFactory theFactory = null;
>     Class<?> factoryClass = null;
> @@ -211,8 +227,8 @@ public final class POSModel extends Base
>     Constructor<?> constructor = null;
>     if(factoryClass != null) {
>       try {
> -        constructor = factoryClass.getConstructor(Dictionary.class,
> POSDictionary.class);
> -        theFactory = (POSTaggerFactory)
> constructor.newInstance(getNgramDictionary(), getTagDictionary());
> +        constructor = factoryClass.getConstructor(ArtifactProvider.class);
> +        theFactory = (POSTaggerFactory) constructor.newInstance(this);
>       } catch (NoSuchMethodException e) {
>         // ignore, will try another constructor
>       } catch (Exception e) {
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
> Sun Feb 12 01:17:01 2012
> @@ -18,33 +18,61 @@
>  package opennlp.tools.postag;
>
>  import opennlp.tools.dictionary.Dictionary;
> +import opennlp.tools.util.BaseToolFactory;
>  import opennlp.tools.util.SequenceValidator;
> +import opennlp.tools.util.model.ArtifactProvider;
>
> -public class POSTaggerFactory {
> +/**
> + *
> + */
> +public class POSTaggerFactory extends BaseToolFactory {
>
>   protected Dictionary ngramDictionary;
>   protected POSDictionary posDictionary;
> -
> -  public POSTaggerFactory() {
> +
> +  /**
> +   * Creates a {@link POSTaggerFactory} that provides the default
> implementation
> +   * of the resources.
> +   */
> +  public POSTaggerFactory() {
>   }
>
> -  public POSTaggerFactory(POSModel model) {
> -    if(model != null) {
> -      this.ngramDictionary = model.getNgramDictionary();
> -      this.posDictionary = model.getTagDictionary();
> -    }
> +  /**
> +   * Creates a {@link POSTaggerFactory} with an {@link ArtifactProvider}
> that
> +   * will be used to retrieve artifacts.
> +   * <p>
> +   * Sub-classes should implement a constructor with this signatures and
> call
> +   * this constructor.
> +   * <p>
> +   * This will be used to load the factory from a serialized POSModel.
> +   */
> +  public POSTaggerFactory(ArtifactProvider artifactProvider) {
> +    super(artifactProvider);
>   }
>
> -  public POSTaggerFactory(Dictionary ngramDictionary, POSDictionary
> posDictionary) {
> +  /**
> +   * Creates a {@link POSTaggerFactory}. Use this constructor to
> +   * programmatically create a factory.
> +   *
> +   * @param ngramDictionary
> +   * @param posDictionary
> +   */
> +  public POSTaggerFactory(Dictionary ngramDictionary,
> +      POSDictionary posDictionary) {
>     this.ngramDictionary = ngramDictionary;
>     this.posDictionary = posDictionary;
>   }
>
> +  public POSDictionary getPOSDictionary() {
> +    return this.posDictionary;
> +  }
> +
>   public POSContextGenerator getPOSContextGenerator() {
>     return new DefaultPOSContextGenerator(0, ngramDictionary);
>   }
>
>   public SequenceValidator<String> getSequenceValidator() {
> -    return new DefaultPOSSequenceValidator(posDictionary);
> +    return new DefaultPOSSequenceValidator(getPOSDictionary());
>   }
> +
>  }
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
> Sun Feb 12 01:17:01 2012
> @@ -67,7 +67,7 @@ public class SentenceModel extends BaseM
>     // EOS characters are optional
>     if (eosCharacters!=null)
>       setManifestProperty(EOS_CHARACTERS_PROPERTY,
> eosCharArrayToString(eosCharacters));
> -
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -80,6 +80,9 @@ public class SentenceModel extends BaseM
>
>   public SentenceModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   @Override
>
> Modified:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1243188&r1=1243187&r2=1243188&view=diff
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
> (original)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
> Sun Feb 12 01:17:01 2012
> @@ -69,7 +69,7 @@ public final class TokenizerModel extend
>     // Abbreviations are optional
>     if (abbreviations != null)
>       artifactMap.put(ABBREVIATIONS_ENTRY_NAME, abbreviations);
> -
> +    loadArtifactSerializers();
>     checkArtifactMap();
>   }
>
> @@ -108,6 +108,9 @@ public final class TokenizerModel extend
>    */
>   public TokenizerModel(InputStream in) throws IOException,
> InvalidFormatException {
>     super(COMPONENT_NAME, in);
> +    loadArtifactSerializers();
> +    finishLoadingArtifacts(in);
> +    checkArtifactMap();
>   }
>
>   /**
>
> Added:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java?rev=1243188&view=auto
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
> (added)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
> Sun Feb 12 01:17:01 2012
> @@ -0,0 +1,79 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreemnets.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +package opennlp.tools.util;
> +
> +import java.util.HashMap;
> +import java.util.Map;
> +
> +import opennlp.tools.util.model.ArtifactProvider;
> +import opennlp.tools.util.model.ArtifactSerializer;
> +import opennlp.tools.util.model.BaseModel;
> +
> +/**
> + * Base class for all tool factories.
> + *
> + * Extensions of this class should: <li>implement an empty constructor
> (TODO is
> + * it necessary?) <li>implement a constructor that takes the
> + * {@link ArtifactProvider} and calls {@link #BaseToolFactory(Map)}
> <li>override
> + * {@link #createArtifactMap()} and {@link
> #createArtifactSerializersMap()}
> + * methods if necessary.
> + */
> +public abstract class BaseToolFactory {
> +
> +  protected final ArtifactProvider artifactProvider;
> +
> +  /**
> +   * All sub-classes should have an empty constructor
> +   */
> +  public BaseToolFactory() {
> +    this.artifactProvider = null;
> +  }
> +
> +  /**
> +   * All sub-classes should have a constructor whith this signature
> +   */
> +  public BaseToolFactory(ArtifactProvider artifactProvider) {
> +    this.artifactProvider = artifactProvider;
> +  }
> +
> +  /**
> +   * Creates a {@link Map} with pairs of keys and {@link
> ArtifactSerializer}.
> +   * The models implementation should call this method from
> +   * {@link BaseModel#createArtifactSerializersMap}
> +   * <p>
> +   * The base implementation will return a {@link HashMap} that should be
> +   * populated by sub-classes.
> +   */
> +  @SuppressWarnings("rawtypes")
> +  public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
> +    return new HashMap<String, ArtifactSerializer>();
> +  }
> +
> +  /**
> +   * Creates a {@link Map} with pairs of keys and objects. The models
> +   * implementation should call this constructor that creates a model
> +   * programmatically.
> +   * <p>
> +   * The base implementation will return a {@link HashMap} that should be
> +   * populated by sub-classes.
> +   */
> +  public Map<String, Object> createArtifactMap() {
> +    return new HashMap<String, Object>();
> +  }
> +
> +}
>
> Propchange:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
>
> ------------------------------------------------------------------------------
>    svn:mime-type = text/plain
>
> Added:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java?rev=1243188&view=auto
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java
> (added)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java
> Sun Feb 12 01:17:01 2012
> @@ -0,0 +1,30 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreemnets.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +package opennlp.tools.util.model;
> +
> +/**
> + * Provides access to model persisted artifacts.
> + */
> +public interface ArtifactProvider {
> +
> +  /**
> +   * Gets an artifact by name
> +   */
> +  public <T> T getArtifact(String key);
> +
> +}
>
> Propchange:
> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/ArtifactProvider.java
>
> ------------------------------------------------------------------------------
>    svn:mime-type = text/plain
>
> Added:
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java?rev=1243188&view=auto
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java
> (added)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java
> Sun Feb 12 01:17:01 2012
> @@ -0,0 +1,131 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +package opennlp.tools.postag;
> +
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.OutputStream;
> +import java.util.Map;
> +
> +import opennlp.tools.dictionary.Dictionary;
> +import opennlp.tools.util.InvalidFormatException;
> +import opennlp.tools.util.SequenceValidator;
> +import opennlp.tools.util.model.ArtifactProvider;
> +import opennlp.tools.util.model.ArtifactSerializer;
> +import opennlp.tools.util.model.UncloseableInputStream;
> +
> +public class DummyPOSTaggerFactoy extends POSTaggerFactory {
> +
> +
> +  private static final String DUMMY_POSDICT = "DUMMY_POSDICT";
> +  private DummyPOSDictionary dict;
> +
> +  public DummyPOSTaggerFactoy(Dictionary ngramDictionary,
> DummyPOSDictionary posDictionary) {
> +    super(ngramDictionary, null);
> +    this.dict = posDictionary;
> +  }
> +
> +  public DummyPOSTaggerFactoy(ArtifactProvider artifactProvider) {
> +    super(artifactProvider);
> +  }
> +
> +  @Override
> +  public SequenceValidator<String> getSequenceValidator() {
> +    return new DummyPOSSequenceValidator();
> +  }
> +
> +  public POSDictionary getPOSDictionary() {
> +    return (POSDictionary) artifactProvider.getArtifact(DUMMY_POSDICT);
> +  }
> +
> +  @Override
> +  public POSContextGenerator getPOSContextGenerator() {
> +    return new DummyPOSContextGenerator(this.ngramDictionary);
> +  }
> +
> +  @Override
> +  @SuppressWarnings("rawtypes")
> +  public Map<String, ArtifactSerializer> createArtifactSerializersMap() {
> +    Map<String, ArtifactSerializer> serializers =
> super.createArtifactSerializersMap();
> +
> +    serializers.put(DUMMY_POSDICT, new DummyPOSDictionarySerializer());
> +    return serializers;
> +  }
> +
> +  @Override
> +  public Map<String, Object> createArtifactMap() {
> +    Map<String, Object> artifactMap = super.createArtifactMap();
> +    if(this.dict != null)
> +      artifactMap.put(DUMMY_POSDICT, this.dict);
> +    return artifactMap;
> +  }
> +
> +  static class DummyPOSContextGenerator extends
> DefaultPOSContextGenerator {
> +
> +    public DummyPOSContextGenerator(Dictionary dict) {
> +      super(dict);
> +    }
> +
> +  }
> +
> +  static class DummyPOSDictionarySerializer implements
> ArtifactSerializer<DummyPOSDictionary> {
> +
> +    public DummyPOSDictionary create(InputStream in) throws IOException,
> +        InvalidFormatException {
> +      return DummyPOSDictionary.create(new UncloseableInputStream(in));
> +    }
> +
> +    public void serialize(DummyPOSDictionary artifact, OutputStream out)
> +        throws IOException {
> +      artifact.serialize(out);
> +    }
> +  }
> +
> +  static class DummyPOSSequenceValidator implements
> SequenceValidator<String> {
> +
> +    public boolean validSequence(int i, String[] inputSequence,
> +        String[] outcomesSequence, String outcome) {
> +      return true;
> +    }
> +
> +  }
> +
> +  static class DummyPOSDictionary extends POSDictionary {
> +
> +    private POSDictionary dict;
> +
> +    public DummyPOSDictionary(POSDictionary dict) {
> +      this.dict = dict;
> +    }
> +
> +    public static DummyPOSDictionary create(
> +        UncloseableInputStream uncloseableInputStream) throws
> InvalidFormatException, IOException {
> +      return new
> DummyPOSDictionary(POSDictionary.create(uncloseableInputStream));
> +    }
> +
> +    public void serialize(OutputStream out) throws IOException {
> +      dict.serialize(out);
> +    }
> +
> +    public String[] getTags(String word) {
> +      return dict.getTags(word);
> +    }
> +
> +  }
> +
> +}
> \ No newline at end of file
>
> Propchange:
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactoy.java
>
> ------------------------------------------------------------------------------
>    svn:mime-type = text/plain
>
> Added:
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
> URL:
> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java?rev=1243188&view=auto
>
> ==============================================================================
> ---
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
> (added)
> +++
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
> Sun Feb 12 01:17:01 2012
> @@ -0,0 +1,88 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one or more
> + * contributor license agreements.  See the NOTICE file distributed with
> + * this work for additional information regarding copyright ownership.
> + * The ASF licenses this file to You under the Apache License, Version 2.0
> + * (the "License"); you may not use this file except in compliance with
> + * the License. You may obtain a copy of the License at
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +package opennlp.tools.postag;
> +
> +import static org.junit.Assert.*;
> +
> +import java.io.ByteArrayInputStream;
> +import java.io.ByteArrayOutputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.io.InputStreamReader;
> +
> +import opennlp.tools.postag.DummyPOSTaggerFactoy.DummyPOSContextGenerator;
> +import opennlp.tools.postag.DummyPOSTaggerFactoy.DummyPOSDictionary;
> +import
> opennlp.tools.postag.DummyPOSTaggerFactoy.DummyPOSSequenceValidator;
> +import opennlp.tools.util.ObjectStream;
> +import opennlp.tools.util.TrainingParameters;
> +import opennlp.tools.util.model.ModelType;
> +
> +import org.junit.Test;
> +
> +/**
> + * Tests for the {@link POSTaggerFactory} class.
> + */
> +public class POSTaggerFactoryTest {
> +
> +  private static ObjectStream<POSSample> createSampleStream()
> +      throws IOException {
> +    InputStream in = POSTaggerFactoryTest.class.getClassLoader()
> +
>  .getResourceAsStream("opennlp/tools/postag/AnnotatedSentences.txt");
> +
> +    return new WordTagSampleStream((new InputStreamReader(in)));
> +  }
> +
> +  static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory)
> +      throws IOException {
> +    return POSTaggerME.train("en", createSampleStream(),
> +        TrainingParameters.defaultParams(), factory, null, null);
> +  }
> +
> +  @Test
> +  public void testPOSTaggerWithCustomFactory() throws IOException {
> +    DummyPOSDictionary posDict = new DummyPOSDictionary(
> +        POSDictionary.create(POSDictionaryTest.class
> +            .getResourceAsStream("TagDictionaryCaseSensitive.xml")));
> +
> +    POSModel posModel = trainPOSModel(ModelType.MAXENT,
> +        new DummyPOSTaggerFactoy(null, posDict));
> +
> +    POSTaggerFactory factory = posModel.getFactory();
> +    assertTrue(factory.getPOSDictionary() instanceof DummyPOSDictionary);
> +    assertTrue(factory.getPOSContextGenerator() instanceof
> DummyPOSContextGenerator);
> +    assertTrue(factory.getSequenceValidator() instanceof
> DummyPOSSequenceValidator);
> +
> +    ByteArrayOutputStream out = new ByteArrayOutputStream();
> +    posModel.serialize(out);
> +    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
> +
> +    POSModel fromSerialized = new POSModel(in);
> +
> +    factory = fromSerialized.getFactory();
> +    assertTrue(factory.getPOSDictionary() instanceof DummyPOSDictionary);
> +    assertTrue(factory.getPOSContextGenerator() instanceof
> DummyPOSContextGenerator);
> +    assertTrue(factory.getSequenceValidator() instanceof
> DummyPOSSequenceValidator);
> +  }
> +
> +  @Test
> +  public void testBuildNGramDictionary() throws IOException {
> +    ObjectStream<POSSample> samples = createSampleStream();
> +
> +    POSTaggerME.buildNGramDictionary(samples, 0);
> +  }
> +}
> \ No newline at end of file
>
> Propchange:
> incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
>
> ------------------------------------------------------------------------------
>    svn:mime-type = text/plain
>
>
>

Reply via email to