Repository: opennlp Updated Branches: refs/heads/trunk c657cdeda -> 639b9f0ae
Remove deprecated API from the POS Tagger See issue OPENNLP-883 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/639b9f0a Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/639b9f0a Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/639b9f0a Branch: refs/heads/trunk Commit: 639b9f0ae2200d7a7365eeb43227ca067330bc83 Parents: c657cde Author: Jörn Kottmann <[email protected]> Authored: Tue Nov 22 01:39:50 2016 +0100 Committer: Jörn Kottmann <[email protected]> Committed: Tue Nov 22 01:39:50 2016 +0100 ---------------------------------------------------------------------- .../opennlp/tools/postag/POSDictionary.java | 76 -------------- .../java/opennlp/tools/postag/POSModel.java | 55 ---------- .../java/opennlp/tools/postag/POSTagger.java | 29 ----- .../tools/postag/POSTaggerCrossValidator.java | 35 ------- .../java/opennlp/tools/postag/POSTaggerME.java | 105 +------------------ .../opennlp/tools/postag/POSTaggerMETest.java | 9 +- .../java/opennlp/uima/postag/POSTagger.java | 8 +- .../opennlp/uima/postag/POSTaggerTrainer.java | 22 ++-- 8 files changed, 23 insertions(+), 316 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java index 7904d83..ef237a8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java @@ -65,82 +65,6 @@ public class POSDictionary implements Iterable<String>, MutableTagDictionary { } /** - * Creates a tag dictionary with contents of specified file. - * - * @param file The file name for the tag dictionary. - * - * @throws IOException when the specified file can not be read. - * - * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed. - */ - @Deprecated - public POSDictionary(String file) throws IOException { - this(file, null, true); - } - - /** - * Creates a tag dictionary with contents of specified file and using specified - * case to determine how to access entries in the tag dictionary. - * - * @param file The file name for the tag dictionary. - * @param caseSensitive Specifies whether the tag dictionary is case sensitive or not. - * - * @throws IOException when the specified file can not be read. - * - * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed. - */ - @Deprecated - public POSDictionary(String file, boolean caseSensitive) throws IOException { - this(file, null, caseSensitive); - } - - - /** - * Creates a tag dictionary with contents of specified file and using specified case to determine how to access entries in the tag dictionary. - * - * @param file The file name for the tag dictionary. - * @param encoding The encoding of the tag dictionary file. - * @param caseSensitive Specifies whether the tag dictionary is case sensitive or not. - * - * @throws IOException when the specified file can not be read. - * - * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed. - */ - @Deprecated - public POSDictionary(String file, String encoding, boolean caseSensitive) throws IOException { - this(new BufferedReader(encoding == null ? new FileReader(file) : new InputStreamReader(new FileInputStream(file),encoding)), caseSensitive); - } - - /** - * Create tag dictionary object with contents of specified file and using specified case to determine how to access entries in the tag dictionary. - * - * @param reader A reader for the tag dictionary. - * @param caseSensitive Specifies whether the tag dictionary is case sensitive or not. - * - * @throws IOException when the specified file can not be read. - * - * @deprecated Use {@link POSDictionary#create(InputStream)} instead, old format might removed. - */ - @Deprecated - public POSDictionary(BufferedReader reader, boolean caseSensitive) throws IOException { - dictionary = new HashMap<String, String[]>(); - this.caseSensitive = caseSensitive; - for (String line = reader.readLine(); line != null; line = reader.readLine()) { - String[] parts = line.split(" "); - String[] tags = new String[parts.length - 1]; - for (int ti = 0, tl = parts.length - 1; ti < tl; ti++) { - tags[ti] = parts[ti + 1]; - } - if (caseSensitive) { - dictionary.put(parts[0], tags); - } - else { - dictionary.put(StringUtil.toLowerCase(parts[0]), tags); - } - } - } - - /** * Returns a list of valid tags for the specified word. * * @param word The word. http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java index 446d1e6..34b9f79 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java @@ -45,29 +45,6 @@ public final class POSModel extends BaseModel { public static final String POS_MODEL_ENTRY_NAME = "pos.model"; - /** - * @deprecated Use - * {@link #POSModel(String, MaxentModel, Map, POSTaggerFactory)} - * instead. - */ - public POSModel(String languageCode, MaxentModel posModel, - POSDictionary tagDictionary, Dictionary ngramDict, Map<String, String> manifestInfoEntries) { - - this(languageCode, posModel, manifestInfoEntries, new POSTaggerFactory( - ngramDict, tagDictionary)); - } - - /** - * @deprecated Use - * {@link #POSModel(String, MaxentModel, Map, POSTaggerFactory)} - * instead. - */ - public POSModel(String languageCode, MaxentModel posModel, - POSDictionary tagDictionary, Dictionary ngramDict) { - this(languageCode, posModel, POSTaggerME.DEFAULT_BEAM_SIZE, null, new POSTaggerFactory(ngramDict, - tagDictionary)); - } - public POSModel(String languageCode, SequenceClassificationModel<String> posModel, Map<String, String> manifestInfoEntries, POSTaggerFactory posFactory) { @@ -139,7 +116,6 @@ public final class POSModel extends BaseModel { * @deprecated use getPosSequenceModel instead. This method will be removed soon. */ @Deprecated - public MaxentModel getPosModel() { if (artifactMap.get(POS_MODEL_ENTRY_NAME) instanceof MaxentModel) { return (MaxentModel) artifactMap.get(POS_MODEL_ENTRY_NAME); @@ -171,37 +147,6 @@ public final class POSModel extends BaseModel { } } - /** - * Retrieves the tag dictionary. - * - * @return tag dictionary or null if not used - * - * @deprecated Use {@link POSModel#getFactory()} to get a - * {@link POSTaggerFactory} and - * {@link POSTaggerFactory#getTagDictionary()} to get a - * {@link TagDictionary}. - * - * @throws IllegalStateException - * if the TagDictionary is not an instance of POSDictionary - */ - public POSDictionary getTagDictionary() { - if (getFactory() != null) { - TagDictionary dict = getFactory().getTagDictionary(); - if (dict != null) { - if (dict instanceof POSDictionary) { - return (POSDictionary) dict; - } - String clazz = dict.getClass().getCanonicalName(); - throw new IllegalStateException("Can not get a dictionary of type " - + clazz - + " using the deprecated method POSModel.getTagDictionary() " - + "because it can only return dictionaries of type POSDictionary. " - + "Use POSModel.getFactory().getTagDictionary() instead."); - } - } - return null; - } - public POSTaggerFactory getFactory() { return (POSTaggerFactory) this.toolFactory; } http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java index 3cfc522..f081916 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTagger.java @@ -28,18 +28,6 @@ public interface POSTagger { /** * Assigns the sentence of tokens pos tags. - * - * @param sentence - * The sentence of tokens to be tagged. - * @return a list of pos tags for each token provided in sentence. - * - * @deprecated call <code> tag(String[]) </code> instead - */ - @Deprecated - public List<String> tag(List<String> sentence); - - /** - * Assigns the sentence of tokens pos tags. * @param sentence The sentece of tokens to be tagged. * @return an array of pos tags for each token provided in sentence. */ @@ -47,23 +35,6 @@ public interface POSTagger { public String[] tag(String[] sentence, Object[] additionaContext); - /** - * Assigns the sentence of space-delimied tokens pos tags. - * @param sentence The sentece of space-delimited tokens to be tagged. - * @return a string of space-delimited pos tags for each token provided in sentence. - * - * @deprecated call <code> tag(String[]) instead </code> use WhiteSpaceTokenizer.INSTANCE.tokenize - * to obtain the String array. - */ - @Deprecated - public String tag(String sentence); - - /** - * @deprecated call <code> topKSequences(String[]) </code> instead - */ - @Deprecated - public Sequence[] topKSequences(List<String> sentence); - public Sequence[] topKSequences(String[] sentence); public Sequence[] topKSequences(String[] sentence, Object[] additionaContext); http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java index c767268..27854dc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java @@ -79,41 +79,6 @@ public class POSTaggerCrossValidator { } /** - * @deprecated use - * {@link #POSTaggerCrossValidator(String, TrainingParameters, POSTaggerFactory, POSTaggerEvaluationMonitor...)} - * instead and pass in a {@link POSTaggerFactory}. - */ - public POSTaggerCrossValidator(String languageCode, - TrainingParameters trainParam, POSDictionary tagDictionary, - POSTaggerEvaluationMonitor... listeners) { - this(languageCode, trainParam, create(null, tagDictionary), listeners); - } - - /** - * @deprecated use - * {@link #POSTaggerCrossValidator(String, TrainingParameters, POSTaggerFactory, POSTaggerEvaluationMonitor...)} - * instead and pass in the name of {@link POSTaggerFactory} - * sub-class. - */ - public POSTaggerCrossValidator(String languageCode, - TrainingParameters trainParam, POSDictionary tagDictionary, - Integer ngramCutoff, POSTaggerEvaluationMonitor... listeners) { - this(languageCode, trainParam, create(null, tagDictionary), listeners); - this.ngramCutoff = ngramCutoff; - } - - /** - * @deprecated use - * {@link #POSTaggerCrossValidator(String, TrainingParameters, POSTaggerFactory, POSTaggerEvaluationMonitor...)} - * instead and pass in a {@link POSTaggerFactory}. - */ - public POSTaggerCrossValidator(String languageCode, - TrainingParameters trainParam, POSDictionary tagDictionary, - Dictionary ngramDictionary, POSTaggerEvaluationMonitor... listeners) { - this(languageCode, trainParam, create(ngramDictionary, tagDictionary), listeners); - } - - /** * Starts the evaluation. * * @param samples http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java index e2e5188..e4c1c1b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java @@ -90,39 +90,7 @@ public class POSTaggerME implements POSTagger { private SequenceValidator<String> sequenceValidator; /** - * Initializes the current instance with the provided - * model and provided beam size. - * - * @param model - * @param beamSize - * - * @deprecated the beam size should be specified in the params during training - */ - @Deprecated - public POSTaggerME(POSModel model, int beamSize, int cacheSize) { - POSTaggerFactory factory = model.getFactory(); - - modelPackage = model; - - // TODO: Why is this the beam size?! not cache size? - contextGen = factory.getPOSContextGenerator(beamSize); - tagDictionary = factory.getTagDictionary(); - size = beamSize; - - sequenceValidator = factory.getSequenceValidator(); - - if (model.getPosSequenceModel() != null) { - this.model = model.getPosSequenceModel(); - } - else { - this.model = new opennlp.tools.ml.BeamSearch<String>(beamSize, - model.getPosModel(), cacheSize); - } - } - - /** - * Initializes the current instance with the provided model - * and the default beam size of 3. + * Initializes the current instance with the provided model. * * @param model */ @@ -156,21 +124,6 @@ public class POSTaggerME implements POSTagger { } /** - * Returns the number of different tags predicted by this model. - * - * @return the number of different tags predicted by this model. - * @deprecated use getAllPosTags instead! - */ - @Deprecated - public int getNumTags() { - - // TODO: Lets discuss on the dev list how to do this properly! - // Nobody needs the number of tags, if the tags are not available. - - return model.getOutcomes().length; - } - - /** * Retrieves an array of all possible part-of-speech tags from the * tagger. * @@ -180,12 +133,6 @@ public class POSTaggerME implements POSTagger { return model.getOutcomes(); } - @Deprecated - public List<String> tag(List<String> sentence) { - bestSequence = model.bestSequence(sentence.toArray(new String[sentence.size()]), null, contextGen, sequenceValidator); - return bestSequence.getOutcomes(); - } - public String[] tag(String[] sentence) { return this.tag(sentence, null); } @@ -215,12 +162,6 @@ public class POSTaggerME implements POSTagger { return tags; } - @Deprecated - public Sequence[] topKSequences(List<String> sentence) { - return model.bestSequences(size, sentence.toArray(new String[sentence.size()]), null, - contextGen, sequenceValidator); - } - public Sequence[] topKSequences(String[] sentence) { return this.topKSequences(sentence, null); } @@ -247,19 +188,6 @@ public class POSTaggerME implements POSTagger { return bestSequence.getProbs(); } - @Deprecated - public String tag(String sentence) { - List<String> toks = new ArrayList<String>(); - StringTokenizer st = new StringTokenizer(sentence); - while (st.hasMoreTokens()) - toks.add(st.nextToken()); - List<String> tags = tag(toks); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < tags.size(); i++) - sb.append(toks.get(i) + "/" + tags.get(i) + " "); - return sb.toString().trim(); - } - public String[] getOrderedTags(List<String> words, List<String> tags, int index) { return getOrderedTags(words,tags,index,null); } @@ -349,37 +277,6 @@ public class POSTaggerME implements POSTagger { } } - /** - * @deprecated use - * {@link #train(String, ObjectStream, TrainingParameters, POSTaggerFactory)} - * instead and pass in a {@link POSTaggerFactory}. - */ - public static POSModel train(String languageCode, ObjectStream<POSSample> samples, TrainingParameters trainParams, - POSDictionary tagDictionary, Dictionary ngramDictionary) throws IOException { - - return train(languageCode, samples, trainParams, new POSTaggerFactory( - ngramDictionary, tagDictionary)); - } - - /** - * @deprecated use - * {@link #train(String, ObjectStream, TrainingParameters, POSTaggerFactory)} - * instead and pass in a {@link POSTaggerFactory} and a - * {@link TrainingParameters}. - */ - @Deprecated - public static POSModel train(String languageCode, ObjectStream<POSSample> samples, ModelType modelType, POSDictionary tagDictionary, - Dictionary ngramDictionary, int cutoff, int iterations) throws IOException { - - TrainingParameters params = new TrainingParameters(); - - params.put(TrainingParameters.ALGORITHM_PARAM, modelType.toString()); - params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iterations)); - params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cutoff)); - - return train(languageCode, samples, params, tagDictionary, ngramDictionary); - } - public static Dictionary buildNGramDictionary(ObjectStream<POSSample> samples, int cutoff) throws IOException { http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java index 1d99687..996b233 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; +import opennlp.tools.util.TrainingParameters; import org.junit.Test; import opennlp.tools.formats.ResourceAsStreamFactory; @@ -50,8 +51,12 @@ public class POSTaggerMETest { * @throws IOException */ static POSModel trainPOSModel(ModelType type) throws IOException { - // TODO: also use tag dictionary for training - return POSTaggerME.train("en", createSampleStream(), type, null, null, 5, 100); + TrainingParameters params = new TrainingParameters(); + params.put(TrainingParameters.ALGORITHM_PARAM, type.toString()); + params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); + params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(5)); + + return POSTaggerME.train("en", createSampleStream(), params, new POSTaggerFactory()); } @Test http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java ---------------------------------------------------------------------- diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java index 5e77e9d..2fdc47c 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java +++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java @@ -17,6 +17,7 @@ package opennlp.uima.postag; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -127,7 +128,7 @@ public final class POSTagger extends CasAnnotator_ImplBase { if (beamSize == null) beamSize = POSTaggerME.DEFAULT_BEAM_SIZE; - this.posTagger = new POSTaggerME(model, beamSize, 0); + this.posTagger = new POSTaggerME(model); } /** @@ -174,7 +175,8 @@ public final class POSTagger extends CasAnnotator_ImplBase { sentenceTokenList.add(tokenAnnotation.getCoveredText()); } - final List<String> posTags = this.posTagger.tag(sentenceTokenList); + final List<String> posTags = Arrays.asList(this.posTagger.tag( + sentenceTokenList.toArray(new String[sentenceTokenList.size()]))); double posProbabilities[] = null; @@ -231,4 +233,4 @@ public final class POSTagger extends CasAnnotator_ImplBase { public void destroy() { this.posTagger = null; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9f0a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java ---------------------------------------------------------------------- diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java index 9f377be..e9bb048 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java @@ -27,11 +27,9 @@ import java.util.Iterator; import java.util.List; import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.postag.POSDictionary; -import opennlp.tools.postag.POSModel; -import opennlp.tools.postag.POSSample; -import opennlp.tools.postag.POSTaggerME; +import opennlp.tools.postag.*; import opennlp.tools.util.ObjectStreamUtils; +import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelType; import opennlp.uima.util.AnnotatorUtil; import opennlp.uima.util.CasConsumerUtil; @@ -116,12 +114,8 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase { TAG_DICTIONARY_NAME); if (tagDictionaryName != null) { - try { - InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName); - - // TODO: ask Tom if case sensitivity must be configureable - tagDictionary = new POSDictionary(new BufferedReader(new InputStreamReader(dictIn)), false); - + try (InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName)) { + tagDictionary = POSDictionary.create(dictIn); } catch (final IOException e) { // if this fails just print error message and continue final String message = "IOException during tag dictionary reading, " @@ -207,9 +201,13 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase { GIS.PRINT_MESSAGES = false; + TrainingParameters params = new TrainingParameters(); + params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100)); + params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(5)); + POSModel posTaggerModel = POSTaggerME.train(language, ObjectStreamUtils.createObjectStream(mPOSSamples), - ModelType.MAXENT, tagDictionary, null, 100, 5); + params, new POSTaggerFactory(null, tagDictionary)); // dereference to allow garbage collection mPOSSamples = null; @@ -234,4 +232,4 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase { // dereference to allow garbage collection mPOSSamples = null; } -} \ No newline at end of file +}
