Repository: opennlp Updated Branches: refs/heads/master 09e627c07 -> 9a9366c78
OPENNLP-944: Remove deprecated Indexer code from ML, this closes apache/opennlp#96 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/9a9366c7 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/9a9366c7 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/9a9366c7 Branch: refs/heads/master Commit: 9a9366c785b9975d0ea208c357a3353652e6d63a Parents: 09e627c Author: smarthi <[email protected]> Authored: Fri Jan 27 18:07:45 2017 -0500 Committer: smarthi <[email protected]> Committed: Fri Jan 27 18:07:45 2017 -0500 ---------------------------------------------------------------------- .../languagemodel/LanguageModelTool.java | 105 +++++----- .../cmdline/parser/BuildModelUpdaterTool.java | 8 +- .../cmdline/parser/CheckModelUpdaterTool.java | 9 +- .../formats/letsmt/LetsmtSentenceStream.java | 2 +- .../tools/languagemodel/LanguageModel.java | 28 +-- .../tools/languagemodel/NGramLanguageModel.java | 210 ++++++++++--------- .../java/opennlp/tools/ml/maxent/GISModel.java | 52 +---- .../opennlp/tools/ml/maxent/GISTrainer.java | 9 +- .../opennlp/tools/ml/maxent/IntegerPool.java | 60 ------ .../tools/ml/maxent/io/BinaryQNModelWriter.java | 3 +- .../tools/ml/maxent/io/GISModelReader.java | 13 +- .../tools/ml/maxent/io/GISModelWriter.java | 34 ++- .../opennlp/tools/ml/model/AbstractModel.java | 19 +- .../opennlp/tools/ml/model/DataIndexer.java | 25 ++- .../opennlp/tools/ml/model/EvalParameters.java | 52 +---- .../tools/ml/model/OnePassDataIndexer.java | 56 ----- .../ml/model/OnePassRealValueDataIndexer.java | 21 -- .../tools/ml/model/TwoPassDataIndexer.java | 61 ------ .../ml/naivebayes/NaiveBayesEvalParameters.java | 2 +- .../tools/ml/naivebayes/NaiveBayesTrainer.java | 6 +- .../tools/ml/perceptron/PerceptronTrainer.java | 2 +- .../opennlp/tools/parser/chunking/Parser.java | 11 - .../opennlp/tools/parser/treeinsert/Parser.java | 11 +- .../AbstractEndOfSentenceScanner.java | 56 ----- 24 files changed, 231 insertions(+), 624 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java index c4d98d2..aa46355 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/languagemodel/LanguageModelTool.java @@ -36,65 +36,68 @@ import opennlp.tools.util.StringList; */ public class LanguageModelTool extends BasicCmdLineTool { - @Override public String getShortDescription() { - return "gives the probability of a sequence of tokens in a language model"; - } + @Override + public String getShortDescription() { + return "gives the probability of a sequence of tokens in a language model"; + } - @Override public void run(String[] args) { - File lmFile = new File(args[0]); - FileInputStream stream = null; - try { - stream = new FileInputStream(lmFile); - NGramLanguageModel nGramLanguageModel = new NGramLanguageModel( - stream); + @Override + public void run(String[] args) { + File lmFile = new File(args[0]); + FileInputStream stream = null; + try { + stream = new FileInputStream(lmFile); + NGramLanguageModel nGramLanguageModel = new NGramLanguageModel( + stream); - ObjectStream<String> lineStream; - PerformanceMonitor perfMon = null; + ObjectStream<String> lineStream; + PerformanceMonitor perfMon = null; - try { - lineStream = new PlainTextByLineStream( - new SystemInputStreamFactory(), - SystemInputStreamFactory.encoding()); - perfMon = new PerformanceMonitor(System.err, "lm"); - perfMon.start(); - String line; - while ((line = lineStream.read()) != null) { - double probability; - String[] tokens = line.split(" "); - try { - probability = nGramLanguageModel - .calculateProbability(new StringList(tokens)); - } catch (Exception e) { - System.err.println("Error:" + e.getLocalizedMessage()); - System.err.println(line); - continue; - } + try { + lineStream = new PlainTextByLineStream( + new SystemInputStreamFactory(), + SystemInputStreamFactory.encoding()); + perfMon = new PerformanceMonitor(System.err, "lm"); + perfMon.start(); + String line; + while ((line = lineStream.read()) != null) { + double probability; + String[] tokens = line.split(" "); + try { + probability = nGramLanguageModel + .calculateProbability(new StringList(tokens)); + } catch (Exception e) { + System.err.println("Error:" + e.getLocalizedMessage()); + System.err.println(line); + continue; + } - System.out.println("sequence '" + Arrays.toString(tokens) - + "' has a probability of " + probability); + System.out.println("sequence '" + Arrays.toString(tokens) + + "' has a probability of " + probability); - perfMon.incrementCounter(); - } - } catch (IOException e) { - CmdLineUtil.handleStdinIoError(e); - } + perfMon.incrementCounter(); + } + } catch (IOException e) { + CmdLineUtil.handleStdinIoError(e); + } - perfMon.stopAndPrintFinalResult(); + perfMon.stopAndPrintFinalResult(); - } catch (java.io.IOException e) { - System.err.println(e.getLocalizedMessage()); - } finally { - if (stream != null) { - try { - stream.close(); - } catch (IOException e) { - // do nothing - } - } + } catch (java.io.IOException e) { + System.err.println(e.getLocalizedMessage()); + } finally { + if (stream != null) { + try { + stream.close(); + } catch (IOException e) { + // do nothing } + } } + } - @Override public String getHelp() { - return "Usage: " + CLI.CMD + " " + getName() + " model"; - } + @Override + public String getHelp() { + return "Usage: " + CLI.CMD + " " + getName() + " model"; + } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java index ce30f3b..327355b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java @@ -20,12 +20,12 @@ package opennlp.tools.cmdline.parser; import java.io.IOException; import opennlp.tools.dictionary.Dictionary; -import opennlp.tools.ml.model.AbstractModel; +import opennlp.tools.ml.maxent.GIS; import opennlp.tools.ml.model.Event; +import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.parser.Parse; import opennlp.tools.parser.ParserEventTypeEnum; import opennlp.tools.parser.ParserModel; -import opennlp.tools.parser.chunking.Parser; import opennlp.tools.parser.chunking.ParserEventStream; import opennlp.tools.util.ObjectStream; @@ -49,7 +49,9 @@ public final class BuildModelUpdaterTool extends ModelUpdaterTool { System.out.println("Training builder"); ObjectStream<Event> bes = new ParserEventStream(parseSamples, originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict); - AbstractModel buildModel = Parser.train(bes, 100, 5); + + GIS trainer = new GIS(); + MaxentModel buildModel = trainer.train(bes); parseSamples.close(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java index 1103e66..55e96ba 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java @@ -20,12 +20,12 @@ package opennlp.tools.cmdline.parser; import java.io.IOException; import opennlp.tools.dictionary.Dictionary; -import opennlp.tools.ml.model.AbstractModel; +import opennlp.tools.ml.maxent.GIS; import opennlp.tools.ml.model.Event; +import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.parser.Parse; import opennlp.tools.parser.ParserEventTypeEnum; import opennlp.tools.parser.ParserModel; -import opennlp.tools.parser.chunking.Parser; import opennlp.tools.parser.chunking.ParserEventStream; import opennlp.tools.util.ObjectStream; @@ -50,8 +50,9 @@ public final class CheckModelUpdaterTool extends ModelUpdaterTool { System.out.println("Training check model"); ObjectStream<Event> bes = new ParserEventStream(parseSamples, originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict); - AbstractModel checkModel = Parser.train(bes, - 100, 5); + + GIS trainer = new GIS(); + MaxentModel checkModel = trainer.train(bes); parseSamples.close(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtSentenceStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtSentenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtSentenceStream.java index 25c705a..d640b77 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtSentenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/letsmt/LetsmtSentenceStream.java @@ -49,7 +49,7 @@ class LetsmtSentenceStream implements ObjectStream<SentenceSample> { int begin = sentencesString.length(); if (sentence.getTokens() != null) { - sentencesString.append(String.join(" ", sentence.getTokens())); + sentencesString.append(String.join(" ", sentence.getTokens())); } else if (sentence.getNonTokenizedText() != null) { sentencesString.append(sentence.getNonTokenizedText()); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java index e7cecc1..98dde4e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java @@ -25,20 +25,20 @@ import opennlp.tools.util.StringList; */ public interface LanguageModel { - /** - * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary - * - * @param tokens the text tokens to calculate the probability for - * @return the probability of the given text tokens in the vocabulary - */ - double calculateProbability(StringList tokens); + /** + * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary + * + * @param tokens the text tokens to calculate the probability for + * @return the probability of the given text tokens in the vocabulary + */ + double calculateProbability(StringList tokens); - /** - * Predict the most probable output sequence of tokens, given an input sequence of tokens - * - * @param tokens a sequence of tokens - * @return the most probable subsequent token sequence - */ - StringList predictNextTokens(StringList tokens); + /** + * Predict the most probable output sequence of tokens, given an input sequence of tokens + * + * @param tokens a sequence of tokens + * @return the most probable subsequent token sequence + */ + StringList predictNextTokens(StringList tokens); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java index adf8857..e11c107 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java @@ -32,115 +32,117 @@ import opennlp.tools.util.StringList; */ public class NGramLanguageModel extends NGramModel implements LanguageModel { - private static final int DEFAULT_N = 3; - private static final double DEFAULT_K = 1d; - - private final int n; - private final double k; - - public NGramLanguageModel() { - this(DEFAULT_N, DEFAULT_K); - } - - public NGramLanguageModel(int n) { - this(n, DEFAULT_K); - } - - public NGramLanguageModel(double k) { - this(DEFAULT_N, k); - } - - public NGramLanguageModel(int n, double k) { - this.n = n; - this.k = k; - } - - public NGramLanguageModel(InputStream in) throws IOException { - this(in, DEFAULT_N, DEFAULT_K); - } - - public NGramLanguageModel(InputStream in, double k) throws IOException { - this(in, DEFAULT_N, k); - } - - public NGramLanguageModel(InputStream in, int n) throws IOException { - this(in, n, DEFAULT_K); - } - - public NGramLanguageModel(InputStream in, int n, double k) - throws IOException { - super(in); - this.n = n; - this.k = k; - } - - @Override public double calculateProbability(StringList sample) { - double probability = 0d; - if (size() > 0) { - for (StringList ngram : NGramUtils.getNGrams(sample, n)) { - StringList nMinusOneToken = NGramUtils - .getNMinusOneTokenFirst(ngram); - if (size() > 1000000) { - // use stupid backoff - probability += Math.log( - getStupidBackoffProbability(ngram, nMinusOneToken)); - } else { - // use laplace smoothing - probability += Math.log( - getLaplaceSmoothingProbability(ngram, nMinusOneToken)); - } - } - if (Double.isNaN(probability)) { - probability = 0d; - } else if (probability != 0) { - probability = Math.exp(probability); - } - - } - return probability; - } - - @Override public StringList predictNextTokens(StringList tokens) { - double maxProb = Double.NEGATIVE_INFINITY; - StringList token = null; - - for (StringList ngram : this) { - String[] sequence = new String[ngram.size() + tokens.size()]; - for (int i = 0; i < tokens.size(); i++) { - sequence[i] = tokens.getToken(i); - } - for (int i = 0; i < ngram.size(); i++) { - sequence[i + tokens.size()] = ngram.getToken(i); - } - StringList sample = new StringList(sequence); - double v = calculateProbability(sample); - if (v > maxProb) { - maxProb = v; - token = ngram; - } + private static final int DEFAULT_N = 3; + private static final double DEFAULT_K = 1d; + + private final int n; + private final double k; + + public NGramLanguageModel() { + this(DEFAULT_N, DEFAULT_K); + } + + public NGramLanguageModel(int n) { + this(n, DEFAULT_K); + } + + public NGramLanguageModel(double k) { + this(DEFAULT_N, k); + } + + public NGramLanguageModel(int n, double k) { + this.n = n; + this.k = k; + } + + public NGramLanguageModel(InputStream in) throws IOException { + this(in, DEFAULT_N, DEFAULT_K); + } + + public NGramLanguageModel(InputStream in, double k) throws IOException { + this(in, DEFAULT_N, k); + } + + public NGramLanguageModel(InputStream in, int n) throws IOException { + this(in, n, DEFAULT_K); + } + + public NGramLanguageModel(InputStream in, int n, double k) + throws IOException { + super(in); + this.n = n; + this.k = k; + } + + @Override + public double calculateProbability(StringList sample) { + double probability = 0d; + if (size() > 0) { + for (StringList ngram : NGramUtils.getNGrams(sample, n)) { + StringList nMinusOneToken = NGramUtils + .getNMinusOneTokenFirst(ngram); + if (size() > 1000000) { + // use stupid backoff + probability += Math.log( + getStupidBackoffProbability(ngram, nMinusOneToken)); + } else { + // use laplace smoothing + probability += Math.log( + getLaplaceSmoothingProbability(ngram, nMinusOneToken)); } + } + if (Double.isNaN(probability)) { + probability = 0d; + } else if (probability != 0) { + probability = Math.exp(probability); + } - return token; } - - private double getLaplaceSmoothingProbability(StringList ngram, - StringList nMinusOneToken) { - return (getCount(ngram) + k) / (getCount(nMinusOneToken) + k * size()); + return probability; + } + + @Override + public StringList predictNextTokens(StringList tokens) { + double maxProb = Double.NEGATIVE_INFINITY; + StringList token = null; + + for (StringList ngram : this) { + String[] sequence = new String[ngram.size() + tokens.size()]; + for (int i = 0; i < tokens.size(); i++) { + sequence[i] = tokens.getToken(i); + } + for (int i = 0; i < ngram.size(); i++) { + sequence[i + tokens.size()] = ngram.getToken(i); + } + StringList sample = new StringList(sequence); + double v = calculateProbability(sample); + if (v > maxProb) { + maxProb = v; + token = ngram; + } } - private double getStupidBackoffProbability(StringList ngram, - StringList nMinusOneToken) { - int count = getCount(ngram); - if (nMinusOneToken == null || nMinusOneToken.size() == 0) { - return count / size(); - } else if (count > 0) { - return ((double) count) / ((double) getCount( - nMinusOneToken)); // maximum likelihood probability - } else { - StringList nextNgram = NGramUtils.getNMinusOneTokenLast(ngram); - return 0.4d * getStupidBackoffProbability(nextNgram, - NGramUtils.getNMinusOneTokenFirst(nextNgram)); - } + return token; + } + + private double getLaplaceSmoothingProbability(StringList ngram, + StringList nMinusOneToken) { + return (getCount(ngram) + k) / (getCount(nMinusOneToken) + k * size()); + } + + private double getStupidBackoffProbability(StringList ngram, + StringList nMinusOneToken) { + int count = getCount(ngram); + if (nMinusOneToken == null || nMinusOneToken.size() == 0) { + return count / size(); + } else if (count > 0) { + return ((double) count) / ((double) getCount( + nMinusOneToken)); // maximum likelihood probability + } else { + StringList nextNgram = NGramUtils.getNMinusOneTokenLast(ngram); + return 0.4d * getStupidBackoffProbability(nextNgram, + NGramUtils.getNMinusOneTokenFirst(nextNgram)); } + } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java index 187a00d..e546d1c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISModel.java @@ -39,32 +39,9 @@ public final class GISModel extends AbstractModel { * The names of the predicates used in this model. * @param outcomeNames * The names of the outcomes this model predicts. - * @param correctionConstant - * The maximum number of active features which occur in an event. - * @param correctionParam - * The parameter associated with the correction feature. - */ - @Deprecated - public GISModel(Context[] params, String[] predLabels, String[] outcomeNames, - int correctionConstant, double correctionParam) { - this(params, predLabels, outcomeNames, correctionConstant, correctionParam, - new UniformPrior()); - } - - /** - * Creates a new model with the specified parameters, outcome names, and - * predicate/feature labels. - * - * @param params - * The parameters of the model. - * @param predLabels - * The names of the predicates used in this model. - * @param outcomeNames - * The names of the outcomes this model predicts. */ public GISModel(Context[] params, String[] predLabels, String[] outcomeNames) { - this(params, predLabels, outcomeNames, 1, 0, - new UniformPrior()); + this(params, predLabels, outcomeNames, new UniformPrior()); } /** @@ -77,40 +54,17 @@ public final class GISModel extends AbstractModel { * The names of the predicates used in this model. * @param outcomeNames * The names of the outcomes this model predicts. - * @param correctionConstant - * The maximum number of active features which occur in an event. - * @param correctionParam - * The parameter associated with the correction feature. * @param prior * The prior to be used with this model. */ - @Deprecated - public GISModel(Context[] params, String[] predLabels, String[] outcomeNames, - int correctionConstant, double correctionParam, Prior prior) { - super(params, predLabels, outcomeNames, correctionConstant, correctionParam); + public GISModel(Context[] params, String[] predLabels, String[] outcomeNames, Prior prior) { + super(params, predLabels, outcomeNames); this.prior = prior; prior.setLabels(outcomeNames, predLabels); modelType = ModelType.Maxent; } /** - * Creates a new model with the specified parameters, outcome names, and - * predicate/feature labels. - * - * @param params - * The parameters of the model. - * @param predLabels - * The names of the predicates used in this model. - * @param outcomeNames - * The names of the outcomes this model predicts. - * @param prior - * The prior to be used with this model. - */ - public GISModel(Context[] params, String[] predLabels, String[] outcomeNames, Prior prior) { - this(params, predLabels, outcomeNames, 1, 0, prior); - } - - /** * Use this model to evaluate a context and return an array of the likelihood * of each outcome given that context. * http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java index b8c0bf4..34b640b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java @@ -55,7 +55,7 @@ import opennlp.tools.util.TrainingParameters; * relative entropy between the distribution specified by the empirical constraints of the training * data and the specified prior. By default, the uniform distribution is used as the prior. */ -class GISTrainer { +public class GISTrainer { private static final double LLThreshold = 0.0001; private final boolean printMessages; @@ -134,9 +134,6 @@ class GISTrainer { */ private EvalParameters evalParams; - // TODO: GISTrainer should be an AbstractEventTrainer, The reportMap should be - // held by the AET. - private Map<String, String> reportMap = new HashMap<>(); /** * Creates a new <code>GISTrainer</code> instance which does not print * progress messages about training to STDOUT. @@ -203,8 +200,8 @@ class GISTrainer { TrainingParameters indexingParameters = new TrainingParameters(); indexingParameters.put(GIS.CUTOFF_PARAM, Integer.toString(cutoff)); indexingParameters.put(GIS.ITERATIONS_PARAM, Integer.toString(iterations)); - reportMap = new HashMap<>(); - indexer.init(indexingParameters,reportMap); + Map<String, String> reportMap = new HashMap<>(); + indexer.init(indexingParameters, reportMap); indexer.index(eventStream); return trainModel(iterations, indexer); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/IntegerPool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/IntegerPool.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/IntegerPool.java deleted file mode 100644 index b236950..0000000 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/IntegerPool.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.tools.ml.maxent; - -/** - * A pool of read-only, unsigned Integer objects within a fixed, - * non-sparse range. Use this class for operations in which a large - * number of Integer wrapper objects will be created. - * - * @deprecated repalace with Integer.valueOf or auto boxing - */ -@Deprecated -public class IntegerPool { - private Integer[] _table; - - /** - * Creates an IntegerPool with 0..size Integer objects. - * - * @param size - * the size of the pool. - */ - public IntegerPool(int size) { - _table = new Integer[size]; - for (int i = 0; i < size; i++) { - _table[i] = i; - } // end of for (int i = 0; i < size; i++) - } - - /** - * Returns the shared Integer wrapper for <tt>value</tt> if it is inside the - * range managed by this pool. if <tt>value</tt> is outside the range, a new - * Integer instance is returned. - * - * @param value - * an <code>int</code> value - * @return an <code>Integer</code> value - */ - public Integer get(int value) { - if (value < _table.length && value >= 0) { - return _table[value]; - } else { - return value; - } - } -} http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/BinaryQNModelWriter.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/BinaryQNModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/BinaryQNModelWriter.java index de047c8..cf23929 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/BinaryQNModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/BinaryQNModelWriter.java @@ -43,8 +43,7 @@ public class BinaryQNModelWriter extends QNModelWriter { super(model); if (f.getName().endsWith(".gz")) { - output = new DataOutputStream(new GZIPOutputStream( - new FileOutputStream(f))); + output = new DataOutputStream(new GZIPOutputStream(new FileOutputStream(f))); } else { output = new DataOutputStream(new FileOutputStream(f)); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelReader.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelReader.java index d4766fa..0050548 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelReader.java @@ -65,15 +65,11 @@ public class GISModelReader extends AbstractModelReader { * GISModelReader (usually via its the constructor). */ public AbstractModel constructModel() throws IOException { - int correctionConstant = getCorrectionConstant(); - double correctionParam = getCorrectionParameter(); String[] outcomeLabels = getOutcomes(); int[][] outcomePatterns = getOutcomePatterns(); String[] predLabels = getPredicates(); Context[] params = getParameters(outcomePatterns); - - return new GISModel(params, predLabels, outcomeLabels, correctionConstant, - correctionParam); + return new GISModel(params, predLabels, outcomeLabels); } public void checkModelType() throws java.io.IOException { @@ -83,11 +79,4 @@ public class GISModelReader extends AbstractModelReader { + " model as a GIS model." + " You should expect problems."); } - protected int getCorrectionConstant() throws java.io.IOException { - return readInt(); - } - - protected double getCorrectionParameter() throws java.io.IOException { - return readDouble(); - } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelWriter.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelWriter.java index 5649708..34541d3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/GISModelWriter.java @@ -36,8 +36,6 @@ import opennlp.tools.ml.model.Context; public abstract class GISModelWriter extends AbstractModelWriter { protected Context[] PARAMS; protected String[] OUTCOME_LABELS; - protected int CORRECTION_CONSTANT; - protected double CORRECTION_PARAM; protected String[] PRED_LABELS; public GISModelWriter(AbstractModel model) { @@ -49,9 +47,6 @@ public abstract class GISModelWriter extends AbstractModelWriter { @SuppressWarnings("unchecked") Map<String, Integer> pmap = (Map<String, Integer>) data[1]; OUTCOME_LABELS = (String[]) data[2]; - CORRECTION_CONSTANT = (Integer) data[3]; - CORRECTION_PARAM = (Double) data[4]; - PRED_LABELS = new String[pmap.size()]; for (String pred : pmap.keySet()) { PRED_LABELS[pmap.get(pred)] = pred; @@ -73,17 +68,12 @@ public abstract class GISModelWriter extends AbstractModelWriter { // the type of model (GIS) writeUTF("GIS"); - // the value of the correction constant - writeInt(CORRECTION_CONSTANT); - - // the value of the correction constant - writeDouble(CORRECTION_PARAM); - // the mapping from outcomes to their integer indexes writeInt(OUTCOME_LABELS.length); - for (int i = 0; i < OUTCOME_LABELS.length; i++) - writeUTF(OUTCOME_LABELS[i]); + for (String OUTCOME_LABEL : OUTCOME_LABELS) { + writeUTF(OUTCOME_LABEL); + } // the mapping from predicates to the outcomes they contributed to. // The sorting is done so that we actually can write this out more @@ -93,21 +83,23 @@ public abstract class GISModelWriter extends AbstractModelWriter { writeInt(compressed.size()); - for (int i = 0; i < compressed.size(); i++) { - List a = compressed.get(i); - writeUTF(a.size() + a.get(0).toString()); + for (List<ComparablePredicate> aCompressed : compressed) { + writeUTF(aCompressed.size() + ((List) aCompressed).get(0).toString()); } // the mapping from predicate names to their integer indexes writeInt(PARAMS.length); - for (int i = 0; i < sorted.length; i++) - writeUTF(sorted[i].name); + for (ComparablePredicate aSorted : sorted) { + writeUTF(aSorted.name); + } // write out the parameters - for (int i = 0; i < sorted.length; i++) - for (int j = 0; j < sorted[i].params.length; j++) - writeDouble(sorted[i].params[j]); + for (ComparablePredicate aSorted : sorted) { + for (int j = 0; j < aSorted.params.length; j++) { + writeDouble(aSorted.params[j]); + } + } close(); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java index 4213510..e5a60a7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java @@ -47,15 +47,8 @@ public abstract class AbstractModel implements MaxentModel { } public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames) { - init(predLabels,outcomeNames); - this.evalParams = new EvalParameters(params,outcomeNames.length); - } - - @Deprecated - public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames, - int correctionConstant, double correctionParam) { - init(predLabels,outcomeNames); - this.evalParams = new EvalParameters(params,correctionParam,correctionConstant,outcomeNames.length); + init(predLabels, outcomeNames); + this.evalParams = new EvalParameters(params, outcomeNames.length); } private void init(String[] predLabels, String[] outcomeNames) { @@ -159,21 +152,15 @@ public abstract class AbstractModel implements MaxentModel { * <li>index 2: java.lang.String[] containing the names of the outcomes, * stored in the index of the array which represents their * unique ids in the model. - * <li>index 3: java.lang.Integer containing the value of the models - * correction constant - * <li>index 4: java.lang.Double containing the value of the models - * correction parameter * </ul> * * @return An Object[] with the values as described above. */ public final Object[] getDataStructures() { - Object[] data = new Object[5]; + Object[] data = new Object[3]; data[0] = evalParams.getParams(); data[1] = pmap; data[2] = outcomeNames; - data[3] = (int) evalParams.getCorrectionConstant(); - data[4] = evalParams.getCorrectionParam(); return data; } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java index 7da04ac..6c2955c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java @@ -31,62 +31,61 @@ public interface DataIndexer { * @return a 2-D array whose first dimension is the event index and array this refers to contains * the contexts for that event. */ - public int[][] getContexts(); + int[][] getContexts(); /** * Returns an array indicating the number of times a particular event was seen. * @return an array indexed by the event index indicating the number of times a particular event was seen. */ - public int[] getNumTimesEventsSeen(); + int[] getNumTimesEventsSeen(); /** * Returns an array indicating the outcome index for each event. * @return an array indicating the outcome index for each event. */ - public int[] getOutcomeList(); + int[] getOutcomeList(); /** * Returns an array of predicate/context names. * @return an array of predicate/context names indexed by context index. These indices are the * value of the array returned by <code>getContexts</code>. */ - public String[] getPredLabels(); + String[] getPredLabels(); /** * Returns an array of the count of each predicate in the events. * @return an array of the count of each predicate in the events. */ - public int[] getPredCounts(); + int[] getPredCounts(); /** * Returns an array of outcome names. * @return an array of outcome names indexed by outcome index. */ - public String[] getOutcomeLabels(); + String[] getOutcomeLabels(); /** * Returns the values associated with each event context or null if integer values are to be used. * @return the values associated with each event context. */ - public float[][] getValues(); + float[][] getValues(); /** * Returns the number of total events indexed. * @return The number of total events indexed. */ - public int getNumEvents(); + int getNumEvents(); /** * Sets parameters used during the data indexing. - * @param trainParams + * @param trainParams {@link TrainingParameters} */ - public void init(TrainingParameters trainParams,Map<String,String> reportMap); + void init(TrainingParameters trainParams,Map<String,String> reportMap); /** * Performs the data indexing. Make sure the init(...) method is called first. * - * @param eventStream - * @throws IOException + * @param eventStream {@link ObjectStream<Event>} */ - public void index(ObjectStream<Event> eventStream) throws IOException; + void index(ObjectStream<Event> eventStream) throws IOException; } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java index 5d23389..daf9ef2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java @@ -40,32 +40,9 @@ public class EvalParameters { * This is used to normalize the number of features which occur in an event. */ private double correctionConstant; - /** Stores inverse of the correction constant, 1/C. */ - @Deprecated - private final double constantInverse; - /** The correction parameter of the model. */ - @Deprecated - private double correctionParam; - - /** - * Creates a set of parameters which can be evaulated with the eval method. - * @param params The parameters of the model. - * @param correctionParam The correction parameter. - * @param correctionConstant The correction constant. - * @param numOutcomes The number of outcomes. - */ - @Deprecated - public EvalParameters(Context[] params, double correctionParam, - double correctionConstant, int numOutcomes) { + public EvalParameters(Context[] params, int numOutcomes) { this.params = params; - this.correctionParam = correctionParam; this.numOutcomes = numOutcomes; - this.correctionConstant = correctionConstant; - this.constantInverse = 1.0 / correctionConstant; - } - - public EvalParameters(Context[] params, int numOutcomes) { - this(params, 0, 1, numOutcomes); } public Context[] getParams() { @@ -76,30 +53,9 @@ public class EvalParameters { return numOutcomes; } - @Deprecated - public double getCorrectionConstant() { - return correctionConstant; - } - - @Deprecated - public double getConstantInverse() { - return constantInverse; - } - - @Deprecated - public double getCorrectionParam() { - return correctionParam; - } - - @Deprecated - public void setCorrectionParam(double correctionParam) { - this.correctionParam = correctionParam; - } - @Override public int hashCode() { - return Objects.hash(Arrays.hashCode(params), numOutcomes, correctionConstant, - constantInverse, correctionParam); + return Objects.hash(Arrays.hashCode(params), numOutcomes, correctionConstant); } @Override @@ -113,9 +69,7 @@ public class EvalParameters { return Arrays.equals(params, evalParameters.params) && numOutcomes == evalParameters.numOutcomes - && correctionConstant == evalParameters.correctionConstant - && constantInverse == evalParameters.constantInverse - && correctionParam == evalParameters.correctionParam; + && correctionConstant == evalParameters.correctionConstant; } return false; http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java index d8f40da..7b53251 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassDataIndexer.java @@ -37,62 +37,6 @@ import opennlp.tools.util.ObjectStream; */ public class OnePassDataIndexer extends AbstractDataIndexer { - /** - * One argument constructor for DataIndexer which calls the two argument - * constructor assuming no cutoff. - * - * @param eventStream - * An Event[] which contains the a list of all the Events seen in the - * training data. - */ - @Deprecated - public OnePassDataIndexer(ObjectStream<Event> eventStream) throws IOException { - this(eventStream, 0); - } - - @Deprecated - public OnePassDataIndexer(ObjectStream<Event> eventStream, int cutoff) - throws IOException { - this(eventStream, cutoff, true); - } - - /** - * Two argument constructor for DataIndexer. - * - * @param eventStream - * An Event[] which contains the a list of all the Events seen in the - * training data. - * @param cutoff - * The minimum number of times a predicate must have been observed in - * order to be included in the model. - */ - @Deprecated - public OnePassDataIndexer(ObjectStream<Event> eventStream, int cutoff, boolean sort) - throws IOException { - Map<String, Integer> predicateIndex = new HashMap<>(); - List<Event> events; - List<ComparableEvent> eventsToCompare; - - System.out.println("Indexing events using cutoff of " + cutoff + "\n"); - - System.out.print("\tComputing event counts... "); - events = computeEventCounts(eventStream, predicateIndex, cutoff); - System.out.println("done. " + events.size() + " events"); - - System.out.print("\tIndexing... "); - eventsToCompare = index(events, predicateIndex); - // done with event list - events = null; - // done with predicates - predicateIndex = null; - - System.out.println("done."); - - System.out.print("Sorting and merging events... "); - sortAndMerge(eventsToCompare, sort); - System.out.println("Done indexing."); - } - public OnePassDataIndexer(){} @Override http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java index 1c5db03..a5abb34 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java @@ -17,7 +17,6 @@ package opennlp.tools.ml.model; -import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -25,7 +24,6 @@ import java.util.List; import java.util.Map; import opennlp.tools.util.InsufficientTrainingDataException; -import opennlp.tools.util.ObjectStream; /** * An indexer for maxent model data which handles cutoffs for uncommon @@ -35,24 +33,6 @@ import opennlp.tools.util.ObjectStream; public class OnePassRealValueDataIndexer extends OnePassDataIndexer { float[][] values; - - @Deprecated - public OnePassRealValueDataIndexer(ObjectStream<Event> eventStream, int cutoff, boolean sort) - throws IOException { - super(eventStream,cutoff,sort); - } - - /** - * Two argument constructor for DataIndexer. - * @param eventStream An Event[] which contains the a list of all the Events - * seen in the training data. - * @param cutoff The minimum number of times a predicate must have been - * observed in order to be included in the model. - */ - @Deprecated - public OnePassRealValueDataIndexer(ObjectStream<Event> eventStream, int cutoff) throws IOException { - super(eventStream,cutoff); - } public OnePassRealValueDataIndexer() { } @@ -76,7 +56,6 @@ public class OnePassRealValueDataIndexer extends OnePassDataIndexer { return numUniqueEvents; } - @Override protected List<ComparableEvent> index(List<Event> events, Map<String,Integer> predicateIndex) { Map<String,Integer> omap = new HashMap<>(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java index b6de912..133c350 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java @@ -45,67 +45,6 @@ import opennlp.tools.util.ObjectStream; */ public class TwoPassDataIndexer extends AbstractDataIndexer { - /** - * One argument constructor for DataIndexer which calls the two argument - * constructor assuming no cutoff. - * - * @param eventStream An Event[] which contains the a list of all the Events - * seen in the training data. - */ - @Deprecated - public TwoPassDataIndexer(ObjectStream<Event> eventStream) throws IOException { - this(eventStream, 0); - } - - @Deprecated - public TwoPassDataIndexer(ObjectStream<Event> eventStream, int cutoff) throws IOException { - this(eventStream,cutoff,true); - } - - /** - * Two argument constructor for DataIndexer. - * - * @param eventStream An Event[] which contains the a list of all the Events - * seen in the training data. - * @param cutoff The minimum number of times a predicate must have been - * observed in order to be included in the model. - */ - @Deprecated - public TwoPassDataIndexer(ObjectStream<Event> eventStream, int cutoff, boolean sort) throws IOException { - Map<String,Integer> predicateIndex = new HashMap<>(); - List<ComparableEvent> eventsToCompare; - - System.out.println("Indexing events using cutoff of " + cutoff + "\n"); - - System.out.print("\tComputing event counts... "); - - File tmp = File.createTempFile("events", null); - tmp.deleteOnExit(); - Writer osw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tmp),"UTF8")); - int numEvents = computeEventCounts(eventStream, osw, predicateIndex, cutoff); - System.out.println("done. " + numEvents + " events"); - - System.out.print("\tIndexing... "); - - try (FileEventStream fes = new FileEventStream(tmp)) { - eventsToCompare = index(numEvents, fes, predicateIndex); - } - // done with predicates - predicateIndex = null; - tmp.delete(); - System.out.println("done."); - - if (sort) { - System.out.print("Sorting and merging events... "); - } - else { - System.out.print("Collecting events... "); - } - sortAndMerge(eventsToCompare,sort); - System.out.println("Done indexing."); - - } - public TwoPassDataIndexer() {} @Override http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java index 8d38238..3798a8b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java @@ -30,7 +30,7 @@ public class NaiveBayesEvalParameters extends EvalParameters { public NaiveBayesEvalParameters(Context[] params, int numOutcomes, double[] outcomeTotals, long vocabulary) { - super(params, 0, 0, numOutcomes); + super(params, numOutcomes); this.outcomeTotals = outcomeTotals; this.vocabulary = vocabulary; } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java index 116a0cd..629c222 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesTrainer.java @@ -158,7 +158,7 @@ public class NaiveBayesTrainer extends AbstractEventTrainer { EvalParameters evalParams = new EvalParameters(params, numOutcomes); - double stepsize = 1; + double stepSize = 1; for (int ei = 0; ei < numUniqueEvents; ei++) { int targetOutcome = outcomeList[ei]; @@ -166,9 +166,9 @@ public class NaiveBayesTrainer extends AbstractEventTrainer { for (int ci = 0; ci < contexts[ei].length; ci++) { int pi = contexts[ei][ci]; if (values == null) { - params[pi].updateParameter(targetOutcome, stepsize); + params[pi].updateParameter(targetOutcome, stepSize); } else { - params[pi].updateParameter(targetOutcome, stepsize * values[ei][ci]); + params[pi].updateParameter(targetOutcome, stepSize * values[ei][ci]); } } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java index e61553f..dec6274 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java @@ -233,7 +233,7 @@ public class PerceptronTrainer extends AbstractEventTrainer { params[pi].setParameter(aoi, 0.0); } - EvalParameters evalParams = new EvalParameters(params,numOutcomes); + EvalParameters evalParams = new EvalParameters(params, numOutcomes); /* Stores the sum of parameter values of each predicate over many iterations. */ MutableContext[] summedParams = new MutableContext[numPreds]; http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java index 3dec212..394b955 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java @@ -31,10 +31,8 @@ import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventTrainer; import opennlp.tools.ml.TrainerFactory; -import opennlp.tools.ml.model.AbstractModel; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; -import opennlp.tools.ml.model.TwoPassDataIndexer; import opennlp.tools.parser.AbstractBottomUpParser; import opennlp.tools.parser.ChunkSampleStream; import opennlp.tools.parser.HeadRules; @@ -261,15 +259,6 @@ public class Parser extends AbstractBottomUpParser { return newParses; } - /** - * @deprecated Please do not use anymore, use the ObjectStream train methods instead! This method - * will be removed soon. - */ - @Deprecated - public static AbstractModel train(ObjectStream<Event> es, int iterations, int cut) throws IOException { - return opennlp.tools.ml.maxent.GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut)); - } - public static void mergeReportIntoManifest(Map<String, String> manifest, Map<String, String> report, String namespace) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java index 5a99df4..d3904a9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java @@ -31,10 +31,8 @@ import opennlp.tools.chunker.ChunkerModel; import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.EventTrainer; import opennlp.tools.ml.TrainerFactory; -import opennlp.tools.ml.model.AbstractModel; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; -import opennlp.tools.ml.model.TwoPassDataIndexer; import opennlp.tools.parser.AbstractBottomUpParser; import opennlp.tools.parser.ChunkSampleStream; import opennlp.tools.parser.HeadRules; @@ -206,8 +204,8 @@ public class Parser extends AbstractBottomUpParser { @Override protected Parse[] advanceChunks(Parse p, double minChunkScore) { Parse[] parses = super.advanceChunks(p, minChunkScore); - for (int pi = 0; pi < parses.length; pi++) { - Parse[] chunks = parses[pi].getChildren(); + for (Parse parse : parses) { + Parse[] chunks = parse.getChildren(); for (int ci = 0; ci < chunks.length; ci++) { setComplete(chunks[ci]); } @@ -530,9 +528,4 @@ public class Parser extends AbstractBottomUpParser { return train(languageCode, parseSamples, rules, params); } - @Deprecated - public static AbstractModel train(ObjectStream<Event> es, int iterations, int cut) - throws IOException { - return opennlp.tools.ml.maxent.GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut)); - } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/9a9366c7/opennlp-tools/src/main/java/opennlp/tools/sentdetect/AbstractEndOfSentenceScanner.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/AbstractEndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/AbstractEndOfSentenceScanner.java deleted file mode 100644 index a2ac8b7..0000000 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/AbstractEndOfSentenceScanner.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package opennlp.tools.sentdetect; - -import java.util.ArrayList; -import java.util.List; - -import opennlp.tools.ml.maxent.IntegerPool; - -/** - * Abstract class for common methods related to identifying potential ends of sentences. - * @deprecated use DefaultEndOfSentenceScanner instead - */ -@Deprecated -public abstract class AbstractEndOfSentenceScanner implements EndOfSentenceScanner { - - protected static final IntegerPool INT_POOL = new IntegerPool(500); - - public List<Integer> getPositions(String s) { - return getPositions(s.toCharArray()); - } - - public List<Integer> getPositions(StringBuffer buf) { - return getPositions(buf.toString().toCharArray()); - } - - public List<Integer> getPositions(char[] cbuf) { - List<Integer> l = new ArrayList<Integer>(); - char[] eosCharacters = getEndOfSentenceCharacters(); - for (int i = 0; i < cbuf.length; i++) { - for (char eosCharacter : eosCharacters) { - if (cbuf[i] == eosCharacter) { - l.add(INT_POOL.get(i)); - break; - } - } - } - return l; - } -}
