Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java Fri Apr 17 10:21:00 2015 @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -21,86 +21,86 @@ package opennlp.tools.ml.maxent.quasinew import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult; /** - * Implementation of L-BFGS which supports L1-, L2-regularization + * Implementation of L-BFGS which supports L1-, L2-regularization * and Elastic Net for solving convex optimization problems. <p> * Usage example: * <blockquote><pre> * // Quadratic function f(x) = (x-1)^2 + 10 * // f obtains its minimum value 10 at x = 1 * Function f = new Function() { - * + * * {@literal @}Override - * public int getDimension() { - * return 1; + * public int getDimension() { + * return 1; * } - * + * * {@literal @}Override - * public double valueAt(double[] x) { - * return Math.pow(x[0]-1, 2) + 10; + * public double valueAt(double[] x) { + * return Math.pow(x[0]-1, 2) + 10; * } - * + * * {@literal @}Override * public double[] gradientAt(double[] x) { * return new double[] { 2*(x[0]-1) }; * } - * + * * }; - * - * QNMinimizer minimizer = new QNMinimizer(); + * + * QNMinimizer minimizer = new QNMinimizer(); * double[] x = minimizer.minimize(f); * double min = f.valueAt(x); * </pre></blockquote> */ public class QNMinimizer { - + // Function change rate tolerance public static final double CONVERGE_TOLERANCE = 1e-4; - + // Relative gradient norm tolerance - public static final double REL_GRAD_NORM_TOL = 1e-4; + public static final double REL_GRAD_NORM_TOL = 1e-4; // Initial step size public static final double INITIAL_STEP_SIZE = 1.0; - + // Minimum step size public static final double MIN_STEP_SIZE = 1e-10; - + // Default L1-cost public static final double L1COST_DEFAULT = 0; - + // Default L2-cost public static final double L2COST_DEFAULT = 0; - + // Default number of iterations public static final int NUM_ITERATIONS_DEFAULT = 100; - + // Default number of Hessian updates to store public static final int M_DEFAULT = 15; // Default maximum number of function evaluations public static final int MAX_FCT_EVAL_DEFAULT = 30000; - + // L1-regularization cost private double l1Cost; - + // L2-regularization cost private double l2Cost; - + // Maximum number of iterations private int iterations; - + // Number of Hessian updates to store private int m; - + // Maximum number of function evaluations private int maxFctEval; - + // Verbose output private boolean verbose; - + // Objective function's dimension private int dimension; - + // Hessian updates private UpdateInfo updateInfo; @@ -111,20 +111,20 @@ public class QNMinimizer { public QNMinimizer() { this(L1COST_DEFAULT, L2COST_DEFAULT); } - + public QNMinimizer(double l1Cost, double l2Cost) { this(l1Cost, l2Cost, NUM_ITERATIONS_DEFAULT); } - + public QNMinimizer(double l1Cost, double l2Cost, int iterations) { - this(l1Cost, l2Cost, iterations, M_DEFAULT, MAX_FCT_EVAL_DEFAULT); + this(l1Cost, l2Cost, iterations, M_DEFAULT, MAX_FCT_EVAL_DEFAULT); } - - public QNMinimizer(double l1Cost, double l2Cost, + + public QNMinimizer(double l1Cost, double l2Cost, int iterations, int m, int maxFctEval) { this(l1Cost, l2Cost, iterations, m, maxFctEval, true); } - + /** * Constructor * @param l1Cost L1-regularization cost @@ -135,25 +135,25 @@ public class QNMinimizer { * @param verbose verbose output */ public QNMinimizer(double l1Cost, double l2Cost, int iterations, - int m, int maxFctEval, boolean verbose) + int m, int maxFctEval, boolean verbose) { // Check arguments - if (l1Cost < 0 || l2Cost < 0) + if (l1Cost < 0 || l2Cost < 0) throw new IllegalArgumentException( "L1-cost and L2-cost must not be less than zero"); - + if (iterations <= 0) throw new IllegalArgumentException( "Number of iterations must be larger than zero"); - + if (m <= 0) throw new IllegalArgumentException( "Number of Hessian updates must be larger than zero"); - + if (maxFctEval <= 0) throw new IllegalArgumentException( "Maximum number of function evaluations must be larger than zero"); - + this.l1Cost = l1Cost; this.l2Cost = l2Cost; this.iterations = iterations; @@ -171,25 +171,25 @@ public class QNMinimizer { /** * Find the parameters that minimize the objective function - * @param function objective function + * @param function objective function * @return minimizing parameters */ public double[] minimize(Function function) { - + Function l2RegFunction = new L2RegFunction(function, l2Cost); this.dimension = l2RegFunction.getDimension(); this.updateInfo = new UpdateInfo(this.m, this.dimension); - + // Current point is at the origin double[] currPoint = new double[dimension]; - + double currValue = l2RegFunction.valueAt(currPoint); - + // Gradient at the current point - double[] currGrad = new double[dimension]; - System.arraycopy(l2RegFunction.gradientAt(currPoint), 0, + double[] currGrad = new double[dimension]; + System.arraycopy(l2RegFunction.gradientAt(currPoint), 0, currGrad, 0, dimension); - + // Pseudo-gradient - only use when L1-regularization is enabled double[] pseudoGrad = null; if (l1Cost > 0) { @@ -197,7 +197,7 @@ public class QNMinimizer { pseudoGrad = new double[dimension]; computePseudoGrad(currPoint, currGrad, pseudoGrad); } - + LineSearchResult lsr; if (l1Cost > 0) { lsr = LineSearchResult.getInitialObjectForL1( @@ -213,15 +213,15 @@ public class QNMinimizer { display("\n\nPerforming " + iterations + " iterations with " + "L1Cost=" + l1Cost + " and L2Cost=" + l2Cost + "\n"); } - + double[] direction = new double[dimension]; long startTime = System.currentTimeMillis(); - + // Initial step size for the 1st iteration double initialStepSize = l1Cost > 0? ArrayMath.invL2norm(lsr.getPseudoGradAtNext()) : ArrayMath.invL2norm(lsr.getGradAtNext()); - + for (int iter = 1; iter <= iterations; iter++) { // Find direction if (l1Cost > 0) { @@ -230,7 +230,7 @@ public class QNMinimizer { System.arraycopy(lsr.getGradAtNext(), 0, direction, 0, direction.length); } computeDirection(direction); - + // Line search if (l1Cost > 0) { // Constrain the search direction @@ -247,10 +247,10 @@ public class QNMinimizer { else { LineSearch.doLineSearch(l2RegFunction, direction, lsr, initialStepSize); } - + // Save Hessian updates updateInfo.update(lsr); - + if (verbose) { if (iter < 10) display(" " + iter + ": "); @@ -264,17 +264,17 @@ public class QNMinimizer { + "\t" + lsr.getFuncChangeRate() + "\t" + evaluator.evaluate(lsr.getNextPoint()) + "\n"); } else { - display("\t " + lsr.getValueAtNext() + + display("\t " + lsr.getValueAtNext() + "\t" + lsr.getFuncChangeRate() + "\n"); } } if (isConverged(lsr)) break; - + initialStepSize = INITIAL_STEP_SIZE; } - - // Undo L2-shrinkage if Elastic Net is used (since + + // Undo L2-shrinkage if Elastic Net is used (since // in that case, the shrinkage is done twice) if (l1Cost > 0 && l2Cost > 0) { double[] x = lsr.getNextPoint(); @@ -282,37 +282,37 @@ public class QNMinimizer { x[i] = Math.sqrt(1 + l2Cost) * x[i]; } } - + long endTime = System.currentTimeMillis(); long duration = endTime - startTime; display("Running time: " + (duration / 1000.) + "s\n"); - + // Release memory this.updateInfo = null; System.gc(); - - // Avoid returning the reference to LineSearchResult's member so that GC can + + // Avoid returning the reference to LineSearchResult's member so that GC can // collect memory occupied by lsr after this function completes (is it necessary?) double[] parameters = new double[dimension]; System.arraycopy(lsr.getNextPoint(), 0, parameters, 0, dimension); - + return parameters; } - + /** - * Pseudo-gradient for L1-regularization (see equation 4 in the paper + * Pseudo-gradient for L1-regularization (see equation 4 in the paper * "Scalable Training of L1-Regularized Log-Linear Models", Andrew et al. 2007) - * + * * @param x current point * @param g gradient at x * @param pg pseudo-gradient at x which is to be computed */ private void computePseudoGrad(double[] x, double[] g, double[] pg) { for (int i = 0; i < dimension; i++) { - if (x[i] < 0) { + if (x[i] < 0) { pg[i] = g[i] - l1Cost; } - else if (x[i] > 0) { + else if (x[i] > 0) { pg[i] = g[i] + l1Cost; } else { @@ -330,19 +330,19 @@ public class QNMinimizer { } } } - + /** - * L-BFGS two-loop recursion (see Nocedal & Wright 2006, Numerical Optimization, p. 178) + * L-BFGS two-loop recursion (see Nocedal & Wright 2006, Numerical Optimization, p. 178) */ private void computeDirection(double[] direction) { - + // Implemented two-loop Hessian update method. int k = updateInfo.kCounter; double[] rho = updateInfo.rho; double[] alpha = updateInfo.alpha; // just to avoid recreating alpha double[][] S = updateInfo.S; double[][] Y = updateInfo.Y; - + // First loop for (int i = k - 1; i >= 0; i--) { alpha[i] = rho[i] * ArrayMath.innerProduct(S[i], direction); @@ -363,54 +363,54 @@ public class QNMinimizer { direction[i] = -direction[i]; } } - + private boolean isConverged(LineSearchResult lsr) { - + // Check function's change rate if (lsr.getFuncChangeRate() < CONVERGE_TOLERANCE) { if (verbose) - display("Function change rate is smaller than the threshold " + display("Function change rate is smaller than the threshold " + CONVERGE_TOLERANCE + ".\nTraining will stop.\n\n"); return true; } - + // Check gradient's norm using the criteria: ||g(x)|| / max(1, ||x||) < threshold double xNorm = Math.max(1, ArrayMath.l2norm(lsr.getNextPoint())); - double gradNorm = l1Cost > 0? + double gradNorm = l1Cost > 0? ArrayMath.l2norm(lsr.getPseudoGradAtNext()) : ArrayMath.l2norm(lsr.getGradAtNext()); if (gradNorm / xNorm < REL_GRAD_NORM_TOL) { if (verbose) - display("Relative L2-norm of the gradient is smaller than the threshold " + display("Relative L2-norm of the gradient is smaller than the threshold " + REL_GRAD_NORM_TOL + ".\nTraining will stop.\n\n"); return true; } - + // Check step size if (lsr.getStepSize() < MIN_STEP_SIZE) { - if (verbose) - display("Step size is smaller than the minimum step size " + if (verbose) + display("Step size is smaller than the minimum step size " + MIN_STEP_SIZE + ".\nTraining will stop.\n\n"); return true; } - + // Check number of function evaluations if (lsr.getFctEvalCount() > this.maxFctEval) { if (verbose) - display("Maximum number of function evaluations has exceeded the threshold " + display("Maximum number of function evaluations has exceeded the threshold " + this.maxFctEval + ".\nTraining will stop.\n\n"); return true; } - - return false; + + return false; } - + /** * Shorthand for System.out.print */ private void display(String s) { System.out.print(s); } - + /** * Class to store vectors for Hessian approximation update. */ @@ -432,16 +432,16 @@ public class QNMinimizer { rho = new double[this.m]; alpha = new double[this.m]; } - + public void update(LineSearchResult lsr) { double[] currPoint = lsr.getCurrPoint(); - double[] gradAtCurr = lsr.getGradAtCurr(); + double[] gradAtCurr = lsr.getGradAtCurr(); double[] nextPoint = lsr.getNextPoint(); - double[] gradAtNext = lsr.getGradAtNext(); - + double[] gradAtNext = lsr.getGradAtNext(); + // Inner product of S_k and Y_k - double SYk = 0.0; - + double SYk = 0.0; + // Add new ones. if (kCounter < m) { for (int j = 0; j < dimension; j++) { @@ -450,7 +450,7 @@ public class QNMinimizer { SYk += S[kCounter][j] * Y[kCounter][j]; } rho[kCounter] = 1.0 / SYk; - } + } else { // Discard oldest vectors and add new ones. for (int i = 0; i < m - 1; i++) { @@ -461,12 +461,12 @@ public class QNMinimizer { for (int j = 0; j < dimension; j++) { S[m - 1][j] = nextPoint[j] - currPoint[j]; Y[m - 1][j] = gradAtNext[j] - gradAtCurr[j]; - SYk += S[m - 1][j] * Y[m - 1][j]; + SYk += S[m - 1][j] * Y[m - 1][j]; } rho[m - 1] = 1.0 / SYk; } - - if (kCounter < m) + + if (kCounter < m) kCounter++; } } @@ -477,7 +477,7 @@ public class QNMinimizer { public static class L2RegFunction implements Function { private Function f; private double l2Cost; - + public L2RegFunction(Function f, double l2Cost) { this.f = f; this.l2Cost = l2Cost; @@ -509,18 +509,18 @@ public class QNMinimizer { } return gradient; } - + private void checkDimension(double[] x) { if (x.length != getDimension()) throw new IllegalArgumentException( "x's dimension is not the same as function's dimension"); } } - + /** * Evaluate quality of training parameters. For example, * it can be used to report model's training accuracy when - * we train a Maximum Entropy classifier. + * we train a Maximum Entropy classifier. */ public static interface Evaluator { /**
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java Fri Apr 17 10:21:00 2015 @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -22,7 +22,7 @@ import opennlp.tools.ml.model.AbstractMo import opennlp.tools.ml.model.Context; public class QNModel extends AbstractModel { - + public QNModel(Context[] params, String[] predLabels, String[] outcomeNames) { super(params, predLabels, outcomeNames); this.modelType = ModelType.MaxentQn; @@ -43,33 +43,33 @@ public class QNModel extends AbstractMod public double[] eval(String[] context, double[] probs) { return eval(context, null, probs); } - + public double[] eval(String[] context, float[] values) { return eval(context, values, new double[evalParams.getNumOutcomes()]); } - + /** * Model evaluation which should be used during inference. * @param context - * The predicates which have been observed at the present - * decision point. + * The predicates which have been observed at the present + * decision point. * @param values * Weights of the predicates which have been observed at - * the present decision point. + * the present decision point. * @param probs * Probability for outcomes. * @return Normalized probabilities for the outcomes given the context. */ private double[] eval(String[] context, float[] values, double[] probs) { Context[] params = evalParams.getParams(); - + for (int ci = 0; ci < context.length; ci++) { int predIdx = getPredIndex(context[ci]); if (predIdx >= 0) { double predValue = 1.0; if (values != null) predValue = values[ci]; - + double[] parameters = params[predIdx].getParameters(); int[] outcomes = params[predIdx].getOutcomes(); for (int i = 0; i < outcomes.length; i++) { @@ -78,7 +78,7 @@ public class QNModel extends AbstractMod } } } - + double logSumExp = ArrayMath.logSumOfExps(probs); for (int oi = 0; oi < outcomeNames.length; oi++) { probs[oi] = Math.exp(probs[oi] - logSumExp); @@ -87,13 +87,13 @@ public class QNModel extends AbstractMod } /** - * Model evaluation which should be used during training to report model accuracy. - * @param context - * Indices of the predicates which have been observed at the present - * decision point. + * Model evaluation which should be used during training to report model accuracy. + * @param context + * Indices of the predicates which have been observed at the present + * decision point. * @param values * Weights of the predicates which have been observed at - * the present decision point. + * the present decision point. * @param probs * Probability for outcomes * @param nOutcomes @@ -104,9 +104,9 @@ public class QNModel extends AbstractMod * Model parameters * @return Normalized probabilities for the outcomes given the context. */ - public static double[] eval(int[] context, float[] values, double[] probs, + public static double[] eval(int[] context, float[] values, double[] probs, int nOutcomes, int nPredLabels, double[] parameters) { - + for (int i = 0; i < context.length; i++) { int predIdx = context[i]; double predValue = values != null? values[i] : 1.0; @@ -114,20 +114,20 @@ public class QNModel extends AbstractMod probs[oi] += predValue * parameters[oi * nPredLabels + predIdx]; } } - + double logSumExp = ArrayMath.logSumOfExps(probs); - + for (int oi = 0; oi < nOutcomes; oi++) { probs[oi] = Math.exp(probs[oi] - logSumExp); } - + return probs; } - + public boolean equals(Object obj) { if (!(obj instanceof QNModel)) return false; - + QNModel objModel = (QNModel) obj; if (this.outcomeNames.length != objModel.outcomeNames.length) return false; @@ -135,7 +135,7 @@ public class QNModel extends AbstractMod if (!this.outcomeNames[i].equals(objModel.outcomeNames[i])) return false; } - + if (this.pmap.size() != objModel.pmap.size()) return false; String[] pmapArray = new String[pmap.size()]; @@ -144,7 +144,7 @@ public class QNModel extends AbstractMod if (i != objModel.pmap.get(pmapArray[i])) return false; } - + // compare evalParameters Context[] contextComparing = objModel.evalParams.getParams(); if (this.evalParams.getParams().length != contextComparing.length) @@ -156,14 +156,14 @@ public class QNModel extends AbstractMod if (this.evalParams.getParams()[i].getOutcomes()[j] != contextComparing[i].getOutcomes()[j]) return false; } - + if (this.evalParams.getParams()[i].getParameters().length != contextComparing[i].getParameters().length) return false; for (int j = 0; i < this.evalParams.getParams()[i].getParameters().length; i++) { if (this.evalParams.getParams()[i].getParameters()[j] != contextComparing[i].getParameters()[j]) return false; } - } + } return true; } } \ No newline at end of file Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java Fri Apr 17 10:21:00 2015 @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -34,33 +34,33 @@ import opennlp.tools.ml.model.DataIndexe public class QNTrainer extends AbstractEventTrainer { public static final String MAXENT_QN_VALUE = "MAXENT_QN"; - + public static final String THREADS_PARAM = "Threads"; public static final int THREADS_DEFAULT = 1; - + public static final String L1COST_PARAM = "L1Cost"; - public static final double L1COST_DEFAULT = 0.1; - + public static final double L1COST_DEFAULT = 0.1; + public static final String L2COST_PARAM = "L2Cost"; - public static final double L2COST_DEFAULT = 0.1; - + public static final double L2COST_DEFAULT = 0.1; + // Number of Hessian updates to store public static final String M_PARAM = "NumOfUpdates"; public static final int M_DEFAULT = 15; - + // Maximum number of function evaluations public static final String MAX_FCT_EVAL_PARAM = "MaxFctEval"; public static final int MAX_FCT_EVAL_DEFAULT = 30000; // Number of threads private int threads; - + // L1-regularization cost private double l1Cost; - + // L2-regularization cost private double l2Cost; - + // Settings for QNMinimizer private int m; private int maxFctEval; @@ -112,34 +112,34 @@ public class QNTrainer extends AbstractE return false; } this.m = m; - + // Maximum number of function evaluations int maxFctEval = getIntParam(MAX_FCT_EVAL_PARAM, MAX_FCT_EVAL_DEFAULT); if (maxFctEval < 0) { return false; } this.maxFctEval = maxFctEval; - + // Number of threads must be >= 1 int threads = getIntParam(THREADS_PARAM, THREADS_DEFAULT); if (threads < 1) { return false; } this.threads = threads; - + // Regularization costs must be >= 0 double l1Cost = getDoubleParam(L1COST_PARAM, L1COST_DEFAULT); if (l1Cost < 0) { return false; } this.l1Cost = l1Cost; - - double l2Cost = getDoubleParam(L2COST_PARAM, L2COST_DEFAULT); + + double l2Cost = getDoubleParam(L2COST_PARAM, L2COST_DEFAULT); if (l2Cost < 0) { return false; } this.l2Cost = l2Cost; - + return true; } @@ -154,7 +154,7 @@ public class QNTrainer extends AbstractE // << Members related to AbstractEventTrainer public QNModel trainModel(int iterations, DataIndexer indexer) { - + // Train model's parameters Function objectiveFunction = null; if (threads == 1) { @@ -164,7 +164,7 @@ public class QNTrainer extends AbstractE System.out.println("Computing model parameters in " + threads + " threads ..."); objectiveFunction = new ParallelNegLogLikelihood(indexer, threads); } - + QNMinimizer minimizer = new QNMinimizer( l1Cost, l2Cost, iterations, m, maxFctEval, verbose); minimizer.setEvaluator(new ModelEvaluator(indexer)); @@ -172,25 +172,25 @@ public class QNTrainer extends AbstractE double[] parameters = minimizer.minimize(objectiveFunction); // Construct model with trained parameters - String[] predLabels = indexer.getPredLabels(); + String[] predLabels = indexer.getPredLabels(); int nPredLabels = predLabels.length; String[] outcomeNames = indexer.getOutcomeLabels(); int nOutcomes = outcomeNames.length; - + Context[] params = new Context[nPredLabels]; for (int ci = 0; ci < params.length; ci++) { List<Integer> outcomePattern = new ArrayList<Integer>(nOutcomes); - List<Double> alpha = new ArrayList<Double>(nOutcomes); + List<Double> alpha = new ArrayList<Double>(nOutcomes); for (int oi = 0; oi < nOutcomes; oi++) { double val = parameters[oi * nPredLabels + ci]; outcomePattern.add(oi); alpha.add(val); } - params[ci] = new Context(ArrayMath.toIntArray(outcomePattern), + params[ci] = new Context(ArrayMath.toIntArray(outcomePattern), ArrayMath.toDoubleArray(alpha)); } - + return new QNModel(params, predLabels, outcomeNames); } @@ -206,7 +206,7 @@ public class QNTrainer extends AbstractE } /** - * Evaluate the current model on training data set + * Evaluate the current model on training data set * @return model's training accuracy */ @Override @@ -214,17 +214,17 @@ public class QNTrainer extends AbstractE int[][] contexts = indexer.getContexts(); float[][] values = indexer.getValues(); int[] nEventsSeen = indexer.getNumTimesEventsSeen(); - int[] outcomeList = indexer.getOutcomeList(); + int[] outcomeList = indexer.getOutcomeList(); int nOutcomes = indexer.getOutcomeLabels().length; int nPredLabels = indexer.getPredLabels().length; - + int nCorrect = 0; int nTotalEvents = 0; - + for (int ei = 0; ei < contexts.length; ei++) { int[] context = contexts[ei]; float[] value = values == null? null: values[ei]; - + double[] probs = new double[nOutcomes]; QNModel.eval(context, value, probs, nOutcomes, nPredLabels, parameters); int outcome = ArrayMath.maxIdx(probs); @@ -233,7 +233,7 @@ public class QNTrainer extends AbstractE } nTotalEvents += nEventsSeen[ei]; } - + return (double) nCorrect / nTotalEvents; } } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java Fri Apr 17 10:21:00 2015 @@ -271,8 +271,8 @@ public class NameFinderME implements Tok */ private Span[] setProbs(Span[] spans) { double[] probs = probs(spans); - if (probs != null) { - + if (probs != null) { + for (int i = 0; i < probs.length; i++) { double prob = probs[i]; spans[i]= new Span(spans[i], prob); Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java Fri Apr 17 10:21:00 2015 @@ -37,5 +37,5 @@ public interface TokenNameFinder { * This method is typical called at the end of a document. */ public void clearAdaptiveData(); - + } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java Fri Apr 17 10:21:00 2015 @@ -68,7 +68,7 @@ public class TokenNameFinderFactory exte protected Map<String, Object> getResources() { return resources; } - + protected byte[] getFeatureGenerator() { return featureGeneratorBytes; } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Fri Apr 17 10:21:00 2015 @@ -272,7 +272,7 @@ public class TokenNameFinderModel extend serializers.put("brownclustertoken", new BrownCluster.BrownClusterSerializer()); serializers.put("brownclustertokenclass", new BrownCluster.BrownClusterSerializer()); serializers.put("brownclusterbigram", new BrownCluster.BrownClusterSerializer()); - + return serializers; } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java Fri Apr 17 10:21:00 2015 @@ -29,18 +29,18 @@ public class ParserChunkerFactory extend public ChunkerContextGenerator getContextGenerator() { return new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE); } - + @Override public SequenceValidator<String> getSequenceValidator() { - + MaxentModel model = (MaxentModel) artifactProvider.getArtifact("chunker.model"); - + String outcomes[] = new String[model.getNumOutcomes()]; for (int i = 0; i < outcomes.length; i++) { outcomes[i] = model.getOutcome(i); } - + return new ParserChunkerSequenceValidator(outcomes); } - + } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java Fri Apr 17 10:21:00 2015 @@ -28,10 +28,10 @@ import opennlp.tools.util.eval.FMeasure; /** * Class for ParserEvaluator. - * This ParserEvaluator behaves like EVALB with no exceptions, e.g, - * without removing punctuation tags, or equality between ADVP and PRT + * This ParserEvaluator behaves like EVALB with no exceptions, e.g, + * without removing punctuation tags, or equality between ADVP and PRT * (as in COLLINS convention). To follow parsing evaluation conventions - * (Bikel, Collins, Charniak, etc.) as in EVALB, options are to be added + * (Bikel, Collins, Charniak, etc.) as in EVALB, options are to be added * to the {@code ParserEvaluatorTool}. * */ Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java Fri Apr 17 10:21:00 2015 @@ -286,12 +286,12 @@ public class Parser extends AbstractBott // tag TrainingParameters posTaggerParams = mlParams.getParameters("tagger"); - + if (!posTaggerParams.getSettings().containsKey(BeamSearch.BEAM_SIZE_PARAMETER)) { mlParams.put("tagger", BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(10)); } - + POSModel posModel = POSTaggerME.train(languageCode, new PosSampleStream(parseSamples), mlParams.getParameters("tagger"), new POSTaggerFactory()); Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java Fri Apr 17 10:21:00 2015 @@ -43,16 +43,16 @@ import opennlp.tools.util.model.Artifact import opennlp.tools.util.model.SerializableArtifact; /** - * Class for storing the Ancora Spanish head rules associated with parsing. In this class - * headrules for noun phrases are specified. The rest of the rules are + * Class for storing the Ancora Spanish head rules associated with parsing. In this class + * headrules for noun phrases are specified. The rest of the rules are * in opennlp-tools/lang/es/parser/es-head-rules * * NOTE: This class has been adapted from opennlp.tools.parser.lang.en.HeadRules * * The main change is the constituents search direction in the first for loop. * - * Note also the change in the return of the getHead() method: - * In the lang.en.HeadRules class: return constituents[ci].getHead(); + * Note also the change in the return of the getHead() method: + * In the lang.en.HeadRules class: return constituents[ci].getHead(); * Now: return constituents[ci]; * * Other changes include removal of deprecated methods. Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Fri Apr 17 10:21:00 2015 @@ -96,7 +96,7 @@ public final class POSModel extends Base Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); manifest.setProperty(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize)); - + artifactMap.put(POS_MODEL_ENTRY_NAME, posModel); checkArtifactMap(); } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java Fri Apr 17 10:21:00 2015 @@ -56,7 +56,7 @@ import opennlp.tools.util.model.ModelTyp public class POSTaggerME implements POSTagger { public static final int DEFAULT_BEAM_SIZE = 3; - + private POSModel modelPackage; /** @@ -95,7 +95,7 @@ public class POSTaggerME implements POST * * @param model * @param beamSize - * + * * @deprecated the beam size should be specified in the params during training */ @Deprecated @@ -130,13 +130,13 @@ public class POSTaggerME implements POST POSTaggerFactory factory = model.getFactory(); int beamSize = POSTaggerME.DEFAULT_BEAM_SIZE; - + String beamSizeString = model.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER); - + if (beamSizeString != null) { beamSize = Integer.parseInt(beamSizeString); } - + modelPackage = model; contextGen = factory.getPOSContextGenerator(beamSize); Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java Fri Apr 17 10:21:00 2015 @@ -260,8 +260,8 @@ public class SentenceDetectorME implemen */ for (int i = 0; i < spans.length; i++) { double prob = sentProbs.get(i); - spans[i]= new Span(spans[i], prob); - + spans[i]= new Span(spans[i], prob); + } return spans; Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java Fri Apr 17 10:21:00 2015 @@ -28,12 +28,12 @@ import opennlp.tools.sentdetect.SDContex import opennlp.tools.sentdetect.lang.th.SentenceContextGenerator; public class Factory { - + public static final char[] ptEosCharacters = new char[] { '.', '?', '!', ';', ':', '(', ')', '«', '»', '\'', '"' }; public static final char[] defaultEosCharacters = new char[] { '.', '!', '?' }; - + public static final char[] thEosCharacters = new char[] { ' ','\n' }; public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) { Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java Fri Apr 17 10:21:00 2015 @@ -78,7 +78,7 @@ public class DetokenizationDictionary { private final Map<String, DetokenizationDictionary.Operation> operationTable = new HashMap<String, DetokenizationDictionary.Operation>(); - + /** * Initializes the current instance. * Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java Fri Apr 17 10:21:00 2015 @@ -88,11 +88,11 @@ public class SimpleTokenizer extends Abs /** - * + * * @param args the command line arguments * * @throws IOException if reading or writing from stdin or stdout fails in anyway - * + * * @deprecated this method will be removed, use the new command line interface instead! */ @Deprecated Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java Fri Apr 17 10:21:00 2015 @@ -125,15 +125,15 @@ public class TokenizerFactory extends Ba /** * Factory method the framework uses create a new {@link TokenizerFactory}. - * + * * @param subclassName the name of the class implementing the {@link TokenizerFactory} * @param languageCode the language code the tokenizer should use * @param abbreviationDictionary an optional dictionary containing abbreviations, or null if not present * @param useAlphaNumericOptimization indicate if the alpha numeric optimization should be enabled or disabled * @param alphaNumericPattern the pattern the alpha numeric optimization should use - * + * * @return the instance of the Tokenizer Factory - * + * * @throws InvalidFormatException if once of the input parameters doesn't comply if the expected format */ public static TokenizerFactory create(String subclassName, @@ -185,7 +185,7 @@ public class TokenizerFactory extends Ba /** * Gets whether to use alphanumeric optimization. - * + * * @return true if the alpha numeric optimization is enabled, otherwise false */ public boolean isUseAlphaNumericOptmization() { @@ -211,7 +211,7 @@ public class TokenizerFactory extends Ba /** * Retrieves the language code. - * + * * @return the language code */ public String getLanguageCode() { @@ -223,7 +223,7 @@ public class TokenizerFactory extends Ba /** * Gets the context generator - * + * * @return a new instance of the context generator */ public TokenContextGenerator getContextGenerator() { Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStream.java Fri Apr 17 10:21:00 2015 @@ -50,7 +50,7 @@ public interface ObjectStream<T> extends * null will return each object from the underlying source exactly once. * * @return the next object or null to signal that the stream is exhausted - * + * * @throws IOException if there is an error during reading */ T read() throws IOException; @@ -61,7 +61,7 @@ public interface ObjectStream<T> extends * the stream if multiple passes over the objects are required. * * The implementation of this method is optional. - * + * * @throws IOException if there is an error during reseting the stream */ void reset() throws IOException, UnsupportedOperationException; Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/Span.java Fri Apr 17 10:21:00 2015 @@ -241,7 +241,7 @@ public class Span implements Comparable< * Return a copy of this span with leading and trailing white spaces removed. * * @param text - * + * * @return the trimmed span or the same object if already trimmed */ public Span trim(CharSequence text) { Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java Fri Apr 17 10:21:00 2015 @@ -23,23 +23,23 @@ import java.util.List; * Generates Brown cluster features for token bigrams. */ public class BrownBigramFeatureGenerator extends FeatureGeneratorAdapter { - + private BrownCluster brownLexicon; - + public BrownBigramFeatureGenerator(BrownCluster dict){ this.brownLexicon = dict; } - + public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) { - + List<String> wordClasses = BrownTokenClasses.getWordClasses(tokens[index], brownLexicon); if (index > 0) { List<String> prevWordClasses = BrownTokenClasses.getWordClasses(tokens[index - 1], brownLexicon); for (int i = 0; i < wordClasses.size() && i < prevWordClasses.size(); i++) features.add("p" + "browncluster" + "," + "browncluster" + "=" + prevWordClasses.get(i) + "," + wordClasses.get(i)); } - + if (index + 1 < tokens.length) { List<String> nextWordClasses = BrownTokenClasses.getWordClasses(tokens[index + 1], brownLexicon); for (int i = 0; i < wordClasses.size() && i < nextWordClasses.size(); i++) { @@ -47,6 +47,6 @@ public class BrownBigramFeatureGenerator } } } - + } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java Fri Apr 17 10:21:00 2015 @@ -35,16 +35,16 @@ import opennlp.tools.util.model.Artifact import opennlp.tools.util.model.SerializableArtifact; /** - * + * * Class to load a Brown cluster document: word\tword_class\tprob * http://metaoptimize.com/projects/wordreprs/ - * - * The file containing the clustering lexicon has to be passed as the + * + * The file containing the clustering lexicon has to be passed as the * value of the dict attribute of each BrownCluster feature generator. - * + * */ public class BrownCluster implements SerializableArtifact { - + private static final Pattern tabPattern = Pattern.compile("\t"); public static class BrownClusterSerializer implements ArtifactSerializer<BrownCluster> { @@ -59,7 +59,7 @@ public class BrownCluster implements Ser artifact.serialize(out); } } - + private Map<String, String> tokenToClusterMap = new HashMap<String, String>(); /** Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java Fri Apr 17 10:21:00 2015 @@ -23,23 +23,23 @@ import java.util.List; * Generates Brown cluster features for current token and token class. */ public class BrownTokenClassFeatureGenerator extends FeatureGeneratorAdapter { - + private BrownCluster brownLexicon; - + public BrownTokenClassFeatureGenerator(BrownCluster dict){ this.brownLexicon = dict; } - + public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) { - + String wordShape = FeatureGeneratorUtil.tokenFeature(tokens[index]); List<String> wordClasses = BrownTokenClasses.getWordClasses(tokens[index], brownLexicon); - + for (int i = 0; i < wordClasses.size(); i++) { features.add("c," + "browncluster" + "=" + wordShape + "," + wordClasses.get(i)); } } - + } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java Fri Apr 17 10:21:00 2015 @@ -26,13 +26,13 @@ import java.util.List; * */ public class BrownTokenClasses { - + public static final int[] pathLengths = { 4, 6, 10, 20 }; - + /** * It provides a list containing the pathLengths for a token if found * in the Map:token,BrownClass. - * + * * @param token the token to be looked up in the brown clustering map * @param brownLexicon the Brown clustering map * @return the list of the paths for a token @@ -54,6 +54,6 @@ public class BrownTokenClasses { return pathLengthsList; } } - + } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java Fri Apr 17 10:21:00 2015 @@ -23,21 +23,21 @@ import java.util.List; * Generates Brown cluster features for current token. */ public class BrownTokenFeatureGenerator extends FeatureGeneratorAdapter { - + private BrownCluster brownLexicon; - + public BrownTokenFeatureGenerator(BrownCluster dict){ this.brownLexicon = dict; } - + public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) { - + List<String> wordClasses = BrownTokenClasses.getWordClasses(tokens[index], brownLexicon); - + for (int i = 0; i < wordClasses.size(); i++) { features.add("browncluster" + "=" + wordClasses.get(i)); } } - + } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java Fri Apr 17 10:21:00 2015 @@ -299,7 +299,7 @@ public class GeneratorFactory { factoryMap.put("wordcluster", new WordClusterFeatureGeneratorFactory()); } } - + /** * Generates Brown clustering features for current token. */ @@ -324,7 +324,7 @@ public class GeneratorFactory { factoryMap.put("brownclustertoken", new BrownClusterTokenFeatureGeneratorFactory()); } } - + /** * Generates Brown clustering features for token classes. */ @@ -349,7 +349,7 @@ public class GeneratorFactory { factoryMap.put("brownclustertokenclass", new BrownClusterTokenClassFeatureGeneratorFactory()); } } - + /** * Generates Brown clustering features for token bigrams. */ @@ -719,7 +719,7 @@ public class GeneratorFactory { org.w3c.dom.Document xmlDescriptorDOM = createDOM(xmlDescriptorIn); XPath xPath = XPathFactory.newInstance().newXPath(); - + NodeList customElements; try { @@ -746,7 +746,7 @@ public class GeneratorFactory { } return mapping; } - + /** * Provides a list with all the elements in the xml feature descriptor. * @param xmlDescriptorIn the xml feature descriptor @@ -757,7 +757,7 @@ public class GeneratorFactory { public static List<Element> getDescriptorElements( InputStream xmlDescriptorIn) throws IOException, InvalidFormatException { - + List<Element> elements = new ArrayList<Element>(); org.w3c.dom.Document xmlDescriptorDOM = createDOM(xmlDescriptorIn); XPath xPath = XPathFactory.newInstance().newXPath(); Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PreviousTwoMapFeatureGenerator.java Fri Apr 17 10:21:00 2015 @@ -32,7 +32,7 @@ public class PreviousTwoMapFeatureGenera * Generates previous decision features for the token based on contents of the previous map. */ public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) { - + if (index > 0) { features.add("ppd=" + previousMap.get(tokens[index]) + "," + previousMap.get(tokens[index - 1])); } Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java?rev=1674262&r1=1674261&r2=1674262&view=diff ============================================================================== --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java (original) +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TrigramNameFeatureGenerator.java Fri Apr 17 10:21:00 2015 @@ -21,7 +21,7 @@ import java.util.List; /** * Adds trigram features based on tokens and token classes. - * + * */ public class TrigramNameFeatureGenerator extends FeatureGeneratorAdapter {
