Author: vkhuc
Date: Mon Jul 21 04:07:43 2014
New Revision: 1612182
URL: http://svn.apache.org/r1612182
Log:
OPENNLP-703 Improved NegLogLikelihood so that it runs faster and uses less
memory
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java?rev=1612182&r1=1612181&r2=1612182&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java
Mon Jul 21 04:07:43 2014
@@ -1,183 +1,160 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package opennlp.tools.ml.maxent.quasinewton;
-
-import java.util.Arrays;
-
-import opennlp.tools.ml.model.DataIndexer;
-import opennlp.tools.ml.model.OnePassRealValueDataIndexer;
-
-/**
- * Evaluate negative log-likelihood and its gradient from DataIndexer.
- */
-public class NegLogLikelihood implements Function {
-
- private int dimension;
- private double[] empiricalCount;
- private int numOutcomes;
- private int numFeatures;
- private int numContexts;
-
- // Information from data index
- private final float[][] values;
- private final int[][] contexts;
- private final int[] outcomeList;
- private final int[] numTimesEventsSeen;
-
- // For computing negative log-likelihood
- private double[][] voteSum;
- private double[] logSumExp;
-
- // For gradient computation
- private double[] gradient;
- private double[] expectedCount;
-
- public NegLogLikelihood(DataIndexer indexer) {
-
- // Get data from indexer.
- if (indexer instanceof OnePassRealValueDataIndexer) {
- this.values = indexer.getValues();
- } else {
- this.values = null;
- }
-
- this.contexts = indexer.getContexts();
- this.outcomeList = indexer.getOutcomeList();
- this.numTimesEventsSeen = indexer.getNumTimesEventsSeen();
-
- this.numOutcomes = indexer.getOutcomeLabels().length;
- this.numFeatures = indexer.getPredLabels().length;
- this.numContexts = this.contexts.length;
- this.dimension = numOutcomes * numFeatures;
- this.empiricalCount = new double[dimension];
-
- this.voteSum = new double[numContexts][numOutcomes];
- this.logSumExp = new double[numContexts];
-
- this.gradient = new double[dimension];
- this.expectedCount = new double[dimension];
-
- computeEmpCount();
- }
-
- public int getDimension() {
- return this.dimension;
- }
-
- public double[] getInitialPoint() {
- return new double[dimension];
- }
-
- /**
- * Negative log-likelihood
- */
- public double valueAt(double[] x) {
-
- if (x.length != this.dimension)
- throw new IllegalArgumentException(
- "x is invalid, its dimension is not equal to domain dimension.");
-
- computeSums(x); // Compute voteSum and logSumExp
-
- double negLogLikelihood = 0.;
- for (int ci = 0; ci < numContexts; ci++) {
- int outcome = this.outcomeList[ci];
- negLogLikelihood += (voteSum[ci][outcome] - logSumExp[ci]) *
numTimesEventsSeen[ci];
- }
- negLogLikelihood = -negLogLikelihood;
-
- return negLogLikelihood;
- }
-
- /**
- * Compute gradient
- */
- public double[] gradientAt(double[] x) {
-
- if (x.length != this.dimension)
- throw new IllegalArgumentException(
- "x is invalid, its dimension is not equal to the function.");
-
- computeSums(x); // Compute voteSum and logSumExp
-
- // Reset
- Arrays.fill(expectedCount, 0);
- for (int ci = 0; ci < numContexts; ci++) {
- for (int oi = 0; oi < numOutcomes; oi++) {
- for (int af = 0; af < contexts[ci].length; af++) {
- int vectorIndex = indexOf(oi, this.contexts[ci][af]);
- double predValue = 1.;
- if (values != null) predValue = this.values[ci][af];
- if (predValue == 0.) continue;
-
- expectedCount[vectorIndex] +=
- predValue * Math.exp(voteSum[ci][oi] - logSumExp[ci]) *
this.numTimesEventsSeen[ci];
- }
- }
- }
-
- for (int i = 0; i < dimension; i++) {
- gradient[i] = expectedCount[i] - this.empiricalCount[i];
- }
-
- return gradient;
- }
-
- private int indexOf(int outcomeId, int featureId) {
- return outcomeId * numFeatures + featureId;
- }
-
- /**
- * Compute temporary values
- */
- private void computeSums(double[] x) {
- for (int ci = 0; ci < numContexts; ci++) {
- for (int oi = 0; oi < numOutcomes; oi++) {
- double vecProduct = 0.;
- for (int af = 0; af < this.contexts[ci].length; af++) {
- int vectorIndex = indexOf(oi, contexts[ci][af]);
- double predValue = 1.;
- if (values != null) predValue = this.values[ci][af];
- if (predValue == 0.) continue;
- vecProduct += predValue * x[vectorIndex];
- }
- voteSum[ci][oi] = vecProduct;
- }
-
- // \log(\sum_{c'=1}^{C} e^{w_c'^T x_i})
- logSumExp[ci] = ArrayMath.logSumOfExps(voteSum[ci]);
- }
- }
-
- /**
- * Compute empirical count
- */
- private void computeEmpCount() {
- for (int ci = 0; ci < numContexts; ci++) {
- for (int af = 0; af < this.contexts[ci].length; af++) {
- int vectorIndex = indexOf(this.outcomeList[ci], contexts[ci][af]);
- if (values != null) {
- empiricalCount[vectorIndex] += this.values[ci][af] *
numTimesEventsSeen[ci];
- } else {
- empiricalCount[vectorIndex] += 1. * numTimesEventsSeen[ci];
- }
- }
- }
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package opennlp.tools.ml.maxent.quasinewton;
+
+import java.util.Arrays;
+
+import opennlp.tools.ml.model.DataIndexer;
+import opennlp.tools.ml.model.OnePassRealValueDataIndexer;
+
+/**
+ * Evaluate negative log-likelihood and its gradient from DataIndexer.
+ */
+public class NegLogLikelihood implements Function {
+
+ protected int dimension;
+ protected int numOutcomes;
+ protected int numFeatures;
+ protected int numContexts;
+
+ // Information from data index
+ protected final float[][] values;
+ protected final int[][] contexts;
+ protected final int[] outcomeList;
+ protected final int[] numTimesEventsSeen;
+
+ // For calculating negLogLikelihood and gradient
+ protected double[] tempSums;
+ protected double[] expectation;
+
+ protected double[] gradient;
+
+ public NegLogLikelihood(DataIndexer indexer) {
+
+ // Get data from indexer.
+ if (indexer instanceof OnePassRealValueDataIndexer) {
+ this.values = indexer.getValues();
+ } else {
+ this.values = null;
+ }
+
+ this.contexts = indexer.getContexts();
+ this.outcomeList = indexer.getOutcomeList();
+ this.numTimesEventsSeen = indexer.getNumTimesEventsSeen();
+
+ this.numOutcomes = indexer.getOutcomeLabels().length;
+ this.numFeatures = indexer.getPredLabels().length;
+ this.numContexts = this.contexts.length;
+ this.dimension = numOutcomes * numFeatures;
+
+ this.expectation = new double[numOutcomes];
+ this.tempSums = new double[numOutcomes];
+ this.gradient = new double[dimension];
+ }
+
+ public int getDimension() {
+ return this.dimension;
+ }
+
+ public double[] getInitialPoint() {
+ return new double[dimension];
+ }
+
+ /**
+ * Negative log-likelihood
+ */
+ public double valueAt(double[] x) {
+
+ if (x.length != dimension)
+ throw new IllegalArgumentException(
+ "x is invalid, its dimension is not equal to domain dimension.");
+
+ int ci, oi, ai, vectorIndex, outcome;
+ double predValue, logSumOfExps;
+ double negLogLikelihood = 0;
+
+ for (ci = 0; ci < numContexts; ci++) {
+ for (oi = 0; oi < numOutcomes; oi++) {
+ tempSums[oi] = 0;
+ for (ai = 0; ai < contexts[ci].length; ai++) {
+ vectorIndex = indexOf(oi, contexts[ci][ai]);
+ predValue = values != null? values[ci][ai] : 1.0;
+ tempSums[oi] += predValue * x[vectorIndex];
+ }
+ }
+
+ logSumOfExps = ArrayMath.logSumOfExps(tempSums);
+
+ outcome = outcomeList[ci];
+ negLogLikelihood -= (tempSums[outcome] - logSumOfExps) *
numTimesEventsSeen[ci];
+ }
+
+ return negLogLikelihood;
+ }
+
+ /**
+ * Compute gradient
+ */
+ public double[] gradientAt(double[] x) {
+
+ if (x.length != dimension)
+ throw new IllegalArgumentException(
+ "x is invalid, its dimension is not equal to the function.");
+
+ int ci, oi, ai, vectorIndex;
+ double predValue, logSumOfExps;
+ int empirical;
+
+ // Reset gradient
+ Arrays.fill(gradient, 0);
+
+ for (ci = 0; ci < numContexts; ci++) {
+ for (oi = 0; oi < numOutcomes; oi++) {
+ expectation[oi] = 0;
+ for (ai = 0; ai < contexts[ci].length; ai++) {
+ vectorIndex = indexOf(oi, contexts[ci][ai]);
+ predValue = values != null? values[ci][ai] : 1.0;
+ expectation[oi] += predValue * x[vectorIndex];
+ }
+ }
+
+ logSumOfExps = ArrayMath.logSumOfExps(expectation);
+
+ for (oi = 0; oi < numOutcomes; oi++) {
+ expectation[oi] = Math.exp(expectation[oi] - logSumOfExps);
+ }
+
+ for (oi = 0; oi < numOutcomes; oi++) {
+ empirical = outcomeList[ci] == oi? 1 : 0;
+ for (ai = 0; ai < contexts[ci].length; ai++) {
+ vectorIndex = indexOf(oi, contexts[ci][ai]);
+ predValue = values != null? values[ci][ai] : 1.0;
+ gradient[vectorIndex] +=
+ predValue * (expectation[oi] - empirical) *
numTimesEventsSeen[ci];
+ }
+ }
+ }
+
+ return gradient;
+ }
+
+ protected int indexOf(int outcomeId, int featureId) {
+ return outcomeId * numFeatures + featureId;
+ }
}
\ No newline at end of file