Author: ragerri
Date: Wed Jun  4 07:33:14 2014
New Revision: 1599954

URL: http://svn.apache.org/r1599954
Log:
OPENNLP-687 fmeasure update to avoid duplicate true positives

Removed:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/ParseEval.java
Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java?rev=1599954&r1=1599953&r2=1599954&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
 Wed Jun  4 07:33:14 2014
@@ -23,12 +23,16 @@ import java.util.Stack;
 
 import opennlp.tools.cmdline.parser.ParserTool;
 import opennlp.tools.util.Span;
-import opennlp.tools.util.eval.ParseEval;
 import opennlp.tools.util.eval.Evaluator;
+import opennlp.tools.util.eval.FMeasure;
 
 /**
- * Class for Parsing Evaluation. Hopefully to be merged
- * into FMeasure soon.
+ * Class for ParserEvaluator.
+ * This ParserEvaluator behaves like EVALB with no exceptions, e.g., 
+ * without removing punctuation tags, or equality between ADVP and PRT 
+ * (as in COLLINS convention). To follow parsing evaluation conventions
+ * (Bikel, Collins, Charniak, etc.) as in EVALB, options are to be added 
+ * to the {@code ParserEvaluatorTool}.
  *
  */
 public class ParserEvaluator extends Evaluator<Parse> {
@@ -36,7 +40,7 @@ public class ParserEvaluator extends Eva
   /**
    * fmeasure.
    */
-  private ParseEval fmeasure = new ParseEval();
+  private FMeasure fmeasure = new FMeasure();
   /**
    * The parser to evaluate.
    */
@@ -54,7 +58,7 @@ public class ParserEvaluator extends Eva
 
   /**
    * Obtain {@code Span}s for every parse in the sentence.
-   * @param parse
+   * @param parse the parse from which to obtain the spans
    * @return an array containing every span for the parse
    */
   private static Span[] getConstituencySpans(final Parse parse) {
@@ -85,6 +89,9 @@ public class ParserEvaluator extends Eva
     return consts.toArray(new Span[consts.size()]);
   }
 
+  /* (non-Javadoc)
+   * @see opennlp.tools.util.eval.Evaluator#processSample(java.lang.Object)
+   */
   @Override
   protected final Parse processSample(final Parse reference) {
 
@@ -106,7 +113,7 @@ public class ParserEvaluator extends Eva
    * It returns the fmeasure result.
    * @return the fmeasure value
    */
-  public final ParseEval getFMeasure() {
+  public final FMeasure getFMeasure() {
     return fmeasure;
   }
 
@@ -124,7 +131,7 @@ public class ParserEvaluator extends Eva
     String testParseString = "(TOP (S (NP (NNS Sales) (NNS executives)) (VP 
(VBD were) (VP (VBG examing) (NP (DT the) (NNS figures)) (PP (IN with) (NP (JJ 
great) (NN care) (NN yesterday))) ))  (. .) ))";
     Span[] testConsts = 
getConstituencySpans(Parse.parseParse(testParseString));
 
-    ParseEval measure = new ParseEval();
+    FMeasure measure = new FMeasure();
     measure.updateScores(goldConsts, testConsts);
 
     // Expected output:

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java?rev=1599954&r1=1599953&r2=1599954&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java 
(original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/eval/FMeasure.java 
Wed Jun  4 07:33:14 2014
@@ -17,6 +17,10 @@
 
 package opennlp.tools.util.eval;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
 
 /**
 * The {@link FMeasure} is a utility class for evaluators
@@ -28,64 +32,79 @@ package opennlp.tools.util.eval;
  * each reference sample.
  */
 public final class FMeasure {
+  /**
+   * |selected| = true positives + false positives <br>
+   * the count of selected (or retrieved) items.
+   */
+  private long selected;
 
-       /** |selected| = true positives + false positives <br>
-        * the count of selected (or retrieved) items  */
-       private long selected;
-
-       /** |target| = true positives + false negatives <br>
-        * the count of target (or correct) items */
-       private long target;
+  /**
+   * |target| = true positives + false negatives <br>
+   * the count of target (or correct) items.
+   */
+  private long target;
 
-       private long truePositive;
+  /**
+   * Storing the number of true positives found.
+   */
+  private long truePositive;
 
   /**
-   * Retrieves the arithmetic mean of the precision scores
-   * calculated for each evaluated sample.
+   * Retrieves the arithmetic mean of the precision scores calculated for each
+   * evaluated sample.
    *
    * @return the arithmetic mean of all precision scores
    */
   public double getPrecisionScore() {
-    return selected > 0 ? (double)truePositive / (double)selected : 0;
+    return selected > 0 ? (double) truePositive / (double) selected : 0;
   }
 
   /**
-   * Retrieves the arithmetic mean of the recall score
-   * calculated for each evaluated sample.
+   * Retrieves the arithmetic mean of the recall score calculated for each
+   * evaluated sample.
    *
    * @return the arithmetic mean of all recall scores
    */
   public double getRecallScore() {
-    return target > 0 ? (double)truePositive / (double)target : 0;
+    return target > 0 ? (double) truePositive / (double) target : 0;
   }
 
   /**
    * Retrieves the f-measure score.
    *
    * f-measure = 2 * precision * recall / (precision + recall)
-   *
    * @return the f-measure or -1 if precision + recall &lt;= 0
    */
   public double getFMeasure() {
 
     if (getPrecisionScore() + getRecallScore() > 0) {
-      return 2 * (getPrecisionScore() * getRecallScore()) /
-          (getPrecisionScore() + getRecallScore());
-    }
-    else {
+      return 2 * (getPrecisionScore() * getRecallScore())
+          / (getPrecisionScore() + getRecallScore());
+    } else {
       // cannot divide by zero, return error code
       return -1;
     }
   }
 
-  public void updateScores(Object references[], Object predictions[]) {
+  /**
+   * Updates the score based on the number of true positives and
+   * the number of predictions and references.
+   *
+   * @param references the provided references
+   * @param predictions the predicted spans
+   */
+  public void updateScores(final Object[] references, final Object[] 
predictions) {
 
-         truePositive += countTruePositives(references, predictions);
-         selected += predictions.length;
-         target += references.length;
+    truePositive += countTruePositives(references, predictions);
+    selected += predictions.length;
+    target += references.length;
   }
 
-  public void mergeInto(FMeasure measure) {
+  /**
+   * Merge results into fmeasure metric.
+   * @param measure the fmeasure
+   */
+  public void mergeInto(final FMeasure measure) {
     this.selected += measure.selected;
     this.target += measure.target;
     this.truePositive += measure.truePositive;
@@ -93,84 +112,87 @@ public final class FMeasure {
 
   /**
   * Creates a human-readable {@link String} representation.
+   * @return the results
    */
   @Override
   public String toString() {
-    return "Precision: " + Double.toString(getPrecisionScore()) + "\n" +
-        "Recall: " + Double.toString(getRecallScore()) + "\n" +
-        "F-Measure: " + Double.toString(getFMeasure());
+    return "Precision: " + Double.toString(getPrecisionScore()) + "\n"
+        + "Recall: " + Double.toString(getRecallScore()) + "\n" + "F-Measure: "
+        + Double.toString(getFMeasure());
   }
 
   /**
-   * This method counts the number of objects which are equal and
-   * occur in the references and predictions arrays.
-   *
-   * These are the number of true positives.
-   *
-   * @param references the gold standard
-   * @param predictions the predictions
+   * This method counts the number of objects which are equal and occur in the
+   * references and predictions arrays.
+   * Matched items are removed from the prediction list.
    *
+   * @param references
+   *          the gold standard
+   * @param predictions
+   *          the predictions
    * @return number of true positives
    */
-  static int countTruePositives(Object references[],
-      Object predictions[]) {
+  static int countTruePositives(final Object[] references, final Object[] 
predictions) {
 
+    List<Object> predListSpans = new ArrayList<Object>(predictions.length);
+    Collections.addAll(predListSpans, predictions);
     int truePositives = 0;
+    Object matchedItem = null;
 
-    // Note: Maybe a map should be used to improve performance
-    for (int referenceIndex = 0; referenceIndex < references.length;
-        referenceIndex++) {
-
+    for (int referenceIndex = 0; referenceIndex < references.length; 
referenceIndex++) {
       Object referenceName = references[referenceIndex];
 
-      for (int predictedIndex = 0; predictedIndex < predictions.length;
-          predictedIndex++) {
-        if (referenceName.equals(predictions[predictedIndex])) {
+      for (int predIndex = 0; predIndex < predListSpans.size(); predIndex++) {
+
+        if (referenceName.equals(predListSpans.get(predIndex))) {
+          matchedItem = predListSpans.get(predIndex);
           truePositives++;
         }
       }
+      if (matchedItem != null) {
+        predListSpans.remove(matchedItem);
+      }
     }
-
     return truePositives;
   }
 
+
   /**
-   * Calculates the precision score for the given reference and
-   * predicted spans.
-   *
-   * @param references the gold standard spans
-   * @param predictions the predicted spans
+   * Calculates the precision score for the given reference and predicted 
spans.
    *
+   * @param references
+   *          the gold standard spans
+   * @param predictions
+   *          the predicted spans
    * @return the precision score or NaN if there are no predicted spans
    */
-  public static double precision(Object references[], Object predictions[]) {
+  public static double precision(final Object[] references, final Object[] 
predictions) {
 
     if (predictions.length > 0) {
-      return countTruePositives(references, predictions) /
-          (double) predictions.length;
-    }
-    else {
+      return countTruePositives(references, predictions)
+          / (double) predictions.length;
+    } else {
       return Double.NaN;
     }
   }
 
   /**
-   * Calculates the recall score for the given reference and
-   * predicted spans.
+   * Calculates the recall score for the given reference and predicted spans.
    *
-   * @param references the gold standard spans
-   * @param predictions the predicted spans
+   * @param references
+   *          the gold standard spans
+   * @param predictions
+   *          the predicted spans
    *
    * @return the recall score or NaN if there are no reference spans
    */
-  public static double recall(Object references[], Object predictions[]) {
+  public static double recall(final Object[] references, final Object[] 
predictions) {
 
     if (references.length > 0) {
-      return countTruePositives(references, predictions) /
-          (double) references.length;
-    }
-    else {
-        return Double.NaN;
+      return countTruePositives(references, predictions)
+          / (double) references.length;
+    } else {
+      return Double.NaN;
     }
   }
 }


Reply via email to