Author: isabel
Date: Thu Jan 14 10:58:40 2010
New Revision: 899157

URL: http://svn.apache.org/viewvc?rev=899157&view=rev
Log:
MAHOUT-244 added root log-likelihood method.

Modified:
    
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
    
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java

Modified: 
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java?rev=899157&r1=899156&r2=899157&view=diff
==============================================================================
--- 
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
 (original)
+++ 
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java
 Thu Jan 14 10:58:40 2010
@@ -43,7 +43,7 @@
   }
 
   /**
-   * Calculate the Log-likelihood ratio for two events, call them A and B.  
Then we have:
+   * Calculate the Raw Log-likelihood ratio for two events, call them A and B. 
 Then we have:
    * <p/>
    * <table border="1" cellpadding="5" cellspacing="0">
    * <tbody><tr><td>&nbsp;</td><td>Event A</td><td>Everything but A</td></tr>
@@ -55,7 +55,7 @@
    * @param k12 The number of times the second event occurred WITHOUT the 
first event
    * @param k21 The number of times the first event occurred WITHOUT the 
second event
    * @param k22 The number of times something else occurred (i.e. was neither 
of these events
-   * @return The log-likelihood ratio
+   * @return The raw log-likelihood ratio
    *
    * <p/>
    * Credit to 
http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html for the 
table and the descriptions.
@@ -66,4 +66,24 @@
     double matrixEntropy = entropy(k11, k12, k21, k22);
     return 2 * (matrixEntropy - rowEntropy - columnEntropy);
   }
+  
+  /** 
+   * Calculate the Root Log-likelihood ratio for two events.
+   * 
+   * @see #logLikelihoodRatio(int, int, int, int)
+   *
+   * @param k11 The number of times the two events occurred together
+   * @param k12 The number of times the second event occurred WITHOUT the 
first event
+   * @param k21 The number of times the first event occurred WITHOUT the 
second event
+   * @param k22 The number of times something else occurred (i.e. was neither 
of these events)
+   * @return The root log-likelihood ratio
+   * 
+   * <p/>
+   * See discussion of raw vs. root LLR at 
+   * 
http://www.lucidimagination.com/search/document/6dc8709e65a7ced1/llr_scoring_question
+   */
+  public static double rootLogLikelihoodRatio(int k11, int k12, int k21, int 
k22) {
+    double llr = logLikelihoodRatio(k11, k12, k21, k22);
+    return Math.signum(((double) k11 / (k11+k12)) - ((double) k21 / 
(k21+k22))) * Math.sqrt(llr);
+  }
 }

Modified: 
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java?rev=899157&r1=899156&r2=899157&view=diff
==============================================================================
--- 
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
 (original)
+++ 
lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java
 Thu Jan 14 10:58:40 2010
@@ -65,4 +65,12 @@
     assertEquals(LogLikelihood.logLikelihoodRatio(1000,1000,1000, 99000), 
5714.932, 0.001);
   }
 
+  @Test
+  public void testRootLogLikelihood() throws Exception {
+    // positive where k11 is bigger than expected.
+    assertTrue(LogLikelihood.rootLogLikelihoodRatio(904, 21060, 1144, 283012) 
> 0.0);
+    
+    // negative because k11 is lower than expected
+    assertTrue(LogLikelihood.rootLogLikelihoodRatio(36, 21928, 60280, 623876) 
< 0.0);
+  }
 }


Reply via email to