Author: isabel Date: Thu Jan 14 10:58:40 2010 New Revision: 899157 URL: http://svn.apache.org/viewvc?rev=899157&view=rev Log: MAHOUT-244 added root log-likelihood method.
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java?rev=899157&r1=899156&r2=899157&view=diff ============================================================================== --- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java (original) +++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/LogLikelihood.java Thu Jan 14 10:58:40 2010 @@ -43,7 +43,7 @@ } /** - * Calculate the Log-likelihood ratio for two events, call them A and B. Then we have: + * Calculate the Raw Log-likelihood ratio for two events, call them A and B. Then we have: * <p/> * <table border="1" cellpadding="5" cellspacing="0"> * <tbody><tr><td> </td><td>Event A</td><td>Everything but A</td></tr> @@ -55,7 +55,7 @@ * @param k12 The number of times the second event occurred WITHOUT the first event * @param k21 The number of times the first event occurred WITHOUT the second event * @param k22 The number of times something else occurred (i.e. was neither of these events - * @return The log-likelihood ratio + * @return The raw log-likelihood ratio * * <p/> * Credit to http://tdunning.blogspot.com/2008/03/surprise-and-coincidence.html for the table and the descriptions. @@ -66,4 +66,24 @@ double matrixEntropy = entropy(k11, k12, k21, k22); return 2 * (matrixEntropy - rowEntropy - columnEntropy); } + + /** + * Calculate the Root Log-likelihood ratio for two events. 
+ * + * @see #logLikelihoodRatio(int, int, int, int) + + * @param k11 The number of times the two events occurred together + * @param k12 The number of times the second event occurred WITHOUT the first event + * @param k21 The number of times the first event occurred WITHOUT the second event + * @param k22 The number of times something else occurred (i.e. was neither of these events) + * @return The root log-likelihood ratio + * + * <p/> + * See discussion of raw vs. root LLR at + * http://www.lucidimagination.com/search/document/6dc8709e65a7ced1/llr_scoring_question + */ + public static double rootLogLikelihoodRatio(int k11, int k12, int k21, int k22) { + double llr = logLikelihoodRatio(k11, k12, k21, k22); + return Math.signum(((double) k11 / (k11+k12)) - ((double) k21 / (k21+k22))) * Math.sqrt(llr); + } } Modified: lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java?rev=899157&r1=899156&r2=899157&view=diff ============================================================================== --- lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java (original) +++ lucene/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/LogLikelihoodTest.java Thu Jan 14 10:58:40 2010 @@ -65,4 +65,12 @@ assertEquals(LogLikelihood.logLikelihoodRatio(1000,1000,1000, 99000), 5714.932, 0.001); } + @Test + public void testRootLogLikelihood() throws Exception { + // positive where k11 is bigger than expected. + assertTrue(LogLikelihood.rootLogLikelihoodRatio(904, 21060, 1144, 283012) > 0.0); + + // negative because k11 is lower than expected + assertTrue(LogLikelihood.rootLogLikelihoodRatio(36, 21928, 60280, 623876) < 0.0); + } }