Author: ab
Date: Mon Aug 14 08:29:07 2006
New Revision: 431368

URL: http://svn.apache.org/viewvc?rev=431368&view=rev
Log:
Fix incorrect calculation of max and min scores in readdb -stats. Spotted
by Chris Schneider.

Modified:
    lucene/nutch/branches/branch-0.8/CHANGES.txt
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java

Modified: lucene/nutch/branches/branch-0.8/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/CHANGES.txt?rev=431368&r1=431367&r2=431368&view=diff
==============================================================================
--- lucene/nutch/branches/branch-0.8/CHANGES.txt (original)
+++ lucene/nutch/branches/branch-0.8/CHANGES.txt Mon Aug 14 08:29:07 2006
@@ -13,6 +13,9 @@
  4. Optionally skip pages with abnormally large Crawl-Delay values
     (Dennis Kubes via ab)
 
+ 5. Fix incorrect calculation of max and min scores in readdb -stats
+    (Chris Schneider via ab)
+
 Release 0.8 - 2006-07-25
 
  0. Totally new architecture, based on hadoop

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=431368&r1=431367&r2=431368&view=diff
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java Mon Aug 14 08:29:07 2006
@@ -238,7 +238,7 @@
     LongWritable value = new LongWritable();
 
     TreeMap stats = new TreeMap();
-    int avg = 0, min = 0, max = 0;
+    int avg = 0;
     for (int i = 0; i < readers.length; i++) {
       SequenceFile.Reader reader = readers[i];
       while (reader.next(key, value)) {
@@ -246,14 +246,18 @@
         LongWritable val = (LongWritable) stats.get(k);
         if (val == null) {
           val = new LongWritable();
+          if (k.startsWith("max")) val.set(Long.MIN_VALUE);
+          else if (k.startsWith("min")) val.set(Long.MAX_VALUE);
           stats.put(k, val);
         }
-        val.set(val.get() + value.get());
-        if (k.startsWith("max"))
-          max++;
-        else if (k.startsWith("min"))
-          min++;
-        else if (k.startsWith("avg")) avg++;
+        if (k.startsWith("max")) {
+          if (value.get() > val.get()) val.set(value.get());
+        } else if (k.startsWith("min")) {
+          if (value.get() < val.get()) val.set(value.get());
+        } else {
+          val.set(val.get() + value.get());
+          avg++;
+        }
       }
     }
     
@@ -265,9 +269,9 @@
         LongWritable val = (LongWritable) stats.get(k);
         if (k.indexOf("score") != -1) {
           if (k.startsWith("min")) {
-            LOG.info(k + ":\t" + (float) ((float) (val.get() / min) / 1000.0f));
+            LOG.info(k + ":\t" + ((float) val.get() / 1000.0f));
           } else if (k.startsWith("max")) {
-            LOG.info(k + ":\t" + (float) ((float) (val.get() / max) / 1000.0f));
+            LOG.info(k + ":\t" + ((float) val.get() / 1000.0f));
           } else if (k.startsWith("avg")) {
             LOG.info(k + ":\t" + (float) ((float) (val.get() / avg) / 1000.0f));
           }



-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to