Author: ab
Date: Mon Aug 14 08:29:07 2006
New Revision: 431368

URL: http://svn.apache.org/viewvc?rev=431368&view=rev
Log:
Fix incorrect calculation of max and min scores in readdb -stats. Spotted by Chris Schneider.
Modified: lucene/nutch/branches/branch-0.8/CHANGES.txt lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java Modified: lucene/nutch/branches/branch-0.8/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/CHANGES.txt?rev=431368&r1=431367&r2=431368&view=diff ============================================================================== --- lucene/nutch/branches/branch-0.8/CHANGES.txt (original) +++ lucene/nutch/branches/branch-0.8/CHANGES.txt Mon Aug 14 08:29:07 2006 @@ -13,6 +13,9 @@ 4. Optionally skip pages with abnormally large Crawl-Delay values (Dennis Kubes via ab) + 5. Fix incorrect calculation of max and min scores in readdb -stats + (Chris Schneider via ab) + Release 0.8 - 2006-07-25 0. Totally new architecture, based on hadoop Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=431368&r1=431367&r2=431368&view=diff ============================================================================== --- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original) +++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/CrawlDbReader.java Mon Aug 14 08:29:07 2006 @@ -238,7 +238,7 @@ LongWritable value = new LongWritable(); TreeMap stats = new TreeMap(); - int avg = 0, min = 0, max = 0; + int avg = 0; for (int i = 0; i < readers.length; i++) { SequenceFile.Reader reader = readers[i]; while (reader.next(key, value)) { @@ -246,14 +246,18 @@ LongWritable val = (LongWritable) stats.get(k); if (val == null) { val = new LongWritable(); + if (k.startsWith("max")) val.set(Long.MIN_VALUE); + else if (k.startsWith("min")) val.set(Long.MAX_VALUE); stats.put(k, val); } - val.set(val.get() + value.get()); - if (k.startsWith("max")) - max++; - else if (k.startsWith("min")) - min++; - else if (k.startsWith("avg")) 
avg++; + if (k.startsWith("max")) { + if (value.get() > val.get()) val.set(value.get()); + } else if (k.startsWith("min")) { + if (value.get() < val.get()) val.set(value.get()); + } else { + val.set(val.get() + value.get()); + avg++; + } } } @@ -265,9 +269,9 @@ LongWritable val = (LongWritable) stats.get(k); if (k.indexOf("score") != -1) { if (k.startsWith("min")) { - LOG.info(k + ":\t" + (float) ((float) (val.get() / min) / 1000.0f)); + LOG.info(k + ":\t" + ((float) val.get() / 1000.0f)); } else if (k.startsWith("max")) { - LOG.info(k + ":\t" + (float) ((float) (val.get() / max) / 1000.0f)); + LOG.info(k + ":\t" + ((float) val.get() / 1000.0f)); } else if (k.startsWith("avg")) { LOG.info(k + ":\t" + (float) ((float) (val.get() / avg) / 1000.0f)); } ------------------------------------------------------------------------- Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs