Author: jnioche
Date: Wed Aug 10 19:54:46 2011
New Revision: 1156342

URL: http://svn.apache.org/viewvc?rev=1156342&view=rev
Log:
NUTCH-1044 : problem with scores of redirections and their outlinks

Modified:
    nutch/branches/branch-1.4/CHANGES.txt
    nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java
    nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1156342&r1=1156341&r2=1156342&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Wed Aug 10 19:54:46 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have invalid scores (jnioche)
+
 * NUTCH-1028 Log urls when parsing (markus)
 
 * NUTCH-1065 New mvn.template (lewismc)

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=1156342&r1=1156341&r2=1156342&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Aug 10 19:54:46 2011
@@ -119,7 +119,7 @@ public class CrawlDatum implements Writa
   private long fetchTime = System.currentTimeMillis();
   private byte retries;
   private int fetchInterval;
-  private float score = 1.0f;
+  private float score = 0.0f;
   private byte[] signature = null;
   private long modifiedTime;
   private org.apache.hadoop.io.MapWritable metaData;

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1156342&r1=1156341&r2=1156342&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java Wed Aug 10 19:54:46 2011
@@ -816,7 +816,7 @@ public class Fetcher extends Configured 
           return url;
         } else {
           CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_LINKED,
-              datum.getFetchInterval());
+              datum.getFetchInterval(),datum.getScore());
           // transfer existing metadata 
           newDatum.getMetaData().putAll(datum.getMetaData());
           try {


Reply via email to