Author: jnioche
Date: Wed Aug 10 19:54:46 2011
New Revision: 1156342
URL: http://svn.apache.org/viewvc?rev=1156342&view=rev
Log:
NUTCH-1044 : problem with scores of redirections and their outlinks
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java
nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1156342&r1=1156341&r2=1156342&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Wed Aug 10 19:54:46 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have invalid scores (jnioche)
+
* NUTCH-1028 Log urls when parsing (markus)
* NUTCH-1065 New mvn.template (lewismc)
Modified:
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=1156342&r1=1156341&r2=1156342&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Aug 10 19:54:46 2011
@@ -119,7 +119,7 @@ public class CrawlDatum implements Writa
private long fetchTime = System.currentTimeMillis();
private byte retries;
private int fetchInterval;
- private float score = 1.0f;
+ private float score = 0.0f;
private byte[] signature = null;
private long modifiedTime;
private org.apache.hadoop.io.MapWritable metaData;
Modified:
nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=1156342&r1=1156341&r2=1156342&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/fetcher/Fetcher.java Wed Aug 10 19:54:46 2011
@@ -816,7 +816,7 @@ public class Fetcher extends Configured
return url;
} else {
CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_LINKED,
- datum.getFetchInterval());
+ datum.getFetchInterval(),datum.getScore());
// transfer existing metadata
newDatum.getMetaData().putAll(datum.getMetaData());
try {