Author: ab
Date: Thu Mar 22 03:08:00 2007
New Revision: 521182
URL: http://svn.apache.org/viewvc?view=rev&rev=521182
Log:
NUTCH-246 - incorrect segment size being generated due to time synchronization issue.
Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=521182&r1=521181&r2=521182 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Thu Mar 22 03:08:00 2007 @@ -166,6 +166,9 @@ 56. Upgrade to Hadoop 0.12.1 release. (ab) +57. NUTCH-246 - Incorrect segment size being generated due to time + synchronization issue (Stefan Groschupf via ab) + Release 0.8 - 2006-07-25 Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?view=diff&rev=521182&r1=521181&r2=521182 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Thu Mar 22 03:08:00 2007 @@ -51,6 +51,7 @@ private JobConf jobConf; private URLFilters filters; private ScoringFilters scfilters; + private long curTime; public void configure(JobConf job) { this.jobConf = job; @@ -59,6 +60,7 @@ filters = new URLFilters(jobConf); scfilters = new ScoringFilters(jobConf); scoreInjected = jobConf.getFloat("db.score.injected", 1.0f); + curTime = job.getLong("injector.current.time", System.currentTimeMillis()); } public void close() {} @@ -79,6 +81,7 @@ if (url != null) { // if it passes value.set(url); // collect it CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_INJECTED, interval); + datum.setFetchTime(curTime); datum.setScore(scoreInjected); try { scfilters.injectedScore(value, datum); @@ -96,7 +99,7 @@ /** Combine multiple new entries for a url. 
*/ public static class InjectReducer implements Reducer { - public void configure(JobConf job) {} + public void configure(JobConf job) {} public void close() {} public void reduce(WritableComparable key, Iterator values, @@ -155,6 +158,7 @@ sortJob.setOutputFormat(SequenceFileOutputFormat.class); sortJob.setOutputKeyClass(Text.class); sortJob.setOutputValueClass(CrawlDatum.class); + sortJob.setLong("injector.current.time", System.currentTimeMillis()); JobClient.runJob(sortJob); // merge with existing crawl db ------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys-and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs