Update of /cvsroot/nutch/nutch/src/java/net/nutch/tools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17598/src/java/net/nutch/tools
Modified Files:
        FetchListTool.java
Log Message:
Add option to FetchListTool to prioritize pages based on the log(number of incoming links) instead of the original page scores. Suggested by Doug Cutting.

Index: FetchListTool.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/tools/FetchListTool.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** FetchListTool.java 20 Aug 2004 20:36:14 -0000 1.25
--- FetchListTool.java 22 Oct 2004 23:30:20 -0000 1.26
***************
*** 27,31 ****
    private static final long FETCH_GENERATION_DELAY_MS = 7 * 24 * 60 * 60 * 1000;
!
    NutchFileSystem nfs;
    File dbDir;
--- 27,33 ----
    private static final long FETCH_GENERATION_DELAY_MS = 7 * 24 * 60 * 60 * 1000;
!   private boolean scoreByLinkCount =
!     NutchConf.getBoolean("fetchlist.score.by.link.count", false);
!
    NutchFileSystem nfs;
    File dbDir;
***************
*** 499,503 ****
    // perform dbupdate at the same time.
    //
!   curScore.set(page.getScore());
    page.setNextFetchTime(page.getNextFetchTime() + FETCH_GENERATION_DELAY_MS);
    writer.append(curScore, new FetchListEntry(shouldFetch, page, results));
--- 501,508 ----
    // perform dbupdate at the same time.
    //
!   // Optionally set the score by the log of number of
!   // incoming anchors.
!   curScore.set(scoreByLinkCount ?
!     (float)Math.log(results.length) : page.getScore());
    page.setNextFetchTime(page.getNextFetchTime() + FETCH_GENERATION_DELAY_MS);
    writer.append(curScore, new FetchListEntry(shouldFetch, page, results));

-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates!
Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs