Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change notification.
The following page has been changed by MatthewHolt:
http://wiki.apache.org/nutch/IntranetRecrawl

------------------------------------------------------------------------------
  linkdb_dir=$crawl_dir/linkdb
  index_dir=$crawl_dir/index
+ # Sets the number of current segments for later clean up
+ seg_num=`ls $segments_dir | wc -l`
+ 
  # The generate/fetch/update cycle
  for ((i=1; i <= depth ; i++))
  do
@@ -147, +150 @@
  # Update segments
  $nutch_dir/nutch invertlinks $linkdb_dir -dir $segments_dir
+ # Merge segments
+ mergesegs_dir=$crawl_dir/mergesegs_dir
+ $nutch_dir/nutch mergesegs $mergesegs_dir -dir $segments_dir
+ cp -R $mergesegs_dir/* $segments_dir
+ rm -rf $mergesegs_dir
+ 
  # Index segments
  new_indexes=$crawl_dir/newindexes
+ segment=`ls -d $segments_dir/* | tail -1`
- $nutch_dir/nutch index $new_indexes $webdb_dir $linkdb_dir $segments_dir/*
+ $nutch_dir/nutch index $new_indexes $webdb_dir $linkdb_dir $segment
  # De-duplicate indexes
  $nutch_dir/nutch dedup $new_indexes
@@ -163, +173 @@
  # Clean up
  rm -rf $new_indexes
+ # sleeps for 1 minute to make sure tomcat has released its lock on dir's
+ # before removing them
+ sleep 1m
+ 
+ echo "***Removing old segment directories that are no longer in use. If any of these error out it is not a problem, just used for clean up."
+ 
+ seg_num=`expr $seg_num + $depth`
+ for segment in `ls -dr $segments_dir/* | tail -$seg_num`
+ do
+ echo "Removing Segment: $segment"
+ rm -rf $segment
+ done
  }}}

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs