Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Nutch Wiki" for change 
notification.

The following page has been changed by MatthewHolt:
http://wiki.apache.org/nutch/IntranetRecrawl

------------------------------------------------------------------------------
  linkdb_dir=$crawl_dir/linkdb
  index_dir=$crawl_dir/index
  
+ # Sets the number of current segments for later clean up
+ seg_num=`ls $segments_dir | wc -l`
+ 
  # The generate/fetch/update cycle
  for ((i=1; i <= depth ; i++))
  do
@@ -147, +150 @@

  # Update segments
  $nutch_dir/nutch invertlinks $linkdb_dir -dir $segments_dir
  
+ # Merge segments
+ mergesegs_dir=$crawl_dir/mergesegs_dir
+ $nutch_dir/nutch mergesegs $mergesegs_dir -dir $segments_dir
+ cp -R $mergesegs_dir/* $segments_dir
+ rm -rf $mergesegs_dir
+ 
  # Index segments
  new_indexes=$crawl_dir/newindexes
+ segment=`ls -d $segments_dir/* | tail -1`
- $nutch_dir/nutch index $new_indexes $webdb_dir $linkdb_dir $segments_dir/*
+ $nutch_dir/nutch index $new_indexes $webdb_dir $linkdb_dir $segment
  
  # De-duplicate indexes
  $nutch_dir/nutch dedup $new_indexes
@@ -163, +173 @@

  # Clean up
  rm -rf $new_indexes
  
+ # sleeps for 1 minute to make sure tomcat has released its lock on dir's
+ # before removing them
+ sleep 1m
+ 
+ echo "***Removing old segment directories that are no longer in use. If any of these error out it is not a problem, just used for clean up."
+ 
+ seg_num=`expr $seg_num + $depth`
+ for segment in `ls -dr $segments_dir/* | tail -$seg_num`
+ do
+   echo "Removing Segment: $segment"
+   rm -rf $segment
+ done
  }}}
  

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to