Author: jnioche
Date: Fri Aug 29 11:22:46 2014
New Revision: 1621285

URL: http://svn.apache.org/r1621285
Log:
NUTCH-1828 bin/crawl : incorrect handling of nutch errors

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/bin/crawl

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1621285&r1=1621284&r2=1621285&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Aug 29 11:22:46 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1828 bin/crawl : incorrect handling of nutch errors (Mathieu Bouchard via jnioche)
+
 * NUTCH-1693 TextMD5Signature computed on textual content (Tien Nguyen Manh, markus via snagel)
 
 * NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip (Matthias Agethle via snagel)

Modified: nutch/branches/2.x/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/crawl?rev=1621285&r1=1621284&r2=1621285&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/crawl (original)
+++ nutch/branches/2.x/src/bin/crawl Fri Aug 29 11:22:46 2014
@@ -93,11 +93,14 @@ fi
 
 # initial injection
 "$bin/nutch" inject "$SEEDDIR" -crawlId "$CRAWL_ID"
-if [ $? -ne 0 ] 
-  then exit $? 
+RETCODE=$?
+
+if [ $RETCODE -ne 0 ] 
+  then exit $RETCODE 
 fi
 
 
+
 # main loop : rounds of generate - fetch - parse - update
 for ((a=1; a <= LIMIT ; a++))
 do
@@ -114,16 +117,18 @@ do
 
   echo "Generating a new fetchlist"
  "$bin/nutch" generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId "$CRAWL_ID" -batchId $batchId
-  
-  if [ $? -ne 0 ] 
-  then exit $? 
+  RETCODE=$?
+
+  if [ $RETCODE -ne 0 ] 
+    then exit $RETCODE 
   fi
 
   echo "Fetching : "
  "$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId "$CRAWL_ID" -threads 50
+  RETCODE=$?
 
-  if [ $? -ne 0 ] 
-  then exit $? 
+  if [ $RETCODE -ne 0 ] 
+    then exit $RETCODE 
   fi
 
   # parsing the batch
@@ -132,31 +137,35 @@ do
   # so that it does not fail the full task
  skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
  "$bin/nutch" parse $commonOptions $skipRecordsOptions $batchId -crawlId "$CRAWL_ID"
+  RETCODE=$?
 
-  if [ $? -ne 0 ] 
-  then exit $? 
+  if [ $RETCODE -ne 0 ] 
+    then exit $RETCODE 
   fi
 
   # updatedb with this batch
   echo "CrawlDB update for $CRAWL_ID"
   "$bin/nutch" updatedb $commonOptions $batchId -crawlId "$CRAWL_ID"
+  RETCODE=$?
 
-  if [ $? -ne 0 ] 
-  then exit $? 
+  if [ $RETCODE -ne 0 ] 
+    then exit $RETCODE 
   fi
 
   echo "Indexing $CRAWL_ID on SOLR index -> $SOLRURL"
  "$bin/nutch" index $commonOptions -D solr.server.url=$SOLRURL -all -crawlId "$CRAWL_ID"
-  
-  if [ $? -ne 0 ] 
-   then exit $? 
+  RETCODE=$?
+
+  if [ $RETCODE -ne 0 ] 
+    then exit $RETCODE 
   fi
 
   echo "SOLR dedup -> $SOLRURL"
   "$bin/nutch" solrdedup $commonOptions $SOLRURL
-  
-  if [ $? -ne 0 ] 
-   then exit $? 
+  RETCODE=$?
+
+  if [ $RETCODE -ne 0 ] 
+    then exit $RETCODE 
   fi
 
 done


Reply via email to