Author: jnioche
Date: Fri Aug 29 11:22:46 2014
New Revision: 1621285
URL: http://svn.apache.org/r1621285
Log:
NUTCH-1828 bin/crawl : incorrect handling of nutch errors
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/bin/crawl
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1621285&r1=1621284&r2=1621285&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Aug 29 11:22:46 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1828 bin/crawl : incorrect handling of nutch errors (Mathieu Bouchard via jnioche)
+
* NUTCH-1693 TextMD5Signature computed on textual content (Tien Nguyen Manh,
markus via snagel)
* NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval,
generate.max.per.host.by.ip (Matthias Agethle via snagel)
Modified: nutch/branches/2.x/src/bin/crawl
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/crawl?rev=1621285&r1=1621284&r2=1621285&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/crawl (original)
+++ nutch/branches/2.x/src/bin/crawl Fri Aug 29 11:22:46 2014
@@ -93,11 +93,14 @@ fi
# initial injection
"$bin/nutch" inject "$SEEDDIR" -crawlId "$CRAWL_ID"
-if [ $? -ne 0 ]
- then exit $?
+RETCODE=$?
+
+if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
+
# main loop : rounds of generate - fetch - parse - update
for ((a=1; a <= LIMIT ; a++))
do
@@ -114,16 +117,18 @@ do
echo "Generating a new fetchlist"
"$bin/nutch" generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId "$CRAWL_ID" -batchId $batchId
-
- if [ $? -ne 0 ]
- then exit $?
+ RETCODE=$?
+
+ if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
echo "Fetching : "
"$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId "$CRAWL_ID" -threads 50
+ RETCODE=$?
- if [ $? -ne 0 ]
- then exit $?
+ if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
# parsing the batch
@@ -132,31 +137,35 @@ do
# so that it does not fail the full task
skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
"$bin/nutch" parse $commonOptions $skipRecordsOptions $batchId -crawlId "$CRAWL_ID"
+ RETCODE=$?
- if [ $? -ne 0 ]
- then exit $?
+ if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
# updatedb with this batch
echo "CrawlDB update for $CRAWL_ID"
"$bin/nutch" updatedb $commonOptions $batchId -crawlId "$CRAWL_ID"
+ RETCODE=$?
- if [ $? -ne 0 ]
- then exit $?
+ if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
echo "Indexing $CRAWL_ID on SOLR index -> $SOLRURL"
"$bin/nutch" index $commonOptions -D solr.server.url=$SOLRURL -all -crawlId "$CRAWL_ID"
-
- if [ $? -ne 0 ]
- then exit $?
+ RETCODE=$?
+
+ if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
echo "SOLR dedup -> $SOLRURL"
"$bin/nutch" solrdedup $commonOptions $SOLRURL
-
- if [ $? -ne 0 ]
- then exit $?
+ RETCODE=$?
+
+ if [ $RETCODE -ne 0 ]
+ then exit $RETCODE
fi
done