Author: lewismc
Date: Thu Oct 22 03:47:04 2015
New Revision: 1709943
URL: http://svn.apache.org/viewvc?rev=1709943&view=rev
Log:
NUTCH-2148 Review and update mapred --> mapreduce config params in crawl script
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/bin/crawl
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1709943&r1=1709942&r2=1709943&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct 22 03:47:04 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2148 Review and update mapred --> mapreduce config params in crawl
script (lewismc)
+
* NUTCH-2141 Change the InteractiveSelenium plugin handler Interface to return
page content
(Balaji Gurumurthy via mattmann)
Modified: nutch/trunk/src/bin/crawl
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1709943&r1=1709942&r2=1709943&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Thu Oct 22 03:47:04 2015
@@ -109,7 +109,7 @@ fi
numSlaves=1
# and the total number of available tasks
-# sets Hadoop parameter "mapred.reduce.tasks"
+# sets Hadoop parameter "mapreduce.job.reduces"
numTasks=`expr $numSlaves \* 2`
# number of urls to fetch in one iteration
@@ -135,7 +135,7 @@ fi
# note that some of the options listed here could be set in the
# corresponding hadoop site xml param file
-commonOptions="-D mapred.reduce.tasks=$numTasks -D
mapred.child.java.opts=-Xmx1000m -D
mapred.reduce.tasks.speculative.execution=false -D
mapred.map.tasks.speculative.execution=false -D mapred.compress.map.output=true"
+commonOptions="-D mapreduce.job.reduces=$numTasks -D
mapred.child.java.opts=-Xmx1000m -D mapreduce.reduce.speculative=false -D
mapreduce.map.speculative=false -D mapreduce.map.output.compress=true"
# check that hadoop can be found on the path
if [ $mode = "distributed" ]; then
@@ -232,7 +232,7 @@ do
echo "Parsing : $SEGMENT"
# enable the skipping of records for the parsing so that a dodgy document
# does not fail the full task
- skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D
mapred.skip.map.max.skip.records=1"
+ skipRecordsOptions="-D mapreduce.task.skip.start.attempts=2 -D
mapreduce.map.skip.maxrecords=1"
__bin_nutch parse $commonOptions $skipRecordsOptions
"$CRAWL_PATH"/segments/$SEGMENT
# updatedb with this segment