Author: lewismc
Date: Sat Sep 27 21:57:10 2014
New Revision: 1628010
URL: http://svn.apache.org/r1628010
Log:
NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/bin/crawl
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1628010&r1=1628009&r2=1628010&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Sep 27 21:57:10 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1853 Add commented out WebGraph executions to ./bin/crawl (lewismc)
+
* NUTCH-1844 testresources/testcrawl not referenced anywhere in code (mattmann)
* NUTCH-1839 Improve WebGraph CLI parsing (lewismc)
Modified: nutch/trunk/src/bin/crawl
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1628010&r1=1628009&r2=1628010&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Sat Sep 27 21:57:10 2014
@@ -203,7 +203,45 @@ do
else
echo "Skipping indexing: no SOLR url provided."
fi
+
+ #######################################################
+ # The following commands fall into WebGraph territory
+ # and should be uncommented based on your requirements
+ #######################################################
+ #echo "Building WebGraph within $CRAWL_PATH on all segments in $CRAWL_PATH/segments/"
+ #"$bin/nutch" webgraph $commonOptions -filter -normalize -segmentDir "$CRAWL_PATH"/segments/ -webgraphdb "$CRAWL_PATH"
+ #if [ $? -ne 0 ]
+ # then exit $?
+ #fi
+
+ #echo "Running Loops Job on WebGraph within $CRAWL_PATH"
+ #"$bin/nutch" org.apache.nutch.scoring.webgraph.Loops $commonOptions -webgraphdb "$CRAWL_PATH"
+
+ #if [ $? -ne 0 ]
+ # then exit $?
+ #fi
+
+ #echo "Running LinkRank Algorithm on WebGraph within $CRAWL_PATH"
+ #"$bin/nutch" linkrank $commonOptions -webgraphdb "$CRAWL_PATH"
+
+ #if [ $? -ne 0 ]
+ # then exit $?
+ #fi
+
+ #echo "Running ScoreUpdater Job with $CRAWL_PATH/crawldb and WebGraph within $CRAWL_PATH"
+ #"$bin/nutch" scoreupdater $commonOptions -crawldb "$CRAWL_PATH"/crawldb -webgraphdb "$CRAWL_PATH"
+
+ #if [ $? -ne 0 ]
+ # then exit $?
+ #fi
+
+ #echo "Running NodeDumper on WebGraph within $CRAWL_PATH and dumping output to $CRAWL_PATH/dump/scores"
+ #"$bin/nutch" nodedumper $commonOptions -scores -topn 1000 -webgraphdb "$CRAWL_PATH" -output "$CRAWL_PATH"/dump/scores
+
+ #if [ $? -ne 0 ]
+ # then exit $?
+ #fi
done
exit 0