Author: markus
Date: Wed Feb 3 16:03:22 2016
New Revision: 1728339
URL: http://svn.apache.org/viewvc?rev=1728339&view=rev
Log:
NUTCH-2211 Added filterchecker and normalizerchecker to bin/nutch script
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/bin/nutch
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1728339&r1=1728338&r2=1728339&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Feb 3 16:03:22 2016
@@ -1,5 +1,7 @@
Nutch Change Log
+* NUTCH-2211 Added filterchecker and normalizerchecker to bin/nutch script (markus)
+
* NUTCH-2197 Add Solr 5 cloud indexer support (Jurian Broertjes via markus)
* NUTCH-2206 Provide example scoring.similarity.stopword.file (sujen)
Modified: nutch/trunk/src/bin/nutch
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/nutch?rev=1728339&r1=1728338&r2=1728339&view=diff
==============================================================================
--- nutch/trunk/src/bin/nutch (original)
+++ nutch/trunk/src/bin/nutch Wed Feb 3 16:03:22 2016
@@ -79,6 +79,8 @@ if [ $# = 0 ]; then
echo " clean remove HTTP 301 and 404 documents and duplicates from indexing backends configured via plugins"
echo " parsechecker check the parser for a given url"
echo " indexchecker check the indexing filters for a given url"
+ echo " filterchecker check url filters for a given url"
+ echo " normalizerchecker check url normalizers for a given url"
echo " domainstats calculate domain statistics from crawldb"
echo " protocolstats calculate protocol status code stats from crawldb"
echo " crawlcomplete calculate crawl completion stats from crawldb"
@@ -262,6 +264,10 @@ elif [ "$COMMAND" = "parsechecker" ] ; t
CLASS=org.apache.nutch.parse.ParserChecker
elif [ "$COMMAND" = "indexchecker" ] ; then
CLASS=org.apache.nutch.indexer.IndexingFiltersChecker
+elif [ "$COMMAND" = "filterchecker" ] ; then
+ CLASS=org.apache.nutch.net.URLFilterChecker
+elif [ "$COMMAND" = "normalizerchecker" ] ; then
+ CLASS=org.apache.nutch.net.URLNormalizerChecker
elif [ "$COMMAND" = "domainstats" ] ; then
CLASS=org.apache.nutch.util.domain.DomainStatistics
elif [ "$COMMAND" = "protocolstats" ] ; then