Author: mattmann
Date: Fri Nov 25 02:01:07 2011
New Revision: 1206038
URL: http://svn.apache.org/viewvc?rev=1206038&view=rev
Log:
fix for NUTCH-1211 URLFilterChecker command line help doesn't inform user of
STDIN requirements
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java
nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1206038&r1=1206037&r2=1206038&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Nov 25 02:01:07 2011
@@ -1,5 +1,8 @@
Nutch Change Log
+* NUTCH-1211 URLFilterChecker command line help doesn't inform user of
+ STDIN requirements (mattmann)
+
* NUTCH-1209 Output from ParserChecker Url missing a newline (mattmann)
* NUTCH-1207 ParserChecker to output signature (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java?rev=1206038&r1=1206037&r2=1206038&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java Fri Nov 25 02:01:07 2011
@@ -105,7 +105,8 @@ public class URLFilterChecker {
public static void main(String[] args) throws Exception {
- String usage = "Usage: URLFilterChecker (-filterName filterName | -allCombined)";
+ String usage = "Usage: URLFilterChecker (-filterName filterName | -allCombined) \n"
+ + "Tool takes a list of URLs, one per line, passed via STDIN.\n";
if (args.length == 0) {
System.err.println(usage);
Modified: nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java?rev=1206038&r1=1206037&r2=1206038&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java Fri Nov 25 02:01:07 2011
@@ -99,7 +99,15 @@ public class RegexURLFilter extends Rege
}
protected boolean match(String url) {
- return pattern.matcher(url).find();
+ boolean matched = pattern.matcher(url).find();
+ if(url.indexOf("at_download") != -1){
+ System.out.println("@#((#(#@ EVALUATING at_download LINK!: ["+url+"]: matched? ["+matched+"]");
+ }
+ else
+ {System.out.println("URL: ["+url+"] doesn't have at_download in it!");
+ }
+ return matched;
+ //return pattern.matcher(url).find();
}
}