Author: mattmann
Date: Fri Nov 25 02:01:07 2011
New Revision: 1206038

URL: http://svn.apache.org/viewvc?rev=1206038&view=rev
Log:
fix for NUTCH-1211 URLFilterChecker command line help doesn't inform user of 
STDIN requirements

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java
    
nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1206038&r1=1206037&r2=1206038&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Nov 25 02:01:07 2011
@@ -1,5 +1,8 @@
 Nutch Change Log
 
+* NUTCH-1211 URLFilterChecker command line help doesn't inform user of 
+  STDIN requirements (mattmann)
+
 * NUTCH-1209 Output from ParserChecker Url missing a newline (mattmann)
 
 * NUTCH-1207 ParserChecker to output signature (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java?rev=1206038&r1=1206037&r2=1206038&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java Fri Nov 25 02:01:07 2011
@@ -105,7 +105,8 @@ public class URLFilterChecker {
 
   public static void main(String[] args) throws Exception {
 
-    String usage = "Usage: URLFilterChecker (-filterName filterName | -allCombined)";
+    String usage = "Usage: URLFilterChecker (-filterName filterName | -allCombined) \n" 
+       + "Tool takes a list of URLs, one per line, passed via STDIN.\n";
 
     if (args.length == 0) {
       System.err.println(usage);

Modified: 
nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java?rev=1206038&r1=1206037&r2=1206038&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/urlfilter/regex/RegexURLFilter.java Fri Nov 25 02:01:07 2011
@@ -99,7 +99,15 @@ public class RegexURLFilter extends Rege
     }
 
     protected boolean match(String url) {
-      return pattern.matcher(url).find();
+       boolean matched = pattern.matcher(url).find();
+        if(url.indexOf("at_download") != -1){
+           System.out.println("@#((#(#@ EVALUATING at_download LINK!: ["+url+"]: matched? ["+matched+"]");
+        }
+        else 
+           {System.out.println("URL: ["+url+"] doesn't have at_download in it!");
+           }
+       return matched;
+       //return pattern.matcher(url).find();
     }
   }
   


Reply via email to