Author: jnioche
Date: Mon Nov 11 10:15:03 2013
New Revision: 1540654

URL: http://svn.apache.org/r1540654
Log:
NUTCH-1666 Optimisation for BasicURLNormalizer (jnioche)

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1540654&r1=1540653&r2=1540654&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Nov 11 10:15:03 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1666 Optimisation for BasicURLNormalizer (jnioche)
+
 * NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified (markus)
 
 * NUTCH-1606 Check that Factory classes use the cache in a thread safe way (jnioche)

Modified: nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java?rev=1540654&r1=1540653&r2=1540654&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java (original)
+++ nutch/trunk/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java Mon Nov 11 10:15:03 2013
@@ -45,6 +45,8 @@ public class BasicURLNormalizer extends 
     private final Rule leadingRelativePathRule;
     private final Rule currentPathRule;
     private final Rule adjacentSlashRule;
+    
+    private final static java.util.regex.Pattern hasNormalizablePattern = java.util.regex.Pattern.compile("/\\.?\\.?/");
 
     private Configuration conf;
 
@@ -145,6 +147,10 @@ public class BasicURLNormalizer extends 
     }
 
     private String substituteUnnecessaryRelativePaths(String file) {
+       
+       if (!hasNormalizablePattern.matcher(file).find())
+               return file;
+       
         String fileWorkCopy = file;
         int oldLen = file.length();
         int newLen = oldLen - 1;


Reply via email to