fix for NUTCH-2191 contributed by karanjeets

Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/3cda2229
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/3cda2229
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/3cda2229

Branch: refs/heads/master
Commit: 3cda222971c970270dcc7525b97dfffe4b818ced
Parents: 366104d
Author: Karanjeet Singh <[email protected]>
Authored: Mon Mar 28 22:58:40 2016 -0700
Committer: Karanjeet Singh <[email protected]>
Committed: Mon Mar 28 22:58:40 2016 -0700

----------------------------------------------------------------------
 default.properties                              |   1 +
 .../protocol/htmlunit/HtmlUnitWebDriver.java    | 125 +++++++++----------
 .../htmlunit/HtmlUnitWebWindowListener.java     |  53 ++++----
 .../nutch/protocol/htmlunit/HttpResponse.java   |   5 +-
 4 files changed, 93 insertions(+), 91 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/3cda2229/default.properties
----------------------------------------------------------------------
diff --git a/default.properties b/default.properties
index d34f778..aec5d51 100644
--- a/default.properties
+++ b/default.properties
@@ -90,6 +90,7 @@ plugins.protocol=\
    org.apache.nutch.protocol.http*:\
    org.apache.nutch.protocol.httpclient*:\
    org.apache.nutch.protocol.selenium*
+   org.apache.nutch.protocol.htmlunit*
 
 #
 # URL Filter Plugins

http://git-wip-us.apache.org/repos/asf/nutch/blob/3cda2229/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebDriver.java
----------------------------------------------------------------------
diff --git a/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebDriver.java b/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebDriver.java
index fc231c3..5e2c0ac 100644
--- a/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebDriver.java
+++ b/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebDriver.java
@@ -51,79 +51,79 @@ public class HtmlUnitWebDriver extends HtmlUnitDriver {
   private static int maxRedirects;
   
   public HtmlUnitWebDriver() {
-       super(enableJavascript);
+    super(enableJavascript);
   }
   
   @Override
   protected WebClient modifyWebClient(WebClient client) {
-         client.getOptions().setJavaScriptEnabled(enableJavascript);
-         client.getOptions().setCssEnabled(enableCss);
-         client.getOptions().setRedirectEnabled(enableRedirect);
-         if(enableJavascript)
-                 client.setJavaScriptTimeout(javascriptTimeout);
-         client.getOptions().setThrowExceptionOnScriptError(false);
-         if(enableRedirect)
-                 client.addWebWindowListener(new HtmlUnitWebWindowListener(maxRedirects));
-         return client;
+    client.getOptions().setJavaScriptEnabled(enableJavascript);
+    client.getOptions().setCssEnabled(enableCss);
+    client.getOptions().setRedirectEnabled(enableRedirect);
+    if(enableJavascript)
+      client.setJavaScriptTimeout(javascriptTimeout);
+      client.getOptions().setThrowExceptionOnScriptError(false);
+      if(enableRedirect)
+        client.addWebWindowListener(new HtmlUnitWebWindowListener(maxRedirects));
+       return client;
   }
   
   public static WebDriver getDriverForPage(String url, Configuration conf) {
-         long pageLoadTimout = conf.getLong("htmlunit.page.load.delay", 3);
-         enableJavascript = conf.getBoolean("htmlunit.enable.javascript", true);
-         enableCss = conf.getBoolean("htmlunit.enable.css", false);
-         javascriptTimeout = conf.getLong("htmlunit.javascript.timeout", 3500);
-         int redirects = Integer.parseInt(conf.get("http.redirect.max", "0"));
-         enableRedirect = redirects <= 0 ? false : true;
-         maxRedirects = redirects;
+    long pageLoadTimout = conf.getLong("htmlunit.page.load.delay", 3);
+    enableJavascript = conf.getBoolean("htmlunit.enable.javascript", true);
+    enableCss = conf.getBoolean("htmlunit.enable.css", false);
+    javascriptTimeout = conf.getLong("htmlunit.javascript.timeout", 3500);
+    int redirects = Integer.parseInt(conf.get("http.redirect.max", "0"));
+    enableRedirect = redirects <= 0 ? false : true;
+    maxRedirects = redirects;
          
-         WebDriver driver = null;
+    WebDriver driver = null;
          
-         try {
-                 driver = new HtmlUnitWebDriver();
-                 driver.manage().timeouts().pageLoadTimeout(pageLoadTimout, TimeUnit.SECONDS);
-                 driver.get(url);
-         } catch(Exception e) {
-                 if(e instanceof TimeoutException) {
-                               LOG.debug("HtmlUnit WebDriver: Timeout Exception: Capturing whatever loaded so far...");
-                               return driver;
-                       }
-                       cleanUpDriver(driver);
-                   throw new RuntimeException(e);
-         }
+    try {
+      driver = new HtmlUnitWebDriver();
+      driver.manage().timeouts().pageLoadTimeout(pageLoadTimout, TimeUnit.SECONDS);
+      driver.get(url);
+     } catch(Exception e) {
+       if(e instanceof TimeoutException) {
+        LOG.debug("HtmlUnit WebDriver: Timeout Exception: Capturing whatever loaded so far...");
+        return driver;
+     }
+     cleanUpDriver(driver);
+     throw new RuntimeException(e);
+    }
 
-      return driver;
+    return driver;
   }
 
   public static String getHTMLContent(WebDriver driver, Configuration conf) {
-      try {
-                 if (conf.getBoolean("htmlunit.take.screenshot", false))
-                 takeScreenshot(driver, conf);
+    try {
+      if (conf.getBoolean("htmlunit.take.screenshot", false))
+      takeScreenshot(driver, conf);
                  
-                 String innerHtml = "";
-             if(enableJavascript) {
-                 WebElement body = driver.findElement(By.tagName("body"));
-                 innerHtml = (String)((JavascriptExecutor)driver).executeScript("return arguments[0].innerHTML;", body); 
-             }
-             else
-                 innerHtml = driver.getPageSource().replaceAll("&amp;", "&");
-             return innerHtml;
-      } catch(Exception e) {
-         TemporaryFilesystem.getDefaultTmpFS().deleteTemporaryFiles();
-         cleanUpDriver(driver);
-         throw new RuntimeException(e);
-      } 
+      String innerHtml = "";
+      if(enableJavascript) {
+       WebElement body = driver.findElement(By.tagName("body"));
+       innerHtml = (String)((JavascriptExecutor)driver).executeScript("return arguments[0].innerHTML;", body); 
+      }
+      else
+       innerHtml = driver.getPageSource().replaceAll("&amp;", "&");
+      return innerHtml;
+    } catch(Exception e) {
+       TemporaryFilesystem.getDefaultTmpFS().deleteTemporaryFiles();
+       cleanUpDriver(driver);
+       throw new RuntimeException(e);
+    } 
   }
 
   public static void cleanUpDriver(WebDriver driver) {
-      if (driver != null) {
-          try {
-                 driver.close();
-              driver.quit();
-              TemporaryFilesystem.getDefaultTmpFS().deleteTemporaryFiles();
-          } catch (Exception e) {
-              throw new RuntimeException(e);
-          }
+    if (driver != null) {
+      try {
+        driver.close();
+        driver.quit();
+        TemporaryFilesystem.getDefaultTmpFS().deleteTemporaryFiles();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
       }
+    }
   }
 
   /**
@@ -142,23 +142,22 @@ public class HtmlUnitWebDriver extends HtmlUnitDriver {
 
     try {
       if (conf.getBoolean("htmlunit.take.screenshot", false))
-         takeScreenshot(driver, conf);
+       takeScreenshot(driver, conf);
 
-      
       String innerHtml = "";
       if(enableJavascript) {
-         WebElement body = driver.findElement(By.tagName("body"));
-         innerHtml = (String)((JavascriptExecutor)driver).executeScript("return arguments[0].innerHTML;", body); 
+       WebElement body = driver.findElement(By.tagName("body"));
+       innerHtml = (String)((JavascriptExecutor)driver).executeScript("return arguments[0].innerHTML;", body); 
       }
       else
-         innerHtml = driver.getPageSource().replaceAll("&amp;", "&");
+       innerHtml = driver.getPageSource().replaceAll("&amp;", "&");
       return innerHtml;
 
     } catch (Exception e) {
-             TemporaryFilesystem.getDefaultTmpFS().deleteTemporaryFiles();
-             throw new RuntimeException(e);
+       TemporaryFilesystem.getDefaultTmpFS().deleteTemporaryFiles();
+        throw new RuntimeException(e);
     } finally {
-       cleanUpDriver(driver);
+        cleanUpDriver(driver);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/3cda2229/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebWindowListener.java
----------------------------------------------------------------------
diff --git a/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebWindowListener.java b/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebWindowListener.java
index 760f4aa..baa8774 100644
--- a/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebWindowListener.java
+++ b/src/plugin/lib-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HtmlUnitWebWindowListener.java
@@ -5,32 +5,33 @@ import com.gargoylesoftware.htmlunit.WebWindowListener;
 
 public class HtmlUnitWebWindowListener implements WebWindowListener {
 
-       private Integer redirectCount = 0;
-       private Integer maxRedirects = 0;
-       
-       public HtmlUnitWebWindowListener() {
-               
-       }
-       
-       public HtmlUnitWebWindowListener(int maxRedirects) {
-               this.maxRedirects = maxRedirects;
-       }
-       
-       @Override
-       public void webWindowOpened(WebWindowEvent event) {
-               
-       }
+  private Integer redirectCount = 0;
+  private Integer maxRedirects = 0;
+  
+  public HtmlUnitWebWindowListener() {
+    
+  }
+  
+  public HtmlUnitWebWindowListener(int maxRedirects) {
+    this.maxRedirects = maxRedirects;
+  }
+  
+  @Override
+  public void webWindowOpened(WebWindowEvent event) {
+    
+  }
 
-       @Override
-       public void webWindowContentChanged(WebWindowEvent event) {
-               redirectCount++;
-               if(redirectCount > maxRedirects)
-                       throw new RuntimeException("Redirect Count: " + redirectCount + " exceeded the Maximum Redirects allowed: " + maxRedirects);
-       }
+  @Override
+  public void webWindowContentChanged(WebWindowEvent event) {
+    redirectCount++;
+    if(redirectCount > maxRedirects)
+      throw new RuntimeException("Redirect Count: " + redirectCount + " exceeded the Maximum Redirects allowed: " + maxRedirects);
+  }
 
-       @Override
-       public void webWindowClosed(WebWindowEvent event) {
-               
-       }
-       
+  @Override
+  public void webWindowClosed(WebWindowEvent event) {
+    
+  }
+  
 }
+

http://git-wip-us.apache.org/repos/asf/nutch/blob/3cda2229/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
index 72b1fa1..a2f3b1e 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
+++ b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
@@ -344,7 +344,8 @@ public class HttpResponse implements Response {
 
   @Override
   public int getCode() {
-       // TODO Auto-generated method stub
-       return code;
+  // TODO Auto-generated method stub
+  return code;
   }
 }
+

Reply via email to