Author: mattmann
Date: Sat Aug 29 23:40:26 2015
New Revision: 1700083
URL: http://svn.apache.org/r1700083
Log:
NUTCH-2088 - Add URL Processing Check to Interactive Selenium Handlers
Contributed by Michael Joyce <[email protected]>. This closes #53.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultHandler.java
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/InteractiveSeleniumHandler.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1700083&r1=1700082&r2=1700083&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Aug 29 23:40:26 2015
@@ -2,6 +2,9 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2088 Add URL Processing Check to Interactive Selenium
+ Handlers (Michael Joyce via mattmann)
+
* NUTCH-2077 Upgrade to Tika 1.10 (Michael Joyce via lewismc)
* NUTCH-1517 CloudSearch indexer (jnioche)
Modified:
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java?rev=1700083&r1=1700082&r2=1700083&view=diff
==============================================================================
---
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
(original)
+++
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
Sat Aug 29 23:40:26 2015
@@ -271,6 +271,10 @@ public class HttpResponse implements Res
String processedPage = "";
for (InteractiveSeleniumHandler handler : this.handlers) {
+ if (! handler.shouldProcessURL(url.toString())) {
+ continue;
+ }
+
WebDriver driver = HttpWebClient.getDriverForPage(url.toString(),
conf);
handler.processDriver(driver);
Modified:
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultHandler.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultHandler.java?rev=1700083&r1=1700082&r2=1700083&view=diff
==============================================================================
---
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultHandler.java
(original)
+++
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/DefaultHandler.java
Sat Aug 29 23:40:26 2015
@@ -21,4 +21,8 @@ import org.openqa.selenium.WebDriver;
public class DefaultHandler implements InteractiveSeleniumHandler {
public void processDriver(WebDriver driver) {}
+
+ public boolean shouldProcessURL(String URL) {
+ return true;
+ }
}
Modified:
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/InteractiveSeleniumHandler.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/InteractiveSeleniumHandler.java?rev=1700083&r1=1700082&r2=1700083&view=diff
==============================================================================
---
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/InteractiveSeleniumHandler.java
(original)
+++
nutch/trunk/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/handlers/InteractiveSeleniumHandler.java
Sat Aug 29 23:40:26 2015
@@ -21,4 +21,5 @@ import org.openqa.selenium.WebDriver;
public interface InteractiveSeleniumHandler {
public void processDriver(WebDriver driver);
+ public boolean shouldProcessURL(String URL);
}