Author: mattmann
Date: Sat Sep 12 17:15:45 2015
New Revision: 1702649
URL: http://svn.apache.org/r1702649
Log:
Fix for NUTCH-2096: Explicitly indicate browser binary to use when selecting
selenium remote option in config. Contributed by Kim Whitehall
<[email protected]>; this closes #56.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1702649&r1=1702648&r2=1702649&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Sep 12 17:15:45 2015
@@ -2,6 +2,9 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2096 Explicitly indicate browser binary to use when selecting
+ selenium remote option in config (Kim Whitehall via mattmann)
+
* NUTCH-2090 Refactor Seed Resource in REST API (Sujen Shah
via mattmann)
Modified: nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1702649&r1=1702648&r2=1702649&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Sat Sep 12 17:15:45 2015
@@ -1787,8 +1787,8 @@ CAUTION: Set the parser.timeout to -1 or
WebDriver() to use. Currently the following options
exist - 'firefox', 'chrome', 'safari', 'opera' and 'remote'.
If 'remote' is used it is essential to also set correct properties for
- 'selenium.hub.port', 'selenium.hub.path', 'selenium.hub.host' and
- 'selenium.hub.protocol'.
+ 'selenium.hub.port', 'selenium.hub.path', 'selenium.hub.host',
+ 'selenium.hub.protocol', 'selenium.grid.driver' and 'selenium.grid.binary'.
</description>
</property>
@@ -1840,6 +1840,22 @@ CAUTION: Set the parser.timeout to -1 or
<description>Selenium Hub Location connection protocol</description>
</property>
+<property>
+ <name>selenium.grid.driver</name>
+ <value>firefox</value>
+ <description>A String value representing the flavour of Selenium
+ WebDriver() used on the selenium grid. Currently the only supported
+ option is 'firefox'.</description>
+</property>
+
+<property>
+ <name>selenium.grid.binary</name>
+ <value></value>
+ <description>A String value representing the path to the browser binary
+ on each selenium grid node.
+ </description>
+</property>
+
<!-- lib-selenium configuration -->
<property>
<name>libselenium.page.load.delay</name>
Modified:
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java?rev=1702649&r1=1702648&r2=1702649&view=diff
==============================================================================
---
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
(original)
+++
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
Sat Sep 12 17:15:45 2015
@@ -64,6 +64,7 @@ public class HttpWebClient {
public static WebDriver getDriverForPage(String url, Configuration conf) {
WebDriver driver = null;
+ DesiredCapabilities capabilities = null;
long pageLoadWait = conf.getLong("libselenium.page.load.delay", 3);
try {
@@ -86,8 +87,22 @@ public class HttpWebClient {
int seleniumHubPort =
Integer.parseInt(conf.get("selenium.hub.port", "4444"));
String seleniumHubPath = conf.get("selenium.hub.path", "/wd/hub");
String seleniumHubProtocol = conf.get("selenium.hub.protocol",
"http");
- driver = new RemoteWebDriver(new URL(seleniumHubProtocol,
seleniumHubHost, seleniumHubPort, seleniumHubPath),
DesiredCapabilities.firefox());
- break;
+ String seleniumGridDriver =
conf.get("selenium.grid.driver","firefox");
+ String seleniumGridBinary = conf.get("selenium.grid.binary");
+
+ switch (seleniumGridDriver){
+ case "firefox":
+ capabilities = DesiredCapabilities.firefox();
+ capabilities.setBrowserName("firefox");
+ capabilities.setJavascriptEnabled(true);
+
capabilities.setCapability("firefox_binary",seleniumGridBinary);
+ driver = new RemoteWebDriver(new URL(seleniumHubProtocol,
seleniumHubHost, seleniumHubPort, seleniumHubPath), capabilities);
+ break;
+ default:
+ LOG.error("The Selenium Grid WebDriver choice {} is not
available... defaulting to FirefoxDriver().", driverType);
+ driver = new RemoteWebDriver(new URL(seleniumHubProtocol,
seleniumHubHost, seleniumHubPort, seleniumHubPath),
DesiredCapabilities.firefox());
+ break;
+ }
default:
LOG.error("The Selenium WebDriver choice {} is not available...
defaulting to FirefoxDriver().", driverType);
driver = new FirefoxDriver();