Author: mattmann
Date: Sat Sep 12 17:15:45 2015
New Revision: 1702649

URL: http://svn.apache.org/r1702649
Log:
Fix for NUTCH-2096: Explicitly indicate browser binary to use when selecting 
selenium remote option in config. Contributed by Kim Whitehall 
<[email protected]>; this closes #56.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/conf/nutch-default.xml
    
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1702649&r1=1702648&r2=1702649&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Sep 12 17:15:45 2015
@@ -2,6 +2,9 @@ Nutch Change Log
   
 Nutch Current Development 1.11-SNAPSHOT
 
+* NUTCH-2096 Explicitly indicate browser binary to use when selecting 
+  selenium remote option in config (Kim Whitehall via mattmann)
+
 * NUTCH-2090 Refactor Seed Resource in REST API (Sujen Shah
   via mattmann)
 

Modified: nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1702649&r1=1702648&r2=1702649&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Sat Sep 12 17:15:45 2015
@@ -1787,8 +1787,8 @@ CAUTION: Set the parser.timeout to -1 or
     WebDriver() to use. Currently the following options
     exist - 'firefox', 'chrome', 'safari', 'opera' and 'remote'.
     If 'remote' is used it is essential to also set correct properties for
-    'selenium.hub.port', 'selenium.hub.path', 'selenium.hub.host' and
-    'selenium.hub.protocol'.
+    'selenium.hub.port', 'selenium.hub.path', 'selenium.hub.host',
+    'selenium.hub.protocol', 'selenium.grid.driver' and 'selenium.grid.binary'.
   </description>
 </property>
 
@@ -1840,6 +1840,22 @@ CAUTION: Set the parser.timeout to -1 or
   <description>Selenium Hub Location connection protocol</description>
 </property>
 
+<property>
+  <name>selenium.grid.driver</name>
+  <value>firefox</value>
+  <description>A String value representing the flavour of Selenium 
+    WebDriver() used on the selenium grid. Currently the following options
+    exist - 'firefox' </description>
+</property>
+
+<property>
+  <name>selenium.grid.binary</name>
+  <value></value>
+  <description>A String value representing the path to the browser binary 
+    location for each node
+ </description>
+</property>
+
 <!-- lib-selenium configuration -->
 <property>
   <name>libselenium.page.load.delay</name>

Modified: 
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java?rev=1702649&r1=1702648&r2=1702649&view=diff
==============================================================================
--- 
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
 (original)
+++ 
nutch/trunk/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java
 Sat Sep 12 17:15:45 2015
@@ -64,6 +64,7 @@ public class HttpWebClient {
 
   public static WebDriver getDriverForPage(String url, Configuration conf) {
       WebDriver driver = null;
+      DesiredCapabilities capabilities = null;
       long pageLoadWait = conf.getLong("libselenium.page.load.delay", 3);
 
       try {
@@ -86,8 +87,22 @@ public class HttpWebClient {
             int seleniumHubPort = 
Integer.parseInt(conf.get("selenium.hub.port", "4444"));
             String seleniumHubPath = conf.get("selenium.hub.path", "/wd/hub");
             String seleniumHubProtocol = conf.get("selenium.hub.protocol", 
"http");
-            driver = new RemoteWebDriver(new URL(seleniumHubProtocol, 
seleniumHubHost, seleniumHubPort, seleniumHubPath), 
DesiredCapabilities.firefox());
-            break;
+            String seleniumGridDriver = 
conf.get("selenium.grid.driver","firefox");
+            String seleniumGridBinary = conf.get("selenium.grid.binary");
+
+            switch (seleniumGridDriver){
+              case "firefox":
+                capabilities = DesiredCapabilities.firefox();
+                capabilities.setBrowserName("firefox");
+                capabilities.setJavascriptEnabled(true);
+                
capabilities.setCapability("firefox_binary",seleniumGridBinary);
+                driver = new RemoteWebDriver(new URL(seleniumHubProtocol, 
seleniumHubHost, seleniumHubPort, seleniumHubPath), capabilities);
+                break;
+              default:
+                LOG.error("The Selenium Grid WebDriver choice {} is not 
available... defaulting to FirefoxDriver().", driverType);
+                driver = new RemoteWebDriver(new URL(seleniumHubProtocol, 
seleniumHubHost, seleniumHubPort, seleniumHubPath), 
DesiredCapabilities.firefox());
+                break;
+            }
           default:
             LOG.error("The Selenium WebDriver choice {} is not available... 
defaulting to FirefoxDriver().", driverType);
             driver = new FirefoxDriver();


Reply via email to