Merge and resolve conflicts with NUTCH-2252
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/7f211ecd Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/7f211ecd Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/7f211ecd Branch: refs/heads/master Commit: 7f211ecdc38b15c957be8dd1bb528f40d9218abc Parents: 37458a9 d953b96 Author: Chris Mattmann <[email protected]> Authored: Sat May 7 10:50:56 2016 -1000 Committer: Chris Mattmann <[email protected]> Committed: Sat May 7 10:50:56 2016 -1000 ---------------------------------------------------------------------- conf/nutch-default.xml | 4 +- .../nutch/protocol/selenium/HttpWebClient.java | 41 +++++++++++++------- src/plugin/protocol-selenium/README.md | 25 +++++++++--- 3 files changed, 48 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/7f211ecd/conf/nutch-default.xml ---------------------------------------------------------------------- diff --cc conf/nutch-default.xml index de0c2dc,a9cce43..641809f --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@@ -1915,51 -1919,6 +1915,51 @@@ visit https://wiki.apache.org/nutch/Sim </description> </property> +<!-- lib-htmlunit plugin properties; applies to protocol-htmlunit --> + +<property> + <name>htmlunit.enable.javascript</name> + <value>true</value> + <description> + A Boolean value representing if javascript should + be enabled or disabled when using htmlunit. The default value is enabled. + </description> +</property> + +<property> + <name>htmlunit.javascript.timeout</name> + <value>3500</value> + <description> + The timeout in milliseconds when loading javascript with lib-htmlunit. This + setting is used by protocol-htmlunit since they depending on + lib-htmlunit for fetching. 
+ </description> +</property> + +<property> + <name>htmlunit.enable.css</name> + <value>false</value> + <description> + A Boolean value representing if CSS should + be enabled or disabled when using htmlunit. The default value is disabled. + </description> +</property> + +<!-- protocol-selenium plugin properties --> + +<property> + <name>selenium.driver</name> + <value>firefox</value> + <description> + A String value representing the flavour of Selenium + WebDriver() to use. Currently the following options - exist - 'firefox', 'chrome', 'safari', 'opera' and 'remote'. ++ exist - 'firefox', 'chrome', 'safari', 'opera', 'phantomjs' and 'remote'. + If 'remote' is used it is essential to also set correct properties for + 'selenium.hub.port', 'selenium.hub.path', 'selenium.hub.host', + 'selenium.hub.protocol', 'selenium.grid.driver' and 'selenium.grid.binary'. + </description> +</property> + <property> <name>selenium.hub.port</name> <value>4444</value> http://git-wip-us.apache.org/repos/asf/nutch/blob/7f211ecd/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java ---------------------------------------------------------------------- diff --cc src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java index 3a20cfe,8ec3fa9..86692ae --- a/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java +++ b/src/plugin/lib-selenium/src/java/org/apache/nutch/protocol/selenium/HttpWebClient.java @@@ -173,9 -186,9 +186,9 @@@ public class HttpWebClient */ public static String getHtmlPage(String url, Configuration conf) { WebDriver driver = getDriverForPage(url, conf); - + try { - if (conf.getBoolean("selenium.take.screenshot", false)) { + if (conf.getBoolean("take.screenshot", false)) { takeScreenshot(driver, conf); }
