Format HttpFormAuthentication.java with the Eclipse formatter and add Javadoc. Add a cookie policy configuration example to httpclient-auth.xml.template.
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/753cad0b Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/753cad0b Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/753cad0b Branch: refs/heads/master Commit: 753cad0bd66ab525eb618d7f0e947eec034e207d Parents: 993e997 Author: Steve Yao <[email protected]> Authored: Wed Jul 13 12:21:26 2016 +0800 Committer: Steve Yao <[email protected]> Committed: Wed Jul 13 12:21:26 2016 +0800 ---------------------------------------------------------------------- conf/httpclient-auth.xml.template | 6 ++ .../httpclient/HttpFormAuthentication.java | 62 +++++++++++--------- 2 files changed, 40 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/753cad0b/conf/httpclient-auth.xml.template ---------------------------------------------------------------------- diff --git a/conf/httpclient-auth.xml.template b/conf/httpclient-auth.xml.template index ce5ed7e..9d23093 100644 --- a/conf/httpclient-auth.xml.template +++ b/conf/httpclient-auth.xml.template @@ -82,6 +82,9 @@ <removedFormFields> <field name="ctl00$MainContent$LoginUser$RememberMe"/> </removedFormFields> + <loginCookie> + <policy>BROWSER_COMPATIBILITY</policy> + </loginCookie> </credentials> it is critical that the following fields are substituted: @@ -98,6 +101,9 @@ the field and password respectively * <field name="ctl00$MainContent$LoginUser$RememberMe"/> - form element attributes for which we wish to skip fields + * <policy> value from <loginCookie> is a constant value symbol from + org.apache.commons.httpclient.cookie.CookiePolicy, like BROWSER_COMPATIBILITY, + DEFAULT, RFC_2109, etc. 
More information on HTTP POST can be located at https://wiki.apache.org/nutch/HttpPostAuthentication http://git-wip-us.apache.org/repos/asf/nutch/blob/753cad0b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java index a6d4aa4..2f36538 100644 --- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java +++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpFormAuthentication.java @@ -51,8 +51,7 @@ public class HttpFormAuthentication { static { defaultLoginHeaders.put("User-Agent", "Mozilla/5.0"); - defaultLoginHeaders - .put("Accept", + defaultLoginHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); defaultLoginHeaders.put("Accept-Language", "en-US,en;q=0.5"); defaultLoginHeaders.put("Connection", "keep-alive"); @@ -79,15 +78,12 @@ public class HttpFormAuthentication { Set<String> removedFormFields) { this.authConfigurer.setLoginUrl(loginUrl); this.authConfigurer.setLoginFormId(loginForm); - this.authConfigurer - .setLoginPostData(loginPostData == null ? new HashMap<String, String>() - : loginPostData); - this.authConfigurer - .setAdditionalPostHeaders(additionalPostHeaders == null ? new HashMap<String, String>() - : additionalPostHeaders); - this.authConfigurer - .setRemovedFormFields(removedFormFields == null ? new HashSet<String>() - : removedFormFields); + this.authConfigurer.setLoginPostData( + loginPostData == null ? new HashMap<String, String>() : loginPostData); + this.authConfigurer.setAdditionalPostHeaders(additionalPostHeaders == null + ? 
new HashMap<String, String>() : additionalPostHeaders); + this.authConfigurer.setRemovedFormFields( + removedFormFields == null ? new HashSet<String>() : removedFormFields); this.client = new HttpClient(); } @@ -118,11 +114,11 @@ public class HttpFormAuthentication { // Entity enclosing requests cannot be redirected without user // intervention setLoginHeader(post); - + // NUTCH-2280 LOGGER.debug("FormAuth: set cookie policy"); this.setCookieParams(authConfigurer, post.getParams()); - + post.addParameters(params.toArray(new NameValuePair[0])); int rspCode = client.executeMethod(post); if (LOGGER.isDebugEnabled()) { @@ -143,25 +139,34 @@ public class HttpFormAuthentication { } } } - + /** + * NUTCH-2280 Set the cookie policy value from httpclient-auth.xml for the + * Post httpClient action. + * + * @param fromConfigurer + * - the httpclient-auth.xml values + * + * @param params + * - the HttpMethodParams from the current httpclient instance + * * @throws NoSuchFieldException * @throws SecurityException * @throws IllegalArgumentException * @throws IllegalAccessException */ private void setCookieParams(HttpFormAuthConfigurer formConfigurer, - HttpMethodParams params) - throws NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException { - // NUTCH-2280 - set the HttpClient cookie policy - if (formConfigurer.getCookiePolicy() != null) { - String policy = formConfigurer.getCookiePolicy(); - Object p = FieldUtils.readDeclaredStaticField(CookiePolicy.class, policy); - if(null != p) { - LOGGER.debug("reflection of cookie value: " + p.toString()); - params.setParameter(HttpMethodParams.COOKIE_POLICY, p); - } - } + HttpMethodParams params) throws NoSuchFieldException, SecurityException, + IllegalArgumentException, IllegalAccessException { + // NUTCH-2280 - set the HttpClient cookie policy + if (formConfigurer.getCookiePolicy() != null) { + String policy = formConfigurer.getCookiePolicy(); + Object p = 
FieldUtils.readDeclaredStaticField(CookiePolicy.class, policy); + if (null != p) { + LOGGER.debug("reflection of cookie value: " + p.toString()); + params.setParameter(HttpMethodParams.COOKIE_POLICY, p); + } + } } private void setLoginHeader(PostMethod post) { @@ -204,12 +209,13 @@ public class HttpFormAuthentication { if (loginform == null) { LOGGER.debug("No form element found with 'id' = {}, trying 'name'.", authConfigurer.getLoginFormId()); - loginform = doc.select("form[name="+ authConfigurer.getLoginFormId() + "]").first(); + loginform = doc + .select("form[name=" + authConfigurer.getLoginFormId() + "]").first(); if (loginform == null) { LOGGER.debug("No form element found with 'name' = {}", authConfigurer.getLoginFormId()); - throw new IllegalArgumentException("No form exists: " - + authConfigurer.getLoginFormId()); + throw new IllegalArgumentException( + "No form exists: " + authConfigurer.getLoginFormId()); } } Elements inputElements = loginform.getElementsByTag("input");
