Author: snagel
Date: Thu Oct 9 19:20:51 2014
New Revision: 1630565

URL: http://svn.apache.org/r1630565
Log:
NUTCH-1164 JUnit tests for protocol-http

Added: nutch/trunk/src/plugin/protocol-http/jsp/ nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp (with props) nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp (with props) nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp (with props) nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp (with props) nutch/trunk/src/plugin/protocol-http/src/test/conf/ nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml (with props) nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (with props) Modified: nutch/trunk/CHANGES.txt nutch/trunk/build.xml nutch/trunk/src/plugin/build.xml nutch/trunk/src/plugin/protocol-http/build.xml Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1630565&r1=1630564&r2=1630565&view=diff ============================================================================== --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Thu Oct 9 19:20:51 2014 @@ -2,6 +2,8 @@ Nutch Change Log Nutch Current Development 1.10-SNAPSHOT +* NUTCH-1164 Write JUnit tests for protocol-http (nimafl via snagel) + * NUTCH-1868 Document and improve CLI for FileDumper tool (lewismc) * NUTCH-1869 Add a flag to -mimeType fiag to FileDumper (lewismc) @@ -10,7 +12,7 @@ Nutch Current Development 1.10-SNAPSHOT * NUTCH-1826, NUTCH-1864 indexchecker fails if solr.server.url not configured (lewismc, snagel) -* NUTCH-1866 ant eclipse target should not delete runtime (nimafl vai lewismc) +* NUTCH-1866 ant eclipse target should not delete runtime (nimafl via lewismc) * NUTCH-1857 readb -dump -format csv should use comma (lewismc) Modified: nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff ============================================================================== --- nutch/trunk/build.xml (original) +++ nutch/trunk/build.xml Thu Oct 9 19:20:51 2014 @@ -992,7 +992,7 @@ <source 
path="${plugins.dir}/protocol-httpclient/src/java/" /> <source path="${plugins.dir}/protocol-httpclient/src/test/" /> <source path="${plugins.dir}/protocol-http/src/java/" /> - <!-- <source path="${plugins.dir}/protocol-http/src/test/" /> --> + <source path="${plugins.dir}/protocol-http/src/test/" /> <source path="${plugins.dir}/scoring-depth/src/java/" /> <source path="${plugins.dir}/scoring-link/src/java/" /> <source path="${plugins.dir}/scoring-opic/src/java/" /> Modified: nutch/trunk/src/plugin/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff ============================================================================== --- nutch/trunk/src/plugin/build.xml (original) +++ nutch/trunk/src/plugin/build.xml Thu Oct 9 19:20:51 2014 @@ -88,6 +88,7 @@ <ant dir="language-identifier" target="test"/> <ant dir="lib-http" target="test"/> <ant dir="protocol-file" target="test"/> + <ant dir="protocol-http" target="test"/> <ant dir="protocol-httpclient" target="test"/> <!--ant dir="parse-ext" target="test"/--> <ant dir="feed" target="test"/> Modified: nutch/trunk/src/plugin/protocol-http/build.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-http/build.xml (original) +++ nutch/trunk/src/plugin/protocol-http/build.xml Thu Oct 9 19:20:51 2014 @@ -29,12 +29,22 @@ <fileset dir="${nutch.root}/build"> <include name="**/lib-http/*.jar" /> </fileset> + <pathelement location="${build.dir}/test/conf"/> </path> <!-- Deploy Unit test dependencies --> <target name="deps-test"> <ant target="deploy" inheritall="false" dir="../lib-http"/> <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/> + <copy toDir="${build.test}"> + <fileset dir="${src.test}" excludes="**/*.java"/> + </copy> </target> + <!-- for junit 
test --> + <mkdir dir="${build.test}/data" /> + <copy todir="${build.test}/data"> + <fileset dir="jsp"/> + </copy> + </project> Added: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp?rev=1630565&view=auto ============================================================================== --- nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp (added) +++ nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp Thu Oct 9 19:20:51 2014 @@ -0,0 +1,44 @@ +<%-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--%><%-- + Example JSP Page to Test Protocol-Http Plugin +--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><% +String path = request.getContextPath(); +String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; +%> + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <base href="<%=basePath%>"> + + <title>HelloWorld</title> + <meta http-equiv="content-type" content="text/html;charset=utf-8" /> + <meta name="Language" content="en" /> + <meta http-equiv="pragma" content="no-cache"> + <meta http-equiv="cache-control" content="no-cache"> + <meta http-equiv="expires" content="0"> + <meta http-equiv="keywords" content="keyword1,keyword2,keyword3"> + <meta http-equiv="description" content="This is my page"> + <!-- + <link rel="stylesheet" type="text/css" href="styles.css"> + --> + </head> + + <body> + Hello World!!! <br> + </body> +</html> Propchange: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp ------------------------------------------------------------------------------ svn:eol-style = native Added: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp?rev=1630565&view=auto ============================================================================== --- nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp (added) +++ nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp Thu Oct 9 19:20:51 2014 @@ -0,0 +1,47 @@ +<%-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--%><%-- + Example JSP Page to Test Protocol-Http Plugin +--%> + +@ page language="java" import="java.util.*" pageEncoding="UTF-8" + +String path = request.getContextPath(); +String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; + + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <base href="<%=basePath%>"> + + <title>HelloWorld</title> + <meta http-equiv="content-type" content="text/html;charset=utf-8" /> + <meta name="Language" content="en" /> + <meta http-equiv="pragma" content="no-cache"> + <meta http-equiv="cache-control" content="no-cache"> + <meta http-equiv="expires" content="0"> + <meta http-equiv="keywords" content="keyword1,keyword2,keyword3"> + <meta http-equiv="description" content="This is my page"> + <!-- + <link rel="stylesheet" type="text/css" href="styles.css"> + --> + </head> + + <body> + Hello World!!! 
<br> + </body> +</html> Propchange: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp ------------------------------------------------------------------------------ svn:eol-style = native Added: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp?rev=1630565&view=auto ============================================================================== --- nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp (added) +++ nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp Thu Oct 9 19:20:51 2014 @@ -0,0 +1,49 @@ +<%-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--%><%-- + Example JSP Page to Test Protocol-Http Plugin +--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><% +String path = request.getContextPath(); +String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; +%> + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <base href="<%=basePath%>"> + + <title>My JSP page</title> + + <meta http-equiv="pragma" content="no-cache"> + <meta http-equiv="cache-control" content="no-cache"> + <meta http-equiv="expires" content="0"> + <meta http-equiv="keywords" content="keyword1,keyword2,keyword3"> + <meta http-equiv="description" content="This is my page"> + <!-- + <link rel="stylesheet" type="text/css" href="styles.css"> + --> + + </head> + + <body> + <% + response.setStatus(301); + response.setHeader( "Location", "http://nutch.apache.org"); + response.setHeader( "Connection", "close" ); + %> + You are redirected by JSP<br> + </body> +</html> Propchange: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp ------------------------------------------------------------------------------ svn:eol-style = native Added: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp?rev=1630565&view=auto ============================================================================== --- nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp (added) +++ nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp Thu Oct 9 19:20:51 2014 @@ -0,0 +1,49 @@ +<%-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--%><%-- + Example JSP Page to Test Protocol-Http Plugin +--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><% +String path = request.getContextPath(); +String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; +%> + +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <base href="<%=basePath%>"> + + <title>My JSP page</title> + + <meta http-equiv="pragma" content="no-cache"> + <meta http-equiv="cache-control" content="no-cache"> + <meta http-equiv="expires" content="0"> + <meta http-equiv="keywords" content="keyword1,keyword2,keyword3"> + <meta http-equiv="description" content="This is my page"> + <!-- + <link rel="stylesheet" type="text/css" href="styles.css"> + --> + + </head> + + <body> + <% + response.setStatus(302); + response.setHeader( "Location", "http://nutch.apache.org"); + response.setHeader( "Connection", "close" ); + %> + You are sucessfully redirected by JSP<br> + </body> +</html> Propchange: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp ------------------------------------------------------------------------------ svn:eol-style = native Added: nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml?rev=1630565&view=auto 
============================================================================== --- nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml (added) +++ nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml Thu Oct 9 19:20:51 2014 @@ -0,0 +1,52 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<configuration> + +<property> + <name>http.robots.agents</name> + <value>Nutch-Test,*</value> + <description></description> +</property> + +<property> + <name>http.agent.name</name> + <value>Nutch-Test</value> + <description></description> +</property> + +<property> + <name>http.agent.description</name> + <value>Nutch protocol-httpclient test</value> + <description></description> +</property> + +<property> + <name>http.auth.file</name> + <value>httpclient-auth-test.xml</value> + <description></description> +</property> + +<property> + <name>http.timeout</name> + <value>60000</value> + <description></description> +</property> + +</configuration> \ No newline at end of file Propchange: nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml ------------------------------------------------------------------------------ svn:eol-style = native Added: nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1630565&view=auto ============================================================================== --- nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (added) +++ nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Thu Oct 9 19:20:51 2014 @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nutch.protocol.http; + +import static org.junit.Assert.assertEquals; + +import java.net.URL; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.nutch.crawl.CrawlDatum; +import org.apache.nutch.net.protocols.Response; +import org.apache.nutch.protocol.Content; +import org.apache.nutch.protocol.ProtocolOutput; +import org.junit.After; +import org.junit.Test; +import org.mortbay.jetty.Server; +import org.mortbay.jetty.nio.SelectChannelConnector; +import org.mortbay.jetty.servlet.Context; +import org.mortbay.jetty.servlet.ServletHolder; + +/** + * Test cases for protocol-http + */ +public class TestProtocolHttp { + private static final String RES_DIR = System.getProperty("test.data", "."); + + private Http http; + private Server server; + private Context root; + private Configuration conf; + private int port; + + public void setUp(boolean redirection) throws Exception { + conf = new Configuration(); + conf.addResource("nutch-default.xml"); + conf.addResource("nutch-site-test.xml"); + + http = new Http(); + http.setConf(conf); + + server = new Server(); + + if (redirection) { + root = new Context(server, "/redirection", Context.SESSIONS); + root.setAttribute("newContextURL", "/redirect"); + } else { + root = new Context(server, "/", Context.SESSIONS); + } + + ServletHolder sh = new ServletHolder( + org.apache.jasper.servlet.JspServlet.class); + root.addServlet(sh, "*.jsp"); + root.setResourceBase(RES_DIR); + } + + @After + public void tearDown() throws Exception { + server.stop(); 
+ } + + @Test + public void testStatusCode() throws Exception { + startServer(47504, false); + fetchPage("/basic-http.jsp", 200); + fetchPage("/redirect301.jsp", 301); + fetchPage("/redirect302.jsp", 302); + fetchPage("/nonexists.html", 404); + fetchPage("/brokenpage.jsp", 500); + } + + @Test + public void testRedirectionJetty() throws Exception { + // Redirection via Jetty + startServer(47503, true); + fetchPage("/redirection", 302); + } + + /** + * Starts the Jetty server at a specified port and redirection parameter. + * + * @param portno + * Port number. + * @param redirection + * whether redirection + */ + private void startServer(int portno, boolean redirection) throws Exception { + port = portno; + setUp(redirection); + SelectChannelConnector connector = new SelectChannelConnector(); + connector.setHost("127.0.0.1"); + connector.setPort(port); + + server.addConnector(connector); + server.start(); + } + + /** + * Fetches the specified <code>page</code> from the local Jetty server and + * checks whether the HTTP response status code matches with the expected + * code. Also use jsp pages for redirection. + * + * @param page + * Page to be fetched. + * @param expectedCode + * HTTP response status code expected while fetching the page. 
+ */ + private void fetchPage(String page, int expectedCode) throws Exception { + URL url = new URL("http", "127.0.0.1", port, page); + CrawlDatum crawlDatum = new CrawlDatum(); + Response response = http.getResponse(url, crawlDatum, true); + ProtocolOutput out = http.getProtocolOutput(new Text(url.toString()), + crawlDatum); + Content content = out.getContent(); + assertEquals("HTTP Status Code for " + url, expectedCode, + response.getCode()); + + if (page.compareTo("/nonexists.html") != 0 + && page.compareTo("/brokenpage.jsp") != 0 + && page.compareTo("/redirection") != 0) { + assertEquals("ContentType " + url, "application/xhtml+xml", + content.getContentType()); + } + } +} + Propchange: nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java ------------------------------------------------------------------------------ svn:eol-style = native