Author: tejasp
Date: Thu Jan 23 19:02:55 2014
New Revision: 1560786
URL: http://svn.apache.org/r1560786
Log:
NUTCH-1164 Write JUnit tests for protocol-http
Added:
nutch/branches/2.x/src/plugin/protocol-http/jsp/
nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp
nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp
nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp
nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp
nutch/branches/2.x/src/plugin/protocol-http/src/test/
nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/
nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/build.xml
nutch/branches/2.x/src/plugin/build.xml
nutch/branches/2.x/src/plugin/protocol-http/build.xml
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Thu Jan 23 19:02:55 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1164 Write JUnit tests for protocol-http (Sertac TURKEL via tejasp)
+
* NUTCH-1710 Add gora package logging to log4j.properties (lewismc)
* NUTCH-1655 Indexer Plugin for Elastic Search (Talat UYARER via lewismc)
Modified: nutch/branches/2.x/build.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/build.xml?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/build.xml (original)
+++ nutch/branches/2.x/build.xml Thu Jan 23 19:02:55 2014
@@ -915,13 +915,10 @@
exported="false" />
<library
path="${basedir}/build/plugins/lib-nekohtml/nekohtml-0.9.5.jar"
exported="false" />
- <library
path="${basedir}/build/plugins/lib-nekohtml/nekohtml-0.9.5.jar"
- exported="false" />
<library path="${basedir}/build/plugins/parse-html/tagsoup-1.2.jar"
exported="false" />
<library path="${basedir}/build/plugins/protocol-sftp/jsch-0.1.41.jar"
- exported="false" />
-
+ exported="false" />
<library path="${basedir}/build/plugins/parse-html/tagsoup-1.2.jar"
exported="false" />
@@ -968,6 +965,7 @@
<source path="${basedir}/src/plugin/protocol-httpclient/src/java/" />
<source path="${basedir}/src/plugin/protocol-httpclient/src/test/" />
<source path="${basedir}/src/plugin/protocol-http/src/java/" />
+ <source path="${basedir}/src/plugin/protocol-http/src/test/" />
<source path="${basedir}/src/plugin/protocol-sftp/src/java/" />
<source path="${basedir}/src/plugin/scoring-link/src/java/" />
<source path="${basedir}/src/plugin/scoring-opic/src/java/" />
Modified: nutch/branches/2.x/src/plugin/build.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/build.xml?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/build.xml (original)
+++ nutch/branches/2.x/src/plugin/build.xml Thu Jan 23 19:02:55 2014
@@ -82,6 +82,7 @@
<ant dir="index-more" target="test"/>
<ant dir="language-identifier" target="test"/>
<ant dir="protocol-httpclient" target="test"/>
+ <ant dir="protocol-http" target="test"/>
<ant dir="urlfilter-automaton" target="test"/>
<ant dir="urlfilter-domain" target="test"/>
<ant dir="urlfilter-prefix" target="test"/>
Modified: nutch/branches/2.x/src/plugin/protocol-http/build.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/build.xml?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/build.xml (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/build.xml Thu Jan 23 19:02:55
2014
@@ -19,22 +19,27 @@
<import file="../build-plugin.xml"/>
- <!-- Build compilation dependencies -->
<target name="deps-jar">
<ant target="jar" inheritall="false" dir="../lib-http"/>
</target>
- <!-- Add compilation dependencies to classpath -->
<path id="plugin.deps">
<fileset dir="${nutch.root}/build">
<include name="**/lib-http/*.jar" />
</fileset>
+ <pathelement location="${build.dir}/test/conf"/>
</path>
- <!-- Deploy Unit test dependencies -->
<target name="deps-test">
- <ant target="deploy" inheritall="false" dir="../lib-http"/>
- <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+ <copy toDir="${build.test}">
+ <fileset dir="${src.test}" excludes="**/*.java"/>
+ </copy>
</target>
+ <!-- for junit test -->
+ <mkdir dir="${build.test}/data" />
+ <copy todir="${build.test}/data">
+ <fileset dir="jsp"/>
+ </copy>
+
</project>
Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp Thu Jan 23
19:02:55 2014
@@ -0,0 +1,44 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Well-formed example JSP page for the protocol-http plugin tests; TestProtocolHttp expects HTTP 200 when fetching it
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>HelloWorld</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+ <meta name="Language" content="en" />
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+ </head>
+
+ <body>
+ Hello World!!! <br>
+ </body>
+</html>
Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp Thu Jan 23
19:02:55 2014
@@ -0,0 +1,47 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
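+ Deliberately malformed JSP page (directive and scriptlet delimiters are missing) for the protocol-http plugin tests; TestProtocolHttp expects HTTP 500 when fetching it, so do not "repair" this file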
+--%>
+
+@ page language="java" import="java.util.*" pageEncoding="UTF-8"
+
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>HelloWorld</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+ <meta name="Language" content="en" />
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+ </head>
+
+ <body>
+ Hello World!!! <br>
+ </body>
+</html>
Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp Thu Jan 23
19:02:55 2014
@@ -0,0 +1,49 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Example JSP page for the protocol-http plugin tests: sets status 301 and a Location header pointing to http://nutch.apache.org
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>My JSP page</title>
+
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+
+ </head>
+
+ <body>
+ <%
+ response.setStatus(301);
+ response.setHeader( "Location", "http://nutch.apache.org");
+ response.setHeader( "Connection", "close" );
+ %>
+ You are redirected by JSP<br>
+ </body>
+</html>
Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp Thu Jan 23
19:02:55 2014
@@ -0,0 +1,49 @@
+<%--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--%><%--
+ Example JSP page for the protocol-http plugin tests: sets status 302 and a Location header pointing to http://nutch.apache.org
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath =
request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <base href="<%=basePath%>">
+
+ <title>My JSP page</title>
+
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="expires" content="0">
+ <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+ <meta http-equiv="description" content="This is my page">
+ <!--
+ <link rel="stylesheet" type="text/css" href="styles.css">
+ -->
+
+ </head>
+
+ <body>
+ <%
+ response.setStatus(302);
+ response.setHeader( "Location", "http://nutch.apache.org");
+ response.setHeader( "Connection", "close" );
+ %>
+ You are sucessfully redirected by JSP<br>
+ </body>
+</html>
Added:
nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml?rev=1560786&view=auto
==============================================================================
---
nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
(added)
+++
nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
Thu Jan 23 19:02:55 2014
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<configuration>
+
+<property>
+  <name>http.robots.agents</name>
+  <value>Nutch-Test,*</value>
+  <description>Agent strings looked for in robots.txt files,
+  comma-separated, in decreasing order of precedence.</description>
+</property>
+
+<property>
+  <name>http.agent.name</name>
+  <value>Nutch-Test</value>
+  <description>HTTP 'User-Agent' name used while running the
+  protocol-http unit tests.</description>
+</property>
+
+<property>
+  <name>http.agent.description</name>
+  <value>Nutch protocol-http test</value>
+  <description>Further description of the test agent; names the plugin
+  under test (protocol-http).</description>
+</property>
+
+<property>
+  <name>http.auth.file</name>
+  <value>httpclient-auth-test.xml</value>
+  <description>Authentication configuration file.
+  NOTE(review): this entry looks carried over from the
+  protocol-httpclient test configuration; confirm whether protocol-http
+  reads it at all before relying on it.</description>
+</property>
+
+<property>
+  <name>http.timeout</name>
+  <value>60000</value>
+  <description>Network timeout, in milliseconds, used by the
+  tests.</description>
+</property>
+
+</configuration>
Added:
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1560786&view=auto
==============================================================================
---
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
(added)
+++
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
Thu Jan 23 19:02:55 2014
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http;
+
+import java.net.URL;
+
+import org.junit.After;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.apache.nutch.storage.WebPage;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.nio.SelectChannelConnector;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.ServletHolder;
+
+/**
+ * Test cases for protocol-http
+ */
+public class TestProtocolHttp {
+ private static final String RES_DIR = System.getProperty("test.data", ".");
+
+ private Http http;
+ private Server server;
+ private Context root;
+ private Configuration conf;
+ private int port;
+
+ public void setUp(boolean redirection) throws Exception {
+ this.conf = new Configuration();
+ this.conf.addResource("nutch-default.xml");
+ this.conf.addResource("nutch-site-test.xml");
+
+ this.http = new Http();
+ this.http.setConf(conf);
+
+ this.server = new Server();
+
+ if (redirection) {
+ this.root = new Context(server, "/redirection", Context.SESSIONS);
+ this.root.setAttribute("newContextURL", "/redirect");
+ }
+ else {
+ this.root = new Context(server, "/", Context.SESSIONS);
+ }
+
+ ServletHolder sh = new
ServletHolder(org.apache.jasper.servlet.JspServlet.class);
+ this.root.addServlet(sh, "*.jsp");
+ this.root.setResourceBase(RES_DIR);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ server.stop();
+ }
+
+ @Test
+ public void testStatusCode() throws Exception {
+ startServer(47501, false);
+ fetchPage("/basic-http.jsp", 200);
+ fetchPage("/redirect301.jsp", 301);
+ fetchPage("/redirect302.jsp", 302);
+ fetchPage("/nonexists.html", 404);
+ fetchPage("/brokenpage.jsp", 500);
+ }
+
+ @Test
+ public void testRedirectionJetty() throws Exception {
+ // Redirection via Jetty
+ startServer(47500, true);
+ fetchPage("/redirection", 302);
+ }
+
+ /**
+ * Starts the Jetty server at a specified port and redirection parameter.
+ *
+ * @param portno Port number.
+ * @param redirection whether redirection
+ */
+ private void startServer(int portno, boolean redirection) throws Exception {
+ port = portno;
+ setUp(redirection);
+ SelectChannelConnector connector = new SelectChannelConnector();
+ connector.setHost("127.0.0.1");
+ connector.setPort(port);
+
+ server.addConnector(connector);
+ server.start();
+ }
+
+ /**
+ * Fetches the specified <code>page</code> from the local Jetty server and
+ * checks whether the HTTP response status code matches with the expected
+ * code. Also use jsp pages for redirection.
+ *
+ * @param page
+ * Page to be fetched.
+ * @param expectedCode
+ * HTTP response status code expected while fetching the page.
+ */
+ private void fetchPage(String page, int expectedCode) throws Exception {
+ URL url = new URL("http", "127.0.0.1", port, page);
+ Response response = http.getResponse(url, new WebPage(), true);
+ ProtocolOutput out = http.getProtocolOutput(url.toString(), new WebPage());
+ Content content = out.getContent();
+
+ assertEquals("HTTP Status Code for " + url, expectedCode,
response.getCode());
+ if (page.compareTo("/nonexists.html") != 0
+ && page.compareTo("/brokenpage.jsp") != 0
+ && page.compareTo("/redirection") != 0)
+ assertEquals("ContentType " + url, "application/xhtml+xml",
content.getContentType());
+ }
+}