Author: snagel
Date: Thu Oct  9 19:20:51 2014
New Revision: 1630565

URL: http://svn.apache.org/r1630565
Log:
NUTCH-1164 JUnit tests for protocol-http

Added:
    nutch/trunk/src/plugin/protocol-http/jsp/
    nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp   (with props)
    nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp   (with props)
    nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp   (with props)
    nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp   (with props)
    nutch/trunk/src/plugin/protocol-http/src/test/conf/
    nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml   (with props)
    nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java   (with props)
Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/build.xml
    nutch/trunk/src/plugin/build.xml
    nutch/trunk/src/plugin/protocol-http/build.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct  9 19:20:51 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1164 Write JUnit tests for protocol-http (nimafl via snagel)
+
 * NUTCH-1868 Document and improve CLI for FileDumper tool (lewismc)
 
 * NUTCH-1869 Add a flag to -mimeType fiag to FileDumper (lewismc)
@@ -10,7 +12,7 @@ Nutch Current Development 1.10-SNAPSHOT
 
 * NUTCH-1826, NUTCH-1864 indexchecker fails if solr.server.url not configured (lewismc, snagel)
 
-* NUTCH-1866 ant eclipse target should not delete runtime (nimafl vai lewismc)
+* NUTCH-1866 ant eclipse target should not delete runtime (nimafl via lewismc)
 
 * NUTCH-1857 readb -dump -format csv should use comma (lewismc)
 

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Thu Oct  9 19:20:51 2014
@@ -992,7 +992,7 @@
         <source path="${plugins.dir}/protocol-httpclient/src/java/" />
         <source path="${plugins.dir}/protocol-httpclient/src/test/" />
         <source path="${plugins.dir}/protocol-http/src/java/" />
-        <!-- <source path="${plugins.dir}/protocol-http/src/test/" /> --> 
+        <source path="${plugins.dir}/protocol-http/src/test/" />
         <source path="${plugins.dir}/scoring-depth/src/java/" />
         <source path="${plugins.dir}/scoring-link/src/java/" />
         <source path="${plugins.dir}/scoring-opic/src/java/" />

Modified: nutch/trunk/src/plugin/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/src/plugin/build.xml (original)
+++ nutch/trunk/src/plugin/build.xml Thu Oct  9 19:20:51 2014
@@ -88,6 +88,7 @@
      <ant dir="language-identifier" target="test"/>
      <ant dir="lib-http" target="test"/>
      <ant dir="protocol-file" target="test"/>
+     <ant dir="protocol-http" target="test"/>
      <ant dir="protocol-httpclient" target="test"/>
      <!--ant dir="parse-ext" target="test"/-->
      <ant dir="feed" target="test"/>

Modified: nutch/trunk/src/plugin/protocol-http/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/build.xml?rev=1630565&r1=1630564&r2=1630565&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/build.xml (original)
+++ nutch/trunk/src/plugin/protocol-http/build.xml Thu Oct  9 19:20:51 2014
@@ -29,12 +29,22 @@
     <fileset dir="${nutch.root}/build">
       <include name="**/lib-http/*.jar" />
     </fileset>
+    <pathelement location="${build.dir}/test/conf"/>
   </path>
 
   <!-- Deploy Unit test dependencies -->
   <target name="deps-test">
     <ant target="deploy" inheritall="false" dir="../lib-http"/>
     <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+    <copy toDir="${build.test}">
+      <fileset dir="${src.test}" excludes="**/*.java"/>
+    </copy>
   </target>
 
+  <!-- for junit test -->
+  <mkdir dir="${build.test}/data" />
+  <copy todir="${build.test}/data">
+      <fileset dir="jsp"/>
+   </copy>
+
 </project>

Added: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp Thu Oct  9 19:20:51 
2014
@@ -0,0 +1,44 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin  
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>HelloWorld</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+    <meta name="Language" content="en" />
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+  </head>
+  
+  <body>
+    Hello World!!! <br>
+  </body>
+</html>

Propchange: nutch/trunk/src/plugin/protocol-http/jsp/basic-http.jsp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp Thu Oct  9 19:20:51 
2014
@@ -0,0 +1,47 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin
+--%>
+
+@ page language="java" import="java.util.*" pageEncoding="UTF-8"
+
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>HelloWorld</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+    <meta name="Language" content="en" />
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+  </head>
+  
+  <body>
+    Hello World!!! <br>
+  </body>
+</html>

Propchange: nutch/trunk/src/plugin/protocol-http/jsp/brokenpage.jsp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp Thu Oct  9 
19:20:51 2014
@@ -0,0 +1,49 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>My JSP page</title>
+    
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+
+  </head>
+  
+  <body>
+       <%
+       response.setStatus(301);
+       response.setHeader( "Location", "http://nutch.apache.org");
+       response.setHeader( "Connection", "close" );
+               %> 
+    You are redirected by JSP<br>
+  </body>
+</html>

Propchange: nutch/trunk/src/plugin/protocol-http/jsp/redirect301.jsp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp (added)
+++ nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp Thu Oct  9 
19:20:51 2014
@@ -0,0 +1,49 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin 
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>My JSP page</title>
+    
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+
+  </head>
+  
+  <body>
+       <%
+       response.setStatus(302);
+       response.setHeader( "Location", "http://nutch.apache.org");
+       response.setHeader( "Connection", "close" );
+               %> 
+    You are sucessfully redirected by JSP<br>
+  </body>
+</html>

Propchange: nutch/trunk/src/plugin/protocol-http/jsp/redirect302.jsp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml?rev=1630565&view=auto
==============================================================================
--- nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml 
(added)
+++ nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml Thu 
Oct  9 19:20:51 2014
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<configuration>
+
+<property>
+  <name>http.robots.agents</name>
+  <value>Nutch-Test,*</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.agent.name</name>
+  <value>Nutch-Test</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.agent.description</name>
+  <value>Nutch protocol-httpclient test</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.auth.file</name>
+  <value>httpclient-auth-test.xml</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.timeout</name>
+  <value>60000</value>
+  <description></description>
+</property>
+
+</configuration>
\ No newline at end of file

Propchange: 
nutch/trunk/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1630565&view=auto
==============================================================================
--- 
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
 (added)
+++ 
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
 Thu Oct  9 19:20:51 2014
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http;
+
+import static org.junit.Assert.assertEquals;
+
+import java.net.URL;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.junit.After;
+import org.junit.Test;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.nio.SelectChannelConnector;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.ServletHolder;
+
+/**
+ * Test cases for protocol-http: fetches pages from an embedded Jetty server bound to 127.0.0.1 and checks the HTTP status codes.
+ */
+public class TestProtocolHttp {
+  private static final String RES_DIR = System.getProperty("test.data", "."); // Jetty resource base; falls back to "." when test.data is unset
+
+  private Http http;
+  private Server server;
+  private Context root;
+  private Configuration conf;
+  private int port; // assigned in startServer(), read by fetchPage() to build the URL
+
+  public void setUp(boolean redirection) throws Exception { // not annotated @Before: invoked explicitly from startServer()
+    conf = new Configuration();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("nutch-site-test.xml");
+
+    http = new Http();
+    http.setConf(conf);
+
+    server = new Server();
+
+    if (redirection) {
+      root = new Context(server, "/redirection", Context.SESSIONS);
+      root.setAttribute("newContextURL", "/redirect"); // NOTE(review): effect of this attribute is not visible here - confirm it is needed for the 302
+    } else {
+      root = new Context(server, "/", Context.SESSIONS);
+    }
+
+    ServletHolder sh = new ServletHolder(
+        org.apache.jasper.servlet.JspServlet.class);
+    root.addServlet(sh, "*.jsp"); // *.jsp requests are handled by the Jasper JSP servlet
+    root.setResourceBase(RES_DIR);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    server.stop(); // NOTE(review): server is only assigned in setUp(); this throws NullPointerException if setUp() never got that far
+  }
+
+  @Test
+  public void testStatusCode() throws Exception {
+    startServer(47504, false); // fixed port; context mounted at "/"
+    fetchPage("/basic-http.jsp", 200);
+    fetchPage("/redirect301.jsp", 301);
+    fetchPage("/redirect302.jsp", 302);
+    fetchPage("/nonexists.html", 404);
+    fetchPage("/brokenpage.jsp", 500);
+  }
+
+  @Test
+  public void testRedirectionJetty() throws Exception {
+    // Redirection via Jetty
+    startServer(47503, true); // fixed port; context mounted at "/redirection"
+    fetchPage("/redirection", 302);
+  }
+
+  /**
+   * Configures the server via setUp(redirection), binds it to 127.0.0.1 on the given port and starts it.
+   *
+   * @param portno
+   *          Port number the connector listens on; also stored in the port field.
+   * @param redirection
+   *          passed to setUp(): true mounts the context at /redirection, false at /
+   */
+  private void startServer(int portno, boolean redirection) throws Exception {
+    port = portno;
+    setUp(redirection);
+    SelectChannelConnector connector = new SelectChannelConnector();
+    connector.setHost("127.0.0.1");
+    connector.setPort(port);
+
+    server.addConnector(connector);
+    server.start();
+  }
+
+  /**
+   * Fetches the specified <code>page</code> from the local Jetty server and
+   * checks whether the HTTP response status code matches with the expected
+   * code. Except for /nonexists.html, /brokenpage.jsp and /redirection, the content type is asserted as well.
+   *
+   * @param page
+   *          Page to be fetched.
+   * @param expectedCode
+   *          HTTP response status code expected while fetching the page.
+   */
+  private void fetchPage(String page, int expectedCode) throws Exception {
+    URL url = new URL("http", "127.0.0.1", port, page);
+    CrawlDatum crawlDatum = new CrawlDatum();
+    Response response = http.getResponse(url, crawlDatum, true); // used for the status-code assertion
+    ProtocolOutput out = http.getProtocolOutput(new Text(url.toString()), // NOTE(review): looks like a second fetch of the same URL - confirm; result is only used for the content-type check
+        crawlDatum);
+    Content content = out.getContent();
+    assertEquals("HTTP Status Code for " + url, expectedCode,
+        response.getCode());
+
+    if (page.compareTo("/nonexists.html") != 0 // compareTo(..) != 0 here means "page is not equal to"
+        && page.compareTo("/brokenpage.jsp") != 0
+        && page.compareTo("/redirection") != 0) {
+      assertEquals("ContentType " + url, "application/xhtml+xml",
+          content.getContentType());
+    }
+  }
+}
+

Propchange: 
nutch/trunk/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to