Author: tejasp
Date: Thu Jan 23 19:02:55 2014
New Revision: 1560786

URL: http://svn.apache.org/r1560786
Log:
NUTCH-1164 Write JUnit tests for protocol-http

Added:
    nutch/branches/2.x/src/plugin/protocol-http/jsp/
    nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp
    nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp
    nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp
    nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp
    nutch/branches/2.x/src/plugin/protocol-http/src/test/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/build.xml
    nutch/branches/2.x/src/plugin/build.xml
    nutch/branches/2.x/src/plugin/protocol-http/build.xml

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Thu Jan 23 19:02:55 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1164 Write JUnit tests for protocol-http (Sertac TURKEL via tejasp)
+
 * NUTCH-1710 Add gora package logging to log4j.properties (lewismc)
 
 * NUTCH-1655 Indexer Plugin for Elastic Search (Talat UYARER via lewismc)

Modified: nutch/branches/2.x/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/build.xml?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/build.xml (original)
+++ nutch/branches/2.x/build.xml Thu Jan 23 19:02:55 2014
@@ -915,13 +915,10 @@
                  exported="false" />
         <library path="${basedir}/build/plugins/lib-nekohtml/nekohtml-0.9.5.jar" 
                  exported="false" />
-        <library path="${basedir}/build/plugins/lib-nekohtml/nekohtml-0.9.5.jar" 
-                 exported="false" />
         <library path="${basedir}/build/plugins/parse-html/tagsoup-1.2.jar" 
                  exported="false" />
         <library path="${basedir}/build/plugins/protocol-sftp/jsch-0.1.41.jar" 
-                 exported="false" />
-                 
+                 exported="false" />         
         <library path="${basedir}/build/plugins/parse-html/tagsoup-1.2.jar" 
                  exported="false" />
 
@@ -968,6 +965,7 @@
         <source path="${basedir}/src/plugin/protocol-httpclient/src/java/" />
         <source path="${basedir}/src/plugin/protocol-httpclient/src/test/" />
         <source path="${basedir}/src/plugin/protocol-http/src/java/" />
+       <source path="${basedir}/src/plugin/protocol-http/src/test/" />
         <source path="${basedir}/src/plugin/protocol-sftp/src/java/" />
         <source path="${basedir}/src/plugin/scoring-link/src/java/" />
         <source path="${basedir}/src/plugin/scoring-opic/src/java/" />

Modified: nutch/branches/2.x/src/plugin/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/build.xml?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/build.xml (original)
+++ nutch/branches/2.x/src/plugin/build.xml Thu Jan 23 19:02:55 2014
@@ -82,6 +82,7 @@
      <ant dir="index-more" target="test"/>
      <ant dir="language-identifier" target="test"/>
      <ant dir="protocol-httpclient" target="test"/>
+     <ant dir="protocol-http" target="test"/>
      <ant dir="urlfilter-automaton" target="test"/>
      <ant dir="urlfilter-domain" target="test"/>
      <ant dir="urlfilter-prefix" target="test"/>

Modified: nutch/branches/2.x/src/plugin/protocol-http/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/build.xml?rev=1560786&r1=1560785&r2=1560786&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/build.xml (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/build.xml Thu Jan 23 19:02:55 2014
@@ -19,22 +19,27 @@
 
   <import file="../build-plugin.xml"/>
 
-  <!-- Build compilation dependencies -->
   <target name="deps-jar">
     <ant target="jar" inheritall="false" dir="../lib-http"/>
   </target>
 
-  <!-- Add compilation dependencies to classpath -->
   <path id="plugin.deps">
     <fileset dir="${nutch.root}/build">
       <include name="**/lib-http/*.jar" />
     </fileset>
+    <pathelement location="${build.dir}/test/conf"/>
   </path>
 
-  <!-- Deploy Unit test dependencies -->
   <target name="deps-test">
-    <ant target="deploy" inheritall="false" dir="../lib-http"/>
-    <ant target="deploy" inheritall="false" dir="../nutch-extensionpoints"/>
+    <copy toDir="${build.test}">
+      <fileset dir="${src.test}" excludes="**/*.java"/>
+    </copy>
   </target>
 
+  <!-- for junit test -->
+  <mkdir dir="${build.test}/data" />
+  <copy todir="${build.test}/data">
+      <fileset dir="jsp"/>
+   </copy>
+
 </project>

Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/basic-http.jsp Thu Jan 23 19:02:55 2014
@@ -0,0 +1,44 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin  
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>HelloWorld</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+    <meta name="Language" content="en" />
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+  </head>
+  
+  <body>
+    Hello World!!! <br>
+  </body>
+</html>

Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/brokenpage.jsp Thu Jan 23 19:02:55 2014
@@ -0,0 +1,47 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin
+--%>
+
+@ page language="java" import="java.util.*" pageEncoding="UTF-8"
+
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>HelloWorld</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8" />
+    <meta name="Language" content="en" />
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+  </head>
+  
+  <body>
+    Hello World!!! <br>
+  </body>
+</html>

Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect301.jsp Thu Jan 23 19:02:55 2014
@@ -0,0 +1,49 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>My JSP page</title>
+    
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+
+  </head>
+  
+  <body>
+       <%
+       response.setStatus(301);
+       response.setHeader( "Location", "http://nutch.apache.org");
+       response.setHeader( "Connection", "close" );
+               %> 
+    You are redirected by JSP<br>
+  </body>
+</html>

Added: nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/jsp/redirect302.jsp Thu Jan 23 19:02:55 2014
@@ -0,0 +1,49 @@
+<%--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--%><%--
+  Example JSP Page to Test Protocol-Http Plugin 
+--%><%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%><%
+String path = request.getContextPath();
+String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
+%>
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <base href="<%=basePath%>">
+    
+    <title>My JSP page</title>
+    
+       <meta http-equiv="pragma" content="no-cache">
+       <meta http-equiv="cache-control" content="no-cache">
+       <meta http-equiv="expires" content="0">    
+       <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
+       <meta http-equiv="description" content="This is my page">
+       <!--
+       <link rel="stylesheet" type="text/css" href="styles.css">
+       -->
+
+  </head>
+  
+  <body>
+       <%
+       response.setStatus(302);
+       response.setHeader( "Location", "http://nutch.apache.org");
+       response.setHeader( "Connection", "close" );
+               %> 
+    You are sucessfully redirected by JSP<br>
+  </body>
+</html>

Added: 
nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/test/conf/nutch-site-test.xml Thu Jan 23 19:02:55 2014
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<configuration>
+
+<property>
+  <name>http.robots.agents</name>
+  <value>Nutch-Test,*</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.agent.name</name>
+  <value>Nutch-Test</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.agent.description</name>
+  <value>Nutch protocol-httpclient test</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.auth.file</name>
+  <value>httpclient-auth-test.xml</value>
+  <description></description>
+</property>
+
+<property>
+  <name>http.timeout</name>
+  <value>60000</value>
+  <description></description>
+</property>
+
+</configuration>

Added: 
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1560786&view=auto
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (added)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Thu Jan 23 19:02:55 2014
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.http;
+
+import java.net.URL;
+
+import org.junit.After;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.apache.nutch.storage.WebPage;
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.nio.SelectChannelConnector;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.ServletHolder;
+
+/**
+ * Test cases for protocol-http 
+ */
+public class TestProtocolHttp {
+  private static final String RES_DIR = System.getProperty("test.data", ".");
+
+  private Http http;
+  private Server server;
+  private Context root;
+  private Configuration conf;
+  private int port;
+  
+  public void setUp(boolean redirection) throws Exception {
+    this.conf = new Configuration();
+    this.conf.addResource("nutch-default.xml");
+    this.conf.addResource("nutch-site-test.xml");
+
+    this.http = new Http();
+    this.http.setConf(conf);
+    
+    this.server = new Server();
+    
+    if (redirection) {
+      this.root = new Context(server, "/redirection", Context.SESSIONS);
+      this.root.setAttribute("newContextURL", "/redirect");
+    } 
+    else {
+      this.root = new Context(server, "/", Context.SESSIONS);
+    }
+
+    ServletHolder sh = new ServletHolder(org.apache.jasper.servlet.JspServlet.class);
+    this.root.addServlet(sh, "*.jsp");
+    this.root.setResourceBase(RES_DIR);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    server.stop();
+  }
+
+  @Test
+  public void testStatusCode() throws Exception {
+    startServer(47501, false);
+    fetchPage("/basic-http.jsp", 200);
+    fetchPage("/redirect301.jsp", 301);
+    fetchPage("/redirect302.jsp", 302);
+    fetchPage("/nonexists.html", 404);
+    fetchPage("/brokenpage.jsp", 500);
+  }
+
+  @Test
+  public void testRedirectionJetty() throws Exception {
+    // Redirection via Jetty
+    startServer(47500, true);
+    fetchPage("/redirection", 302);
+  }
+  
+  /**
+   * Starts the Jetty server at a specified port and redirection parameter.
+   * 
+   * @param portno Port number.
+   * @param redirection whether redirection        
+   */
+  private void startServer(int portno, boolean redirection) throws Exception {
+    port = portno;
+    setUp(redirection);
+    SelectChannelConnector connector = new SelectChannelConnector();
+    connector.setHost("127.0.0.1");
+    connector.setPort(port);
+
+    server.addConnector(connector);
+    server.start();
+  }
+
+  /**
+   * Fetches the specified <code>page</code> from the local Jetty server and
+   * checks whether the HTTP response status code matches with the expected
+   * code. Also use jsp pages for redirection.
+   * 
+   * @param page
+   *          Page to be fetched.
+   * @param expectedCode
+   *          HTTP response status code expected while fetching the page.
+   */
+  private void fetchPage(String page, int expectedCode) throws Exception {
+    URL url = new URL("http", "127.0.0.1", port, page);
+    Response response = http.getResponse(url, new WebPage(), true);
+    ProtocolOutput out = http.getProtocolOutput(url.toString(), new WebPage());
+    Content content = out.getContent();
+    
+    assertEquals("HTTP Status Code for " + url, expectedCode, response.getCode());
+    if (page.compareTo("/nonexists.html") != 0 
+                && page.compareTo("/brokenpage.jsp") != 0
+         && page.compareTo("/redirection") != 0)
+      assertEquals("ContentType " + url, "application/xhtml+xml", content.getContentType());
+  }
+}


Reply via email to