Author: bfoster
Date: Thu Jun  2 23:26:28 2011
New Revision: 1130871

URL: http://svn.apache.org/viewvc?rev=1130871&view=rev
Log:

- refactoring to better support unit-testing

-----------------
OODT-194

Modified:
    
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
    
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
    
oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java

Modified: 
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
URL: 
http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- 
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
 (original)
+++ 
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/HttpProtocol.java
 Thu Jun  2 23:26:28 2011
@@ -42,9 +42,11 @@ import java.io.OutputStream;
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import java.util.Scanner;
 import java.util.StringTokenizer;
 import java.util.regex.Matcher;
@@ -67,7 +69,7 @@ public class HttpProtocol implements Pro
 
   static String IGNORE = "ignore";
 
-  static HashMap<String, LinkedList<ProtocolFile>> linkChildren = new 
HashMap<String, LinkedList<ProtocolFile>>();
+  static Map<String, List<HttpFile>> linkChildren = new HashMap<String, 
List<HttpFile>>();
 
   static boolean takeAllFiles = true;
 
@@ -170,7 +172,11 @@ public class HttpProtocol implements Pro
   }
 
   public List<ProtocolFile> ls() throws ProtocolException {
-    return parseLink(currentFile);
+       List<ProtocolFile> lsResults = new ArrayList<ProtocolFile>();
+       for (HttpFile file : parseLink(currentFile)) {
+               lsResults.add(file);
+       }
+    return lsResults;
   }
 
   public ProtocolFile pwd() throws ProtocolException {
@@ -186,9 +192,9 @@ public class HttpProtocol implements Pro
     return this.isConnected;
   }
 
-  public LinkedList<ProtocolFile> parseLink(HttpFile file)
+  public List<HttpFile> parseLink(HttpFile file)
       throws ProtocolException {
-    LinkedList<ProtocolFile> children = linkChildren.get(file.getLink()
+    List<HttpFile> children = linkChildren.get(file.getLink()
         .toString());
     if (file.isDir() && children == null) {
       try {
@@ -202,13 +208,8 @@ public class HttpProtocol implements Pro
         }
 
         // Find links in URL.
-        List<Link> links = HttpUtils.findLinks(conn);
-        
-        // Convert links to HttpFiles.
-        children = new LinkedList<ProtocolFile>();
-        for (Link link : links) {
-          children.add(HttpUtils.toHttpFile(link, file));
-        }
+        children = new LinkedList<HttpFile>();
+        children.addAll(HttpUtils.findLinks(file));
         
         // Save children links found.
         linkChildren.put(file.getLink().toString(), children);
@@ -301,12 +302,12 @@ public class HttpProtocol implements Pro
       if (st.hasMoreTokens()) {
         do {
           String token = st.nextToken();
-          LinkedList<ProtocolFile> children = this.parseLink(curPath);
-          for (ProtocolFile pFile : children) {
+          List<HttpFile> children = this.parseLink(curPath);
+          for (HttpFile pFile : children) {
             if (pFile.getName().equals(token)) {
               // System.out.println("token " + token + " " +
               // pFile);
-              curPath = (HttpFile) pFile;
+              curPath = pFile;
               continue;
             }
           }

Modified: 
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
URL: 
http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- 
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
 (original)
+++ 
oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/HttpUtils.java
 Thu Jun  2 23:26:28 2011
@@ -19,12 +19,16 @@ package org.apache.oodt.cas.protocol.htt
 //JDK imports
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 //APACHE imports
 import org.apache.commons.lang.Validate;
@@ -36,6 +40,7 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.parser.html.HtmlParser;
 import org.apache.tika.sax.Link;
 import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.SAXException;
 
 /**
@@ -62,7 +67,9 @@ public class HttpUtils {
        public static URI resolveUri(URI base, String path) throws 
URISyntaxException {
                Validate.notNull(base, "base URI must not be NULL");
                Validate.notNull(path, "resolve path must not be NULL");
-               if (path.startsWith("/")) {
+               if (path.startsWith("http://")) {
+                       return new URI(path);
+               } else if (path.startsWith("/")) {
                        return new URI(base.getScheme() + "://" + 
base.getHost() + path);
                } else {
                        if (base.toString().endsWith("/")) {
@@ -96,13 +103,18 @@ public class HttpUtils {
     return sb.toString();
   }
 
-       public static List<Link> findLinks(HttpURLConnection conn) throws 
IOException, SAXException, TikaException {
-    LinkContentHandler handler = new LinkContentHandler();
-    
-    new HtmlParser().parse(new 
ByteArrayInputStream(HttpUtils.readUrl(conn).getBytes()),
-        handler, new Metadata(), new ParseContext());
-    
-    return handler.getLinks();
+       public static List<HttpFile> findLinks(HttpFile file) throws 
IOException, URISyntaxException {
+               // Pattern looking for <a href="(group-1)"/>(group-2)</a>
+               Pattern linkPattern = 
Pattern.compile("<\\s*a\\s+href\\s*=\\s*\"(.+?)\"\\s*>(.+?)<\\s*/\\s*a\\s*>"); 
+               Matcher matcher = 
linkPattern.matcher(HttpUtils.readUrl(connect(file.getLink())));
+               List<HttpFile> httpFiles = new ArrayList<HttpFile>();
+               while (matcher.find()) {
+                       String link = matcher.group(1);
+                       String virtualPath = matcher.group(2);
+                       URL url = resolveUri(file.getLink().toURI(), 
link).toURL();
+                       httpFiles.add(new HttpFile(link, isDirectory(url, 
virtualPath), url, file));
+               }
+               return httpFiles;
        }
        
        public static HttpFile toHttpFile(Link link, HttpFile parent) throws 
IOException {

Modified: 
oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java
URL: 
http://svn.apache.org/viewvc/oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java?rev=1130871&r1=1130870&r2=1130871&view=diff
==============================================================================
--- 
oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java
 (original)
+++ 
oodt/branches/protocol/protocol-http/src/test/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java
 Thu Jun  2 23:26:28 2011
@@ -23,6 +23,12 @@ import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.util.List;
+
+import org.apache.oodt.cas.protocol.http.HttpFile;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.Link;
+import org.xml.sax.SAXException;
 
 //JUnits imports
 import junit.framework.TestCase;
@@ -34,7 +40,11 @@ import junit.framework.TestCase;
  */
 public class TestHttpUtils extends TestCase {
        
-       private static final String URL_OF_THIS_TEST = 
"http://svn.apache.org/repos/asf/oodt/branches/protocol/protocol-http/src/main/java/org/apache/oodt/cas/protocol/http/util/TestHttpUtils.java";
+       private static final String APACHE_SVN_SITE = "http://svn.apache.org";
+       
+       private static final String PROTOCOL_HTTP_SVN_LOC = 
"/repos/asf/oodt/branches/protocol/protocol-http";
+       private static final String PARENT_URL_OF_THIS_TEST = 
PROTOCOL_HTTP_SVN_LOC + "/src/test/org/apache/oodt/cas/protocol/http/util";
+       private static final String URL_OF_THIS_TEST = PARENT_URL_OF_THIS_TEST 
+ "/TestHttpUtils.java";
        
        public void testResolveUri() throws URISyntaxException {
                 URI baseUri = new URI("http://localhost/base/directory/");
@@ -53,11 +63,33 @@ public class TestHttpUtils extends TestC
        }
        
        public void testConnectUrl() throws MalformedURLException, IOException {
-               HttpURLConnection conn = HttpUtils.connect(new 
URL(URL_OF_THIS_TEST));
+               HttpURLConnection conn = HttpUtils.connect(new 
URL(APACHE_SVN_SITE + URL_OF_THIS_TEST));
                assertNotSame(0, conn.getDate());
                String urlText = HttpUtils.readUrl(conn);
                assertTrue(urlText.contains("public class TestHttpUtils extends 
TestCase {"));
                conn.disconnect();
        }
+       
+       public void testRedirector() throws MalformedURLException {
+               URL url = new URL("http://localhost:80");
+               URL redirectedURL = new URL("http://localhost:8080");
+               assertFalse(HttpUtils.checkForRedirection(url, url));
+               assertTrue(HttpUtils.checkForRedirection(url, redirectedURL));
+       }
 
+       public void testFindLinks() throws MalformedURLException, IOException, 
URISyntaxException {
+               URL url = new URL(APACHE_SVN_SITE + PARENT_URL_OF_THIS_TEST);
+               HttpFile parent = new HttpFile(PARENT_URL_OF_THIS_TEST, true, 
url, null);
+               HttpURLConnection conn = HttpUtils.connect(url);
+               System.out.println(HttpUtils.readUrl(conn));
+               List<HttpFile> httpFiles = HttpUtils.findLinks(parent);
+               boolean foundThisTest = false;
+               for (HttpFile httpFile : httpFiles) {
+                       if (httpFile.getName().equals("TestHttpUtils.java")) {
+                               foundThisTest = true;
+                               break;
+                       }
+               }
+               assertTrue(foundThisTest);
+       }
 }


Reply via email to