Author: rfrovarp
Date: Mon Dec  5 03:20:04 2011
New Revision: 1210317

URL: http://svn.apache.org/viewvc?rev=1210317&view=rev
Log:
Fix DROIDS-161

Modified:
    
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
    
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java

Modified: 
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=1210317&r1=1210316&r2=1210317&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Mon Dec  5 03:20:04 2011
@@ -96,7 +96,13 @@ public class TikaDocumentParser implemen
       if (task instanceof LinkTask) {
              for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
                try {
-                 extractedTasks.add(new LinkTask((LinkTask)task, new URI(tikaLink.getUri()), depth, tikaLink.getText()));
+                 URI uri = new URI(tikaLink.getUri());
+            // Test to see if the scheme is empty
+            // This would indicate a relative URL, so resolve it against the task URI
+            if(uri.getScheme() == null) {
+              uri = ((Link) task).getURI().resolve(uri);
+            }
+            extractedTasks.add(new LinkTask((Link)task, uri, depth, tikaLink.getText()));
                } catch (URISyntaxException e) {
                  if(log.isWarnEnabled()) {
                    log.warn("URI not valid: "+ tikaLink.getUri());

Modified: 
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: 
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1210317&r1=1210316&r2=1210317&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Mon Dec  5 03:20:04 2011
@@ -99,7 +99,13 @@ public class TikaHtmlParser implements T
              int depth = task.getDepth() + 1;
              for(org.apache.tika.sax.Link tikaLink : linkHandler.getLinks()) {
                try {
-                 extractedTasks.add(new LinkTask((Link)task, new URI(tikaLink.getUri()), depth, tikaLink.getText()));
+                 URI uri = new URI(tikaLink.getUri());
+                 // Test to see if the scheme is empty
+                 // This would indicate a relative URL, so resolve it against the task URI
+                 if(uri.getScheme() == null) {
+                   uri = ((Link) task).getURI().resolve(uri);
+                 }
+                 extractedTasks.add(new LinkTask((Link)task, uri, depth, tikaLink.getText()));
                } catch (URISyntaxException e) {
                  if(log.isWarnEnabled()) {
                    log.warn("URI not valid: "+ tikaLink.getUri());


Reply via email to