Author: tommaso
Date: Mon Sep 20 05:42:35 2010
New Revision: 998787

URL: http://svn.apache.org/viewvc?rev=998787&view=rev
Log:
[UIMA-1878] - applied patch from Greg Holmberg to handle spaces in path string

Modified:
    
uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java
    
uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java

Modified: uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java?rev=998787&r1=998786&r2=998787&view=diff
==============================================================================
--- uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java (original)
+++ uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/FileSystemCollectionReader.java Mon Sep 20 05:42:35 2010
@@ -96,11 +96,11 @@ public class FileSystemCollectionReader 
 
                // call Tika wrapper 
                try {
-                       tika.populateCASfromURL(aCAS, file.toURL(), this.mMIME, this.mLanguage);
+                       tika.populateCASfromURI(aCAS, file.toURI(), this.mMIME, this.mLanguage);
                } catch (CASException e) {
                        getLogger().log(Level.WARNING,"Problem converting file : "+file.toURL()+"\t"+e.getMessage());
-               jcas.setDocumentText(" ");
-               return;
+                       throw new IOException(e);
+               //jcas.setDocumentText(" "); return;
                }
        }
 

Modified: uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java?rev=998787&r1=998786&r2=998787&view=diff
==============================================================================
--- uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java (original)
+++ uima/sandbox/trunk/TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java Mon Sep 20 05:42:35 2010
@@ -22,7 +22,7 @@ package org.apache.uima.tika;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.net.URL;
+import java.net.URI;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
@@ -33,6 +33,7 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.util.Level;
 
 
 public class TIKAWrapper {
@@ -54,16 +55,16 @@ public class TIKAWrapper {
        }
        
        
-       public void populateCASfromURL(CAS cas, URL url, String language) throws CASException{
-               populateCASfromURL(cas, url, null, language);
+       public void populateCASfromURI(CAS cas, URI uri, String language) throws CASException{
+               populateCASfromURI(cas, uri, null, language);
        }
        
-       public void populateCASfromURL(CAS cas, URL url, String mime, String language) throws CASException{
+       public void populateCASfromURI(CAS cas, URI uri, String mime, String language) throws CASException{
        
                InputStream originalStream=null;
                try {
-                       originalStream = new BufferedInputStream(url
-                                       .openStream());
+                       originalStream = new BufferedInputStream(
+                                       uri.toURL().openStream());
                } catch (IOException e1) {
                        new CASException(e1);
                }
@@ -86,8 +87,8 @@ public class TIKAWrapper {
            catch (Exception e){
                // if we have a problem just dump the message and continue
                // getLogger().log(Level.WARNING,"Problem converting file : "+URI+"\t"+e.getMessage());
-               cas.setDocumentText("");
-               return;
+               // cas.setDocumentText(""); return;
+               throw new CASException(e);
            }
            finally {
                        // set language if it was explicitly specified as a configuration
@@ -126,7 +127,7 @@ public class TIKAWrapper {
            
            FeatureValue fv = new FeatureValue(jcas);
        fv.setName("uri");
-       fv.setValue(url.toString());
+       fv.setValue(uri.toString());
        docAnnotation.setFeatures(i,fv);
            
            docAnnotation.addToIndexes();


Reply via email to