Author: rwesten
Date: Mon Nov 26 10:57:32 2012
New Revision: 1413551

URL: http://svn.apache.org/viewvc?rev=1413551&view=rev
Log:
Implementation for STANBOL-809, merged from the Stanbol NLP branch into 
trunk.

Modified:
    
stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java

Modified: 
stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1413551&r1=1413550&r2=1413551&view=diff
==============================================================================
--- 
stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
 (original)
+++ 
stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
 Mon Nov 26 10:57:32 2012
@@ -139,6 +139,7 @@ public class TikaEngine 
     private ContentItemFactory ciFactory;
     
     private static class MediaTypeAndStream {
+        String uri;
         MediaType mediaType;
         InputStream in;
     }
@@ -265,10 +266,13 @@ public class TikaEngine 
     private MediaTypeAndStream extractMediaType(ContentItem ci) {
         MediaTypeAndStream mtas = new MediaTypeAndStream();
         mtas.mediaType = getMediaType(ci.getBlob());
+        mtas.uri = ci.getUri().getUnicodeString();
         if(mtas.mediaType == null || 
mtas.mediaType.equals(MediaType.OCTET_STREAM)){
             mtas.in = new BufferedInputStream(ci.getStream());
+            Metadata m = new Metadata();
+            m.add(Metadata.RESOURCE_NAME_KEY, mtas.uri);
             try {
-                mtas.mediaType = detector.detect(mtas.in, new Metadata());
+                mtas.mediaType = detector.detect(mtas.in, m);
             } catch (IOException e) {
                 log.warn("Exception while detection the MediaType of the" +
                         "parsed ContentItem "+ci.getUri(),e);


Reply via email to