Author: kwright
Date: Tue Sep 26 07:17:54 2017
New Revision: 1809688

URL: http://svn.apache.org/viewvc?rev=1809688&view=rev
Log:
Fix for CONNECTORS-1459.  Committed on behalf of Julien Massiera.

Modified:
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1809688&r1=1809687&r2=1809688&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Sep 26 07:17:54 2017
@@ -3,6 +3,10 @@ $Id$
 
 ======================= 2.9-dev =====================
 
+CONNECTORS-1459: Allow the Tika service to override the mime type
+in the metadata.
+(Julien Massiera)
+
 CONNECTORS-1458: Update to use SolrJ 7.0.0.  This required some
 revision to the connector, and also to make zookeeper.jar available
 to all connectors as a root dependency.

Modified: manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java?rev=1809688&r1=1809687&r2=1809688&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java (original)
+++ manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java Tue Sep 26 07:17:54 2017
@@ -627,7 +627,15 @@ public class TikaExtractor extends org.a
                 }
                 metaJson = (JSONObject) parser.parse(sb.toString());
                 for (Object key : metaJson.keySet()) {
-                  metadata.add(key.toString(), metaJson.get(key).toString());
+                  String metadataKey = key.toString();
+                  String metadataValue =  metaJson.get(key).toString();
+                  
+                  // Replace the content type by the one found by Tika
+                  if(metadataKey.equals("Content-Type")) {
+                    metadata.remove(metadataKey);
+                  }
+                  
+                  metadata.add(metadataKey, metadataValue);
                 }
               } finally {
                 tikaServerIs.close();


Reply via email to