Author: kwright
Date: Tue Sep 26 07:17:54 2017
New Revision: 1809688
URL: http://svn.apache.org/viewvc?rev=1809688&view=rev
Log:
Fix for CONNECTORS-1459. Committed on behalf of Julien Massiera.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java
Modified: manifoldcf/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1809688&r1=1809687&r2=1809688&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Tue Sep 26 07:17:54 2017
@@ -3,6 +3,10 @@ $Id$
======================= 2.9-dev =====================
+CONNECTORS-1459: Allow the Tika service to override the mime type
+in the metadata.
+(Julien Massiera)
+
CONNECTORS-1458: Update to use SolrJ 7.0.0. This required some
revision to the connector, and also to make zookeeper.jar available
to all connectors as a root dependency.
Modified:
manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java?rev=1809688&r1=1809687&r2=1809688&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java
(original)
+++
manifoldcf/trunk/connectors/tikaservice/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/TikaExtractor.java
Tue Sep 26 07:17:54 2017
@@ -627,7 +627,15 @@ public class TikaExtractor extends org.a
}
metaJson = (JSONObject) parser.parse(sb.toString());
for (Object key : metaJson.keySet()) {
- metadata.add(key.toString(), metaJson.get(key).toString());
+ String metadataKey = key.toString();
+ String metadataValue = metaJson.get(key).toString();
+
+ // Replace the content type by the one found by Tika
+ if(metadataKey.equals("Content-Type")) {
+ metadata.remove(metadataKey);
+ }
+
+ metadata.add(metadataKey, metadataValue);
}
} finally {
tikaServerIs.close();