Repository: cxf
Updated Branches:
  refs/heads/3.1.x-fixes 4aa035327 -> 29129d880


Updating TikaContentExtractor to support the embedded attachments


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/29129d88
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/29129d88
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/29129d88

Branch: refs/heads/3.1.x-fixes
Commit: 29129d880455fcd535fdb9004a7677fcd1f51906
Parents: 4aa0353
Author: Sergey Beryozkin <sberyoz...@gmail.com>
Authored: Thu Sep 15 11:21:46 2016 +0100
Committer: Sergey Beryozkin <sberyoz...@gmail.com>
Committed: Thu Sep 15 11:22:54 2016 +0100

----------------------------------------------------------------------
 .../ext/search/tika/TikaContentExtractor.java   | 40 +++++++++++++++-----
 1 file changed, 31 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/29129d88/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
----------------------------------------------------------------------
diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
index fd3511a..e4d1918 100644
--- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
+++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
@@ -36,6 +36,7 @@ import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.ToTextContentHandler;
@@ -47,6 +48,13 @@ public class TikaContentExtractor {
     private final Detector detector;
     
     /**
+     * Create new Tika-based content extractor using AutoDetectParser.  
+     */
+    public TikaContentExtractor() {
+        this(new AutoDetectParser(), false);
+    }
+    
+    /**
      * Create new Tika-based content extractor using the provided parser instance.  
      * @param parser parser instance
      */
@@ -159,9 +167,6 @@ public class TikaContentExtractor {
         if (in == null) {
             return null;
         }
-        if (context == null) {
-            context = new ParseContext();
-        }
         final Metadata metadata = new Metadata();            
         
         try {
@@ -171,20 +176,37 @@ public class TikaContentExtractor {
                 mediaType = MediaType.parse(mtHint.toString());
             } else if (detector != null && in.markSupported()) {
                 mediaType = detector.detect(in, metadata);
-            } 
+            }
+            if (mediaType != null) {
+                metadata.set(Metadata.CONTENT_TYPE, mediaType.toString());
+            }
             
             Parser parser = null;
-            for (Parser p : parsers) {
-                if (mediaType != null && !p.getSupportedTypes(context).contains(mediaType)) {
-                    continue;
+            if (parsers.size() == 1) {
+                parser = parsers.get(0);
+            } else {
+                for (Parser p : parsers) {
+                    if (mediaType != null && !p.getSupportedTypes(context).contains(mediaType)) {
+                        continue;
+                    }
+                    parser = p;
+                    break;
                 }
-                parser = p;
-                break;
             }
             if (parser == null) {
                 return null;
             }
             
+            if (context == null) {
+                context = new ParseContext();
+            }
+            if (context.get(Parser.class) == null) {
+                // to process the embedded attachments
+                context.set(Parser.class, 
+                            parser instanceof AutoDetectParser ? parser : new AutoDetectParser());
+            }
+            
+            
             try {
                 parser.parse(in, handler, metadata, context);
             } catch (Exception ex) {

Reply via email to