Repository: cxf
Updated Branches:
  refs/heads/3.1.x-fixes 1ac741ef5 -> fbe8db21d


Adding ContentHandler back to TikaContent


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/fbe8db21
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/fbe8db21
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/fbe8db21

Branch: refs/heads/3.1.x-fixes
Commit: fbe8db21d300177b37d5f9b0aa3a4e1c99bde857
Parents: 1ac741e
Author: Sergey Beryozkin <[email protected]>
Authored: Thu Nov 10 14:15:57 2016 +0000
Committer: Sergey Beryozkin <[email protected]>
Committed: Thu Nov 10 14:17:44 2016 +0000

----------------------------------------------------------------------
 .../jaxrs/ext/search/tika/TikaContentExtractor.java    | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/fbe8db21/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
----------------------------------------------------------------------
diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
index e4d1918..d69da2d 100644
--- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
+++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
@@ -162,7 +162,7 @@ public class TikaContentExtractor {
      * @return the extracted content and metadata or null if extraction is not possible 
      *         or was unsuccessful
      */
-    public TikaContent extract(final InputStream in, final ContentHandler handler, 
+    public TikaContent extract(final InputStream in, ContentHandler handler, 
                                javax.ws.rs.core.MediaType mtHint, ParseContext context) {    
         if (in == null) {
             return null;
@@ -215,12 +215,13 @@ public class TikaContentExtractor {
                 // extraction process. If we get an exception with a null handler then a given parser is still 
                 // not ready to accept null handlers so lets retry with IgnoreContentHandler.
                 if (handler == null) {
-                    parser.parse(in, new IgnoreContentHandler(), metadata, context);
+                    handler = new IgnoreContentHandler();
+                    parser.parse(in, handler, metadata, context);
                 } else {
                     throw ex;
                 }
             }
-            return new TikaContent(handler == null ? null : handler.toString(), metadata, mediaType);
+            return new TikaContent(handler, metadata, mediaType);
         } catch (final IOException ex) {
             LOG.log(Level.WARNING, "Unable to extract media type from input stream", ex);
         } catch (final SAXException ex) {
@@ -269,10 +270,10 @@ public class TikaContentExtractor {
      */
     public static class TikaContent implements Serializable {
         private static final long serialVersionUID = -1240120543378490963L;
-        private String content;
+        private ContentHandler content;
         private Metadata metadata;
         private MediaType mediaType;
-        public TikaContent(String content, Metadata metadata, MediaType mediaType) {
+        public TikaContent(ContentHandler content, Metadata metadata, MediaType mediaType) {
             this.content = content;
             this.metadata = metadata;
             this.mediaType = mediaType;
@@ -283,7 +284,7 @@ public class TikaContentExtractor {
          *         to parse the content  
          */
         public String getContent() {
-            return content;
+            return content instanceof ToTextContentHandler ? content.toString() : null;
         }
         /**
          * Return the metadata

Reply via email to