This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new 2006dc5  TIKA-3352: Add json output for /tika endpoint in tika-server
2006dc5 is described below

commit 2006dc566c28f2655a8b7e625c49d6d1591e3d48
Author: tallison <[email protected]>
AuthorDate: Wed Apr 14 09:52:00 2021 -0400

    TIKA-3352: Add json output for /tika endpoint in tika-server
---
 CHANGES.txt                                        |  2 +
 .../java/org/apache/tika/server/TikaServerCli.java |  2 +-
 .../server/resource/RecursiveMetadataResource.java |  4 +-
 .../apache/tika/server/resource/TikaResource.java  | 95 ++++++++++++++++++++-
 .../java/org/apache/tika/server/CXFTestBase.java   |  6 +-
 .../tika/server/RecursiveMetadataResourceTest.java | 17 ++++
 .../org/apache/tika/server/StackTraceTest.java     | 17 +++-
 .../server/TikaResourceMetadataFilterTest.java     | 83 ++++++++++++++++++
 .../tika/server/TikaResourceNoStackTest.java       | 83 ++++++++++++++++++
 .../org/apache/tika/server/TikaResourceTest.java   | 98 ++++++++++++++++++++++
 .../resources/configs/metadata-filter-include.xml  | 30 +++++++
 .../src/test/resources/mock/hello_world.xml        | 26 ++++++
 .../src/test/resources/mock/hello_world_long.xml   | 30 +++++++
 13 files changed, 486 insertions(+), 7 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 4c3464b..bc05fc7 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 1.27 - ???
 
+   * Add json output for /tika endpoint in tika-server (TIKA-3352).
+
    * Tika's PDFParser should use the underlying file if one is passed in
      via a TikaInputStream (TIKA-3350)
 
diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
index 5b88a66..336ffb5 100644
--- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
+++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
@@ -307,7 +307,7 @@ public class TikaServerCli {
             } else {
                 serverStatus = new ServerStatus(serverId, 0, true);
             }
-            TikaResource.init(tika, digester, inputStreamFactory, 
serverStatus);
+            TikaResource.init(tika, returnStackTrace, digester, 
inputStreamFactory, serverStatus);
             JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
 
             List<ResourceProvider> rCoreProviders = new ArrayList<>();
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
index 71e7180..ec37779 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
@@ -45,8 +45,8 @@ import org.slf4j.LoggerFactory;
 @Path("/rmeta")
 public class RecursiveMetadataResource {
 
-    private static final String HANDLER_TYPE_PARAM = "handler";
-    private static final BasicContentHandlerFactory.HANDLER_TYPE 
DEFAULT_HANDLER_TYPE =
+    protected static final String HANDLER_TYPE_PARAM = "handler";
+    protected static final BasicContentHandlerFactory.HANDLER_TYPE 
DEFAULT_HANDLER_TYPE =
             BasicContentHandlerFactory.HANDLER_TYPE.XML;
     private static final Logger LOG = 
LoggerFactory.getLogger(RecursiveMetadataResource.class);
 
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index 118d7c3..3425741 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -27,6 +27,7 @@ import org.apache.tika.Tika;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.DocumentSelector;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMetadataKeys;
@@ -40,12 +41,16 @@ import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.parser.html.BoilerpipeContentHandler;
 import org.apache.tika.parser.ocr.TesseractOCRConfig;
 import org.apache.tika.parser.pdf.PDFParserConfig;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
+import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
 import org.apache.tika.sax.RichTextContentHandler;
 import org.apache.tika.server.InputStreamFactory;
 import org.apache.tika.server.ServerStatus;
 import org.apache.tika.server.TikaServerParseException;
+import org.apache.tika.utils.ExceptionUtils;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
@@ -56,6 +61,7 @@ import javax.ws.rs.GET;
 import javax.ws.rs.POST;
 import javax.ws.rs.PUT;
 import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
 import javax.ws.rs.Produces;
 import javax.ws.rs.WebApplicationException;
 import javax.ws.rs.core.Context;
@@ -83,6 +89,8 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
+import static 
org.apache.tika.server.resource.RecursiveMetadataResource.DEFAULT_HANDLER_TYPE;
+import static 
org.apache.tika.server.resource.RecursiveMetadataResource.HANDLER_TYPE_PARAM;
 
 @Path("/tika")
 public class TikaResource {
@@ -103,9 +111,12 @@ public class TikaResource {
     private static DigestingParser.Digester digester = null;
     private static InputStreamFactory inputStreamFactory = null;
     private static ServerStatus SERVER_STATUS = null;
-    public static void init(TikaConfig config, DigestingParser.Digester 
digestr,
+    private static boolean INCLUDE_STACK_TRACE = false;
+    public static void init(TikaConfig config,
+                            boolean includeStackTrace, 
DigestingParser.Digester digestr,
                             InputStreamFactory iSF, ServerStatus serverStatus) 
{
         tikaConfig = config;
+        INCLUDE_STACK_TRACE = includeStackTrace;
         digester = digestr;
         inputStreamFactory = iSF;
         SERVER_STATUS = serverStatus;
@@ -586,6 +597,88 @@ public class TikaResource {
                 metadata, httpHeaders.getRequestHeaders(), info, "xml");
     }
 
+
+    @POST
+    @Consumes("multipart/form-data")
+    @Produces("application/json")
+    @Path("form{" + HANDLER_TYPE_PARAM + " : (\\w+)?}")
+    public Metadata getJsonFromMultipart(Attachment att,
+                                         @Context HttpHeaders httpHeaders,
+                                         @Context final UriInfo info,
+                                         @PathParam(HANDLER_TYPE_PARAM)
+                                                 String handlerTypeName)
+            throws IOException, TikaException {
+        Metadata metadata = new Metadata();
+        parseToMetadata(getInputStream(att.getObject(InputStream.class), 
metadata, httpHeaders),
+                metadata, preparePostHeaderMap(att, httpHeaders), info, 
handlerTypeName);
+        TikaResource.getConfig().getMetadataFilter().filter(metadata);
+        return metadata;
+    }
+
+    @PUT
+    @Consumes("*/*")
+    @Produces("application/json")
+    @Path("{" + HANDLER_TYPE_PARAM + " : (\\w+)?}")
+    public Metadata getJson(final InputStream is, @Context
+            HttpHeaders httpHeaders,
+                            @Context final UriInfo info, 
@PathParam(HANDLER_TYPE_PARAM)
+                                    String handlerTypeName)
+            throws IOException, TikaException {
+        Metadata metadata = new Metadata();
+        parseToMetadata(getInputStream(is, metadata, httpHeaders), metadata,
+                httpHeaders.getRequestHeaders(), info, handlerTypeName);
+        TikaResource.getConfig().getMetadataFilter().filter(metadata);
+        return metadata;
+    }
+
+    private void parseToMetadata(InputStream inputStream,
+                                 Metadata metadata,
+                                 MultivaluedMap<String, String> httpHeaders,
+                                 UriInfo info, String handlerTypeName) throws 
IOException {
+        final Parser parser = createParser();
+        final ParseContext context = new ParseContext();
+
+        fillMetadata(parser, metadata, context, httpHeaders);
+        fillParseContext(context, httpHeaders, parser);
+
+
+        logRequest(LOG, info, metadata);
+        int writeLimit = -1;
+        if (httpHeaders.containsKey("writeLimit")) {
+            writeLimit = Integer.parseInt(httpHeaders.getFirst("writeLimit"));
+        }
+        BasicContentHandlerFactory.HANDLER_TYPE type =
+                BasicContentHandlerFactory.parseHandlerType(handlerTypeName, 
DEFAULT_HANDLER_TYPE);
+        BasicContentHandlerFactory fact = new BasicContentHandlerFactory(type, 
writeLimit);
+        ContentHandler contentHandler = fact.getNewContentHandler();
+
+        try {
+            parse(parser, LOG, info.getPath(), inputStream, contentHandler, 
metadata, context);
+        } catch (TikaServerParseException e) {
+            if (INCLUDE_STACK_TRACE) {
+                Throwable cause = e.getCause();
+                if (cause != null) {
+                    
metadata.add(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION,
+                            ExceptionUtils.getStackTrace(cause));
+                } else {
+                    
metadata.add(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION,
+                            ExceptionUtils.getStackTrace(e));
+                }
+            } else {
+                throw e;
+            }
+        } catch (OutOfMemoryError e) {
+            if (INCLUDE_STACK_TRACE) {
+                
metadata.add(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION,
+                        ExceptionUtils.getStackTrace(e));
+            } else {
+                throw e;
+            }
+        } finally {
+            metadata.add(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, 
contentHandler.toString());
+        }
+    }
+
     private StreamingOutput produceOutput(final InputStream is, Metadata 
metadata, final MultivaluedMap<String, String> httpHeaders,
                                           final UriInfo info, final String 
format) {
         final Parser parser = createParser();
diff --git a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
index ada3ce7..cfbafd0 100644
--- a/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
+++ b/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java
@@ -92,7 +92,7 @@ public abstract class CXFTestBase {
     public void setUp() throws Exception {
 
         this.tika = new TikaConfig(getTikaConfigInputStream());
-        TikaResource.init(tika,
+        TikaResource.init(tika, isIncludeStackTrace(),
                 new CommonsDigester(DIGESTER_READ_LIMIT, "md5,sha1:32"),
                 new DefaultInputStreamFactory(), new ServerStatus("", 0,true));
         JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
@@ -121,6 +121,10 @@ public abstract class CXFTestBase {
         server = sf.create();
     }
 
+    protected boolean isIncludeStackTrace() {
+        return false;
+    }
+
     protected InputStream getTikaConfigInputStream() {
         return 
getClass().getResourceAsStream("tika-config-for-server-tests.xml");
     }
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
index 36ddf3c..d0c84c7 100644
--- a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
@@ -60,6 +60,7 @@ public class RecursiveMetadataResourceTest extends 
CXFTestBase {
     private static final String SLASH = "/";
 
     private static final String TEST_RECURSIVE_DOC = 
"test_recursive_embedded.docx";
+    private static final String TEST_NULL_POINTER = "mock/null_pointer.xml";
 
     @Override
     protected void setUpResources(JAXRSServerFactoryBean sf) {
@@ -372,4 +373,20 @@ public class RecursiveMetadataResourceTest extends 
CXFTestBase {
                 
metadataList.get(6).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
 
     }
+
+    @Test
+    public void testNPE() throws Exception {
+        Response response = WebClient.create(endPoint + 
META_PATH).accept("application/json")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_NULL_POINTER));
+
+        Reader reader = new InputStreamReader((InputStream) 
response.getEntity(), UTF_8);
+        List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
+        Metadata metadata = metadataList.get(0);
+        assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("some content", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertContains("null pointer message",
+                
metadata.get(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION));
+
+    }
 }
diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java b/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
index 5115fd0..b3821b3 100644
--- a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java
@@ -78,15 +78,24 @@ public class StackTraceTest extends CXFTestBase {
         sf.setProviders(providers);
     }
 
+    @Override
+    protected boolean isIncludeStackTrace() {
+        return true;
+    }
+
     @Test
     public void testEncrypted() throws Exception {
         for (String path : PATHS) {
             if ("/rmeta".equals(path)) {
                 continue;
             }
+            String accept = "*/*";
+            if ("/tika".equals(path)) {
+                accept = "text/plain";
+            }
             Response response = WebClient
                     .create(endPoint + path)
-                    .accept("*/*")
+                    .accept(accept)
                     .header("Content-Disposition",
                             "attachment; filename=" + TEST_PASSWORD_PROTECTED)
                     
.put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED));
@@ -105,9 +114,13 @@ public class StackTraceTest extends CXFTestBase {
             if ("/rmeta".equals(path)) {
                 continue;
             }
+            String accept = "*/*";
+            if ("/tika".equals(path)) {
+                accept = "text/plain";
+            }
             Response response = WebClient
                     .create(endPoint + path)
-                    .accept("*/*")
+                    .accept(accept)
                     .put(ClassLoader.getSystemResourceAsStream(TEST_NULL));
             assertNotNull("null response: " + path, response);
             assertEquals("unprocessable: " + path, UNPROCESSEABLE, 
response.getStatus());
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceMetadataFilterTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceMetadataFilterTest.java
new file mode 100644
index 0000000..4bf44ad
--- /dev/null
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceMetadataFilterTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.junit.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonMetadata;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
+import org.apache.tika.server.resource.TikaResource;
+import org.apache.tika.server.writer.JSONMessageBodyWriter;
+
+public class TikaResourceMetadataFilterTest extends CXFTestBase {
+
+    public static final String TEST_HELLO_WORLD = "mock/hello_world.xml";
+
+    private static final String TIKA_PATH = "/tika";
+
+    @Override
+    protected InputStream getTikaConfigInputStream() {
+        return 
getClass().getResourceAsStream("/configs/metadata-filter-include.xml");
+    }
+
+    @Override
+    protected void setUpResources(JAXRSServerFactoryBean sf) {
+        sf.setResourceClasses(TikaResource.class);
+        sf.setResourceProvider(TikaResource.class,
+                new SingletonResourceProvider(new TikaResource()));
+    }
+
+    @Override
+    protected void setUpProviders(JAXRSServerFactoryBean sf) {
+        List<Object> providers = new ArrayList<Object>();
+        providers.add(new TikaServerParseExceptionMapper(false));
+        providers.add(new JSONMessageBodyWriter());
+        sf.setProviders(providers);
+    }
+
+
+    @Test
+    public void testBasic() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH).accept(
+                "application/json")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD));
+        Metadata metadata =
+                JsonMetadata.fromJson(new InputStreamReader(
+                        ((InputStream)response.getEntity()), 
StandardCharsets.UTF_8));
+        assertEquals(2, metadata.names().length);
+        assertNull(metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("hello world", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+    }
+
+}
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceNoStackTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceNoStackTest.java
new file mode 100644
index 0000000..4f231b2
--- /dev/null
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceNoStackTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import javax.ws.rs.core.Response;
+
+import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.junit.Test;
+
+import org.apache.tika.server.resource.TikaResource;
+import org.apache.tika.server.writer.JSONMessageBodyWriter;
+
+public class TikaResourceNoStackTest extends CXFTestBase {
+
+    public static final String TEST_HELLO_WORLD_LONG = 
"mock/hello_world_long.xml";
+    public static final String TEST_NULL_POINTER = "mock/null_pointer.xml";
+
+    private static final String TIKA_PATH = "/tika";
+
+    @Override
+    protected boolean isIncludeStackTrace() {
+        return false;
+    }
+
+    @Override
+    protected void setUpResources(JAXRSServerFactoryBean sf) {
+        sf.setResourceClasses(TikaResource.class);
+        sf.setResourceProvider(TikaResource.class,
+                new SingletonResourceProvider(new TikaResource()));
+    }
+
+    @Override
+    protected void setUpProviders(JAXRSServerFactoryBean sf) {
+        List<Object> providers = new ArrayList<Object>();
+        providers.add(new TikaServerParseExceptionMapper(false));
+        providers.add(new JSONMessageBodyWriter());
+        sf.setProviders(providers);
+    }
+
+    @Test
+    public void testJsonNPE() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH).accept(
+                "application/json")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_NULL_POINTER));
+        assertEquals(422, response.getStatus());
+        String content = getStringFromInputStream((InputStream) 
response.getEntity());
+        assertEquals(0, content.length());
+    }
+
+    @Test
+    public void testJsonWriteLimit() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH)
+                .header("writeLimit", "100")
+                .accept("application/json")
+                
.put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD_LONG));
+        assertEquals(500, response.getStatus());
+        String content = getStringFromInputStream((InputStream) 
response.getEntity());
+        assertEquals(0, content.length());
+    }
+
+}
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
index ddfd316..8940a18 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
@@ -26,9 +26,16 @@ import org.apache.cxf.jaxrs.ext.multipart.Attachment;
 import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition;
 import org.apache.cxf.jaxrs.ext.multipart.MultipartBody;
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.serialization.JsonMetadata;
 import org.apache.tika.parser.ocr.TesseractOCRConfig;
 import org.apache.tika.parser.ocr.TesseractOCRParser;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
 import org.apache.tika.server.resource.TikaResource;
+import org.apache.tika.server.writer.JSONMessageBodyWriter;
+
 import org.junit.Test;
 
 import javax.ws.rs.ProcessingException;
@@ -36,6 +43,7 @@ import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 import java.io.FileNotFoundException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.net.URISyntaxException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@@ -52,6 +60,11 @@ public class TikaResourceTest extends CXFTestBase {
     public static final String TEST_PASSWORD_PROTECTED = "password.xls";
     private static final String TEST_RECURSIVE_DOC = 
"test_recursive_embedded.docx";
     private static final String TEST_OOM = "mock/fake_oom.xml";
+    public static final String TEST_HELLO_WORLD = "mock/hello_world.xml";
+    public static final String TEST_HELLO_WORLD_LONG = 
"mock/hello_world_long.xml";
+    public static final String TEST_NULL_POINTER = "mock/null_pointer.xml";
+
+
 
     private static final String STREAM_CLOSED_FAULT = "java.io.IOException: 
Stream Closed";
 
@@ -59,6 +72,12 @@ public class TikaResourceTest extends CXFTestBase {
     private static final String TIKA_POST_PATH = "/tika/form";
     private static final int UNPROCESSEABLE = 422;
 
+
+    @Override
+    protected boolean isIncludeStackTrace() {
+        return true;
+    }
+
     @Override
     protected void setUpResources(JAXRSServerFactoryBean sf) {
         sf.setResourceClasses(TikaResource.class);
@@ -69,6 +88,7 @@ public class TikaResourceTest extends CXFTestBase {
     @Override
     protected void setUpProviders(JAXRSServerFactoryBean sf) {
         List<Object> providers = new ArrayList<Object>();
+        providers.add(new JSONMessageBodyWriter());
         providers.add(new TikaServerParseExceptionMapper(false));
         sf.setProviders(providers);
     }
@@ -604,4 +624,82 @@ public class TikaResourceTest extends CXFTestBase {
         return new MultipartBody(att);
     }
 
+    @Test
+    public void testJson() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH).accept(
+                "application/json")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD));
+        Metadata metadata =
+                JsonMetadata.fromJson(new InputStreamReader(
+                        ((InputStream)response.getEntity()), 
StandardCharsets.UTF_8));
+
+        assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("hello world",
+                
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+    }
+
+    @Test
+    public void testJsonNPE() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH).accept(
+                "application/json")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_NULL_POINTER));
+        Metadata metadata =
+                JsonMetadata.fromJson(new InputStreamReader(
+                        ((InputStream)response.getEntity()), 
StandardCharsets.UTF_8));
+
+        assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("some content", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertContains("null pointer message",
+                
metadata.get(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION));
+    }
+
+    @Test
+    public void testJsonWriteLimit() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH)
+                .header("writeLimit", "100")
+                .accept("application/json")
+                
.put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD_LONG));
+        Metadata metadata =
+                JsonMetadata.fromJson(new InputStreamReader(
+                        ((InputStream)response.getEntity()), 
StandardCharsets.UTF_8));
+
+        assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("Hello world", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertNotFound("dissolve", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        
assertTrue(metadata.get(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION).startsWith(
+                
"org.apache.tika.sax.WriteOutContentHandler$WriteLimitReachedException"
+        ));
+    }
+
+    @Test
+    public void testJsonHandlerType() throws Exception {
+        Response response = WebClient.create(endPoint + TIKA_PATH)
+                .accept("application/json")
+                
.put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD_LONG));
+        Metadata metadata =
+                JsonMetadata.fromJson(new InputStreamReader(
+                        ((InputStream)response.getEntity()), 
StandardCharsets.UTF_8));
+
+        assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("Hello world", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        //default is xhtml
+        assertContains("<p>", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+
+        response = WebClient.create(endPoint + TIKA_PATH + "/text")
+                .accept("application/json")
+                
.put(ClassLoader.getSystemResourceAsStream(TEST_HELLO_WORLD_LONG));
+        metadata =
+                JsonMetadata.fromJson(new InputStreamReader(
+                        ((InputStream)response.getEntity()), 
StandardCharsets.UTF_8));
+
+        assertEquals("Nikolai Lobachevsky", metadata.get("author"));
+        assertEquals("application/mock+xml", 
metadata.get(Metadata.CONTENT_TYPE));
+        assertContains("Hello world", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertNotFound("<p>", 
metadata.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+    }
+
 }
diff --git a/tika-server/src/test/resources/configs/metadata-filter-include.xml b/tika-server/src/test/resources/configs/metadata-filter-include.xml
new file mode 100644
index 0000000..3a7a7c1
--- /dev/null
+++ b/tika-server/src/test/resources/configs/metadata-filter-include.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <metadataFilters>
+        <metadataFilter 
class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
+            <params>
+                <param name="include" type="list">
+                    <string>X-TIKA:content</string>
+                    <string>extended-properties:Application</string>
+                    <string>Content-Type</string>
+                </param>
+            </params>
+        </metadataFilter>
+    </metadataFilters>
+</properties>
diff --git a/tika-server/src/test/resources/mock/hello_world.xml b/tika-server/src/test/resources/mock/hello_world.xml
new file mode 100644
index 0000000..27cd62a
--- /dev/null
+++ b/tika-server/src/test/resources/mock/hello_world.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <metadata action="add" name="title">你好,世界</metadata>
+    <metadata action="add" name="my-key">parsers-value</metadata>
+    <write element="p">hello world</write>
+</mock>
\ No newline at end of file
diff --git a/tika-server/src/test/resources/mock/hello_world_long.xml b/tika-server/src/test/resources/mock/hello_world_long.xml
new file mode 100644
index 0000000..bf06ad2
--- /dev/null
+++ b/tika-server/src/test/resources/mock/hello_world_long.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <metadata action="add" name="title">你好,世界</metadata>
+    <metadata action="add" name="my-key">parsers-value</metadata>
+    <write element="p">Hello world...</write>
+    <write element="p">When in the Course of human events, it becomes 
necessary for one people to dissolve the
+        political bands which have connected them with another, and to assume 
among the powers of the earth, the
+        separate and equal station to which the Laws of Nature and of Nature’s 
God entitle them, a decent respect
+        to the opinions of mankind requires that they should declare the 
causes which impel them to the separation.</write>
+</mock>
\ No newline at end of file

Reply via email to