Author: tallison Date: Fri Feb 20 19:11:44 2015 New Revision: 1661193 URL: http://svn.apache.org/r1661193 Log: TIKA-1323: allow tika-server to return stack traces from parse exceptions for easier analysis of parser exceptions via tika-server.
Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java - copied, changed from r1661119, tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java tika/trunk/tika-server/src/test/resources/META-INF/ tika/trunk/tika-server/src/test/resources/META-INF/services/ tika/trunk/tika-server/src/test/resources/META-INF/services/org.apache.tika.parser.Parser tika/trunk/tika-server/src/test/resources/evil/ tika/trunk/tika-server/src/test/resources/evil/null_pointer.evil tika/trunk/tika-server/src/test/resources/mime/ tika/trunk/tika-server/src/test/resources/mime/custom-mimetypes.xml Removed: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java Modified: tika/trunk/CHANGES.txt tika/trunk/tika-server/pom.xml tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java Modified: tika/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/CHANGES.txt (original) +++ tika/trunk/CHANGES.txt Fri Feb 20 19:11:44 2015 @@ -1,4 +1,6 @@ Release 1.8 - Current Development + * Tika's JAX-RS server can now return stacktraces for + parse exceptions. (TIKA-1323) * Added EvilParser for testing handling of exceptions, errors and hangs in code that uses parsers. (TIKA-1533) Modified: tika/trunk/tika-server/pom.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/pom.xml (original) +++ tika/trunk/tika-server/pom.xml Fri Feb 20 19:11:44 2015 @@ -98,10 +98,16 @@ <scope>test</scope> </dependency> <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>tika-parsers</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> </dependency> - <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-jcl</artifactId> Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java Fri Feb 20 19:11:44 2015 @@ -17,8 +17,6 @@ package org.apache.tika.server; -import java.io.InputStream; - import javax.ws.rs.Consumes; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -31,11 +29,13 @@ import javax.ws.rs.core.MultivaluedMap; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; +import java.io.IOException; +import java.io.InputStream; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.cxf.jaxrs.ext.multipart.Attachment; import org.apache.tika.config.TikaConfig; - import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.ParseContext; @@ -123,15 +123,14 @@ public class MetadataResource { } private Metadata parseMetadata(InputStream is, - MultivaluedMap<String, String> httpHeaders, UriInfo info) throws Exception { + MultivaluedMap<String, String> httpHeaders, UriInfo info) throws IOException { final Metadata metadata = new Metadata(); final ParseContext context = new ParseContext(); AutoDetectParser parser = TikaResource.createParser(tikaConfig); TikaResource.fillMetadata(parser, metadata, context, httpHeaders); TikaResource.fillParseContext(context, httpHeaders); TikaResource.logRequest(logger, info, metadata); - - parser.parse(is, new DefaultHandler(), metadata, context); + TikaResource.parse(parser, logger, info.getPath(), is, new DefaultHandler(), metadata, context); return metadata; } } Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java Fri Feb 20 19:11:44 2015 @@ -14,35 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.tika.server; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.cxf.jaxrs.ext.multipart.Attachment; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.RecursiveParserWrapper; -import org.apache.tika.sax.BasicContentHandlerFactory; -import org.xml.sax.helpers.DefaultHandler; - -import javax.ws.rs.Consumes; -import javax.ws.rs.POST; -import javax.ws.rs.PUT; + +package org.apache.tika.server; + +import javax.ws.rs.Consumes; +import javax.ws.rs.POST; +import javax.ws.rs.PUT; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.core.Context; import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.core.Response; -import javax.ws.rs.core.UriInfo; -import java.io.InputStream; - -@Path("/rmeta") -public class RecursiveMetadataResource { - private static final Log logger = LogFactory.getLog(RecursiveMetadataResource.class); +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.UriInfo; + +import java.io.InputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.cxf.jaxrs.ext.multipart.Attachment; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.RecursiveParserWrapper; +import org.apache.tika.sax.BasicContentHandlerFactory; +import org.xml.sax.helpers.DefaultHandler; + +@Path("/rmeta") +public class RecursiveMetadataResource { + private static final Log logger = LogFactory.getLog(RecursiveMetadataResource.class); private TikaConfig tikaConfig; @@ -75,11 +76,10 @@ public class RecursiveMetadataResource { BasicContentHandlerFactory.HANDLER_TYPE type = BasicContentHandlerFactory.HANDLER_TYPE.TEXT; RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser, new BasicContentHandlerFactory(type, -1)); - TikaResource.fillMetadata(parser, metadata, context, httpHeaders); - TikaResource.fillParseContext(context, httpHeaders); - TikaResource.logRequest(logger, info, metadata); - - wrapper.parse(is, new DefaultHandler(), metadata, context); - return new MetadataList(wrapper.getMetadata()); - } -} + TikaResource.fillMetadata(parser, metadata, context, httpHeaders); + TikaResource.fillParseContext(context, httpHeaders); + TikaResource.logRequest(logger, info, metadata); + TikaResource.parse(wrapper, logger, info.getPath(), is, new DefaultHandler(), metadata, context); + return new MetadataList(wrapper.getMetadata()); + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Fri Feb 20 19:11:44 2015 @@ -53,11 +53,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.cxf.jaxrs.ext.multipart.Attachment; import org.apache.poi.extractor.ExtractorFactory; -import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.EncryptedDocumentException; -import org.apache.tika.exception.TikaException; import org.apache.tika.io.IOUtils; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -266,37 +264,7 @@ public static void fillMetadata(AutoDete TikaInputStream tis = TikaInputStream.get(is); try { - parser.parse(tis, body, metadata, context); - } catch (SAXException e) { - throw new WebApplicationException(e); - } catch (EncryptedDocumentException e) { - logger.warn(String.format( - Locale.ROOT, - "%s: Encrypted document", - info.getPath() - ), e); - - throw new WebApplicationException(e, Response.status(422).build()); - } catch (TikaException e) { - logger.warn(String.format( - Locale.ROOT, - "%s: Text extraction failed", - info.getPath() - ), e); - - if (e.getCause()!=null && e.getCause() instanceof WebApplicationException) { - throw (WebApplicationException) e.getCause(); - } - - if (e.getCause()!=null && e.getCause() instanceof IllegalStateException) { - throw new WebApplicationException(Response.status(422).build()); - } - - if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException) { - throw new WebApplicationException(Response.status(422).build()); - } - - throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR); + parse(parser, logger, info.getPath(), tis, body, metadata, context); } finally { tis.close(); } @@ -368,44 +336,38 @@ public static void fillMetadata(AutoDete TikaInputStream tis = TikaInputStream.get(is); try { - parser.parse(tis, content, metadata, context); - } - catch (SAXException e) { - throw new WebApplicationException(e); - } - catch (EncryptedDocumentException e) { - logger.warn(String.format( - Locale.ROOT, - "%s: Encrypted document", - info.getPath() - ), e); - throw new WebApplicationException(e, Response.status(422).build()); - } - catch (TikaException e) { - logger.warn(String.format( - Locale.ROOT, - "%s: Text extraction failed", - info.getPath() - ), e); - - if (e.getCause()!=null && e.getCause() instanceof WebApplicationException) - throw (WebApplicationException) e.getCause(); - - if (e.getCause()!=null && e.getCause() instanceof IllegalStateException) - throw new WebApplicationException(Response.status(422).build()); - - if (e.getCause()!=null && e.getCause() instanceof OldWordFileFormatException) - throw new WebApplicationException(Response.status(422).build()); - - throw new WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR); - } - finally { + parse(parser, logger, info.getPath(), tis, content, metadata, context); + } finally { tis.close(); } } }; } + public static void parse(Parser parser, Log logger, String path, InputStream inputStream, + ContentHandler handler, Metadata metadata, ParseContext parseContext) throws IOException { + try { + parser.parse(inputStream, handler, metadata, parseContext); + } catch (SAXException e) { + throw new TikaServerParseException(e); + } catch (EncryptedDocumentException e) { + logger.warn(String.format( + Locale.ROOT, + "%s: Encrypted document", + path + ), e); + throw new TikaServerParseException(e); + } catch (Exception e) { + logger.warn(String.format( + Locale.ROOT, + "%s: Text extraction failed", + path + ), e); + throw new TikaServerParseException(e); + } + } + + public static void logRequest(Log logger, UriInfo info, Metadata metadata) { if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)==null) { logger.info(String.format( Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Fri Feb 20 19:11:44 2015 @@ -50,6 +50,7 @@ public class TikaServerCli { options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ')'); options.addOption("p", "port", true, "listen port (default = " + DEFAULT_PORT + ')'); options.addOption("l", "log", true, "request URI log level ('debug' or 'info')"); + options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'"); options.addOption("?", "help", false, "this help message"); return options; @@ -82,7 +83,12 @@ public class TikaServerCli { if (line.hasOption("port")) { port = Integer.valueOf(line.getOptionValue("port")); } - + + boolean returnStackTrace = false; + if (line.hasOption("includeStack")) { + returnStackTrace = true; + } + TikaLoggingFilter logFilter = null; if (line.hasOption("log")) { String logLevel = line.getOptionValue("log"); @@ -120,7 +126,7 @@ public class TikaServerCli { providers.add(new JSONMessageBodyWriter()); providers.add(new XMPMessageBodyWriter()); providers.add(new TextMessageBodyWriter()); - providers.add(new TikaExceptionMapper()); + providers.add(new TikaServerParseExceptionMapper(returnStackTrace)); if (logFilter != null) { providers.add(logFilter); } Copied: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java (from r1661119, tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java) URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java?p2=tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java&p1=tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java&r1=1661119&r2=1661193&rev=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseException.java Fri Feb 20 19:11:44 2015 @@ -1,3 +1,5 @@ +package org.apache.tika.server; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -15,22 +17,19 @@ * limitations under the License. */ -package org.apache.tika.server; +import javax.ws.rs.WebApplicationException; -import org.apache.tika.exception.TikaException; +/** + * Simple wrapper exception to be thrown for consistent handling + * of exceptions that can happen during a parse. + */ +public class TikaServerParseException extends WebApplicationException { -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.Response; -import javax.ws.rs.ext.ExceptionMapper; -import javax.ws.rs.ext.Provider; + public TikaServerParseException(String msg) { + super(msg); + } -@Provider -public class TikaExceptionMapper implements ExceptionMapper<TikaException> { - public Response toResponse(TikaException e) { - if (e.getCause() !=null && e.getCause() instanceof WebApplicationException) { - return ((WebApplicationException) e.getCause()).getResponse(); - } else { - return Response.serverError().build(); + public TikaServerParseException(Exception e) { + super(e); } - } } Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java?rev=1661193&view=auto ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java (added) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java Fri Feb 20 19:11:44 2015 @@ -0,0 +1,79 @@ +package org.apache.tika.server; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.Response; +import javax.ws.rs.ext.ExceptionMapper; +import javax.ws.rs.ext.Provider; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; + +import org.apache.poi.hwpf.OldWordFileFormatException; +import org.apache.tika.exception.EncryptedDocumentException; +import org.apache.tika.exception.TikaException; + +@Provider +public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServerParseException> { + private final boolean returnStack; + + public TikaServerParseExceptionMapper(boolean returnStack) { + this.returnStack = returnStack; + } + + public Response toResponse(TikaServerParseException e) { + if (e.getMessage().equals(Response.Status.UNSUPPORTED_MEDIA_TYPE.toString())) { + return buildResponse(e, 415); + } + Throwable cause = e.getCause(); + if (cause == null) { + return buildResponse(e, Response.Status.INTERNAL_SERVER_ERROR.getStatusCode()); + } else { + if (cause instanceof EncryptedDocumentException) { + return buildResponse(cause, 422); + } else if (cause instanceof TikaException) { + //unsupported media type + Throwable causeOfCause = cause.getCause(); + if (causeOfCause instanceof WebApplicationException) { + return ((WebApplicationException)causeOfCause).getResponse(); + } + return buildResponse(cause, 422); + } else if (cause instanceof IllegalStateException) { + return buildResponse(cause, 422); + } else if(cause instanceof OldWordFileFormatException) { + return buildResponse(cause, 422); + } else if (cause instanceof WebApplicationException) { + return ((WebApplicationException) e.getCause()).getResponse(); + } else { + return buildResponse(e, 500); + } + } + } + + private Response buildResponse(Throwable cause, int i) { + if (returnStack && cause != null) { + Writer result = new StringWriter(); + PrintWriter writer = new PrintWriter(result); + cause.printStackTrace(writer); + return Response.status(i).entity(result.toString()).type("text/plain").build(); + } else { + return Response.status(i).build(); + } + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java Fri Feb 20 19:11:44 2015 @@ -35,7 +35,6 @@ import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.Locale; import java.util.Map; import au.com.bytecode.opencsv.CSVWriter; @@ -51,7 +50,6 @@ import org.apache.poi.poifs.filesystem.O import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.IOUtils; import org.apache.tika.config.TikaConfig; -import org.apache.tika.exception.TikaException; import org.apache.tika.extractor.EmbeddedDocumentExtractor; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; @@ -125,18 +123,7 @@ public class UnpackerResource { MutableInt count = new MutableInt(); pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files)); - - try { - parser.parse(is, ch, metadata, pc); - } catch (TikaException ex) { - logger.warn(String.format( - Locale.ROOT, - "%s: Unpacker failed", - info.getPath() - ), ex); - - throw ex; - } + TikaResource.parse(parser, logger, info.getPath(), is, ch, metadata, pc); if (count.intValue() == 0 && !saveAll) { throw new WebApplicationException(Response.Status.NO_CONTENT); Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java Fri Feb 20 19:11:44 2015 @@ -17,12 +17,13 @@ package org.apache.tika.server; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Enumeration; @@ -147,7 +148,22 @@ public abstract class CXFTestBase { private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException { File tempFile = File.createTempFile("tmp-", "." + archiveType); - IOUtils.copy(inputStream, new FileOutputStream(tempFile)); - return tempFile; - } -} + IOUtils.copy(inputStream, new FileOutputStream(tempFile)); + return tempFile; + } + + protected static InputStream copy(InputStream in, int remaining) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + while (remaining > 0) { + byte[] bytes = new byte[remaining]; + int n = in.read(bytes); + if (n <= 0) { + break; + } + out.write(bytes, 0, n); + remaining -= n; + } + return new ByteArrayInputStream(out.toByteArray()); + } + +} Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java Fri Feb 20 19:11:44 2015 @@ -48,13 +48,13 @@ public class DetectorResourceTest extend @Override protected void setUpProviders(JAXRSServerFactoryBean sf) { - List<Object> providers = new ArrayList<Object>(); - providers.add(new TarWriter()); - providers.add(new ZipWriter()); - providers.add(new TikaExceptionMapper()); - sf.setProviders(providers); - - } + List<Object> providers = new ArrayList<Object>(); + providers.add(new TarWriter()); + providers.add(new ZipWriter()); + providers.add(new TikaServerParseExceptionMapper(false)); + sf.setProviders(providers); + + } @Test public void testDetectCsvWithExt() throws Exception { Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java Fri Feb 20 19:11:44 2015 @@ -222,19 +222,6 @@ public class MetadataResourceTest extend assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", s); } - private static InputStream copy(InputStream in, int remaining) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - while (remaining > 0) { - byte[] bytes = new byte[remaining]; - int n = in.read(bytes); - if (n <= 0) { - break; - } - out.write(bytes, 0, n); - remaining -= n; - } - return new ByteArrayInputStream(out.toByteArray()); - } } Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java?rev=1661193&view=auto ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java (added) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java Fri Feb 20 19:11:44 2015 @@ -0,0 +1,143 @@ +package org.apache.tika.server; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.cxf.jaxrs.client.WebClient; +import org.apache.cxf.jaxrs.lifecycle.ResourceProvider; +import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; +import org.junit.Assert; +import org.junit.Test; + + +/** + * Test to make sure that no stack traces are returned + * when the stack trace param is set to false. + */ +public class StackTraceOffTest extends CXFTestBase { + public static final String TEST_NULL = "evil/null_pointer.evil"; + public static final String TEST_PASSWORD_PROTECTED = "password.xls"; + + private static final String[] PATHS = new String[]{ + "/tika", + "/rmeta", + "/unpack", + "/meta", + }; + private static final int UNPROCESSEABLE = 422; + + @Override + protected void setUpResources(JAXRSServerFactoryBean sf) { + List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>(); + rCoreProviders.add(new SingletonResourceProvider(new MetadataResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika))); + sf.setResourceProviders(rCoreProviders); + } + + @Override + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<Object>(); + providers.add(new TikaServerParseExceptionMapper(false)); + providers.add(new JSONMessageBodyWriter()); + providers.add(new CSVMessageBodyWriter()); + providers.add(new XMPMessageBodyWriter()); + providers.add(new TextMessageBodyWriter()); + sf.setProviders(providers); + } + + @Test + public void testEncrypted() throws Exception { + for (String path : PATHS) { + Response response = WebClient + .create(endPoint + path) + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + TEST_PASSWORD_PROTECTED) + .put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED)); + assertNotNull("null response: " + path, response); + assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("should be empty: "+path, "", msg); + } + } + + @Test + public void testNullPointerOnTika() throws Exception { + for (String path : PATHS) { + Response response = WebClient + .create(endPoint + path) + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=null_pointer.evil") + .put(ClassLoader.getSystemResourceAsStream(TEST_NULL)); + assertNotNull("null response: " + path, response); + assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("should be empty: "+path, "", msg); + } + } + + @Test + public void test415() throws Exception { + //no stack traces for 415 + for (String path : PATHS) { + Response response = WebClient + .create(endPoint + path) + .type("blechdeblah/deblechdeblah") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=null_pointer.evil") + .put(ClassLoader.getSystemResourceAsStream(TEST_NULL)); + assertNotNull("null response: " + path, response); + assertEquals("bad type: " + path, 415, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("should be empty: "+path, "", msg); + } + } + + //For now, make sure that non-complete document + //still returns BAD_REQUEST. We may want to + //make MetadataResource return the same types of parse + //exceptions as the others... + @Test + public void testMeta() throws Exception { + InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC); + + Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/msword") + .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000)); + Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus()); + String msg = getStringFromInputStream((InputStream) response.getEntity()); + assertEquals("Failed to get metadata field Author", msg); + } +} Added: tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java?rev=1661193&view=auto ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java (added) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java Fri Feb 20 19:11:44 2015 @@ -0,0 +1,139 @@ +package org.apache.tika.server; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.cxf.jaxrs.client.WebClient; +import org.apache.cxf.jaxrs.lifecycle.ResourceProvider; +import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; +import org.junit.Assert; +import org.junit.Test; + +public class StackTraceTest extends CXFTestBase { + public static final String TEST_NULL = "evil/null_pointer.evil"; + public static final String TEST_PASSWORD_PROTECTED = "password.xls"; + + private static final String[] PATHS = new String[]{ + "/tika", + "/rmeta", + "/unpack", + "/meta", + }; + private static final int UNPROCESSEABLE = 422; + + @Override + protected void setUpResources(JAXRSServerFactoryBean sf) { + List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>(); + rCoreProviders.add(new SingletonResourceProvider(new MetadataResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika))); + sf.setResourceProviders(rCoreProviders); + } + + @Override + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<Object>(); + providers.add(new TikaServerParseExceptionMapper(true)); + providers.add(new JSONMessageBodyWriter()); + providers.add(new CSVMessageBodyWriter()); + providers.add(new XMPMessageBodyWriter()); + providers.add(new TextMessageBodyWriter()); + sf.setProviders(providers); + } + + @Test + public void testEncrypted() throws Exception { + for (String path : PATHS) { + Response response = WebClient + .create(endPoint + path) + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + TEST_PASSWORD_PROTECTED) + .put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED)); + assertNotNull("null response: " + path, response); + assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response + .getEntity()); + assertContains("org.apache.tika.exception.EncryptedDocumentException", + msg); + } + } + + @Test + public void testNullPointerOnTika() throws Exception { + for (String path : PATHS) { + Response response = WebClient + .create(endPoint + path) + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=null_pointer.evil") + .put(ClassLoader.getSystemResourceAsStream(TEST_NULL)); + assertNotNull("null response: " + path, response); + assertEquals("unprocessable: " + path, UNPROCESSEABLE, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response + .getEntity()); + assertContains("Caused by: java.lang.NullPointerException: null pointer message", + msg); + } + } + + @Test + public void test415() throws Exception { + //no stack traces for 415 + for (String path : PATHS) { + Response response = WebClient + .create(endPoint + path) + .type("blechdeblah/deblechdeblah") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=null_pointer.evil") + .put(ClassLoader.getSystemResourceAsStream(TEST_NULL)); + assertNotNull("null response: " + path, response); + assertEquals("bad type: " + path, 415, response.getStatus()); + String msg = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("should be empty: "+path, "", msg); + } + } + + //For now, make sure that non-complete document + //still returns BAD_REQUEST. We may want to + //make MetadataResource return the same types of parse + //exceptions as the others... + @Test + public void testMeta() throws Exception { + InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC); + + Response response = WebClient.create(endPoint + "/meta" + "/Author").type("application/msword") + .accept(MediaType.TEXT_PLAIN).put(copy(stream, 8000)); + Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus()); + String msg = getStringFromInputStream((InputStream) response.getEntity()); + assertEquals("Failed to get metadata field Author", msg); + } +} Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java Fri Feb 20 19:11:44 2015 @@ -21,6 +21,8 @@ import static org.junit.Assert.assertEqu import static org.junit.Assert.assertTrue; import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; import javax.ws.rs.core.Response; @@ -45,7 +47,11 @@ public class TikaResourceTest extends CX } @Override - protected void setUpProviders(JAXRSServerFactoryBean sf) {} + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<Object>(); + providers.add(new TikaServerParseExceptionMapper(false)); + sf.setProviders(providers); + } @Test public void testHelloWorld() throws Exception { Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java?rev=1661193&r1=1661192&r2=1661193&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java Fri Feb 20 19:11:44 2015 @@ -76,7 +76,7 @@ public class UnpackerResourceTest extend List<Object> providers = new ArrayList<Object>(); providers.add(new TarWriter()); providers.add(new ZipWriter()); - providers.add(new TikaExceptionMapper()); + providers.add(new TikaServerParseExceptionMapper(false)); sf.setProviders(providers); } Added: tika/trunk/tika-server/src/test/resources/META-INF/services/org.apache.tika.parser.Parser URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1661193&view=auto ============================================================================== --- tika/trunk/tika-server/src/test/resources/META-INF/services/org.apache.tika.parser.Parser (added) +++ tika/trunk/tika-server/src/test/resources/META-INF/services/org.apache.tika.parser.Parser Fri Feb 20 19:11:44 2015 @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.tika.parser.evil.EvilParser \ No newline at end of file Added: tika/trunk/tika-server/src/test/resources/evil/null_pointer.evil URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/evil/null_pointer.evil?rev=1661193&view=auto ============================================================================== --- tika/trunk/tika-server/src/test/resources/evil/null_pointer.evil (added) +++ tika/trunk/tika-server/src/test/resources/evil/null_pointer.evil Fri Feb 20 19:11:44 2015 @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +<throwable message="null pointer message">java.lang.NullPointerException</throwable> \ No newline at end of file Added: tika/trunk/tika-server/src/test/resources/mime/custom-mimetypes.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/resources/mime/custom-mimetypes.xml?rev=1661193&view=auto ============================================================================== --- tika/trunk/tika-server/src/test/resources/mime/custom-mimetypes.xml (added) +++ tika/trunk/tika-server/src/test/resources/mime/custom-mimetypes.xml Fri Feb 20 19:11:44 2015 @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<mime-info> + <mime-type type="application/evil"> + <glob pattern="*.evil"/> + <sub-class-of type="text/plain"/> + </mime-type> +</mime-info> \ No newline at end of file