This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
View the commit online: https://github.com/apache/tika/commit/f216d84a9a6430fe370233a406054d157dc5fedb commit f216d84a9a6430fe370233a406054d157dc5fedb Author: tallison <[email protected]> AuthorDate: Wed Nov 20 14:33:02 2019 -0500 TIKA-2979 -- tika-server shouldn't throw an exception for a file format for which there is no parser. # Conflicts: # tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java --- CHANGES.txt | 3 +++ .../apache/tika/server/resource/TikaResource.java | 14 ------------ .../org/apache/tika/server/StackTraceOffTest.java | 26 +++++++++++++--------- .../org/apache/tika/server/StackTraceTest.java | 25 +++++++++++++-------- .../apache/tika/server/UnpackerResourceTest.java | 4 ++-- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index ce27843..906e97d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -4,6 +4,9 @@ Release 1.23 - ??/??/??? users configure OCR on rendered page images. This will have the effect of increasing rendered image size (TIKA-2624). + * NOTE: tika-server no longer returns 415 for file types for which there + is no parser. + * Upgrade to POI 4.1.1 (TIKA-2851). * Upgrade to PDFBox 2.0.17 (TIKA-2951). diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java index b0bbdd8..082228e 100644 --- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java +++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java @@ -27,7 +27,6 @@ import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.EncryptedDocumentException; import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.TikaMetadataKeys; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AutoDetectParser; @@ -117,19 +116,6 @@ public class TikaResource { public static Parser createParser() { final Parser parser = new AutoDetectParser(tikaConfig); - Map<MediaType, Parser> parsers = ((AutoDetectParser)parser).getParsers(); - - ((AutoDetectParser)parser).setParsers(parsers); - - ((AutoDetectParser)parser).setFallback(new Parser() { - public Set<MediaType> getSupportedTypes(ParseContext parseContext) { - return parser.getSupportedTypes(parseContext); - } - - public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) { - throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE); - } - }); if (digester != null) { return new DigestingParser(parser, digester); } diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java b/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java index d385581..84c7806 100644 --- a/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java +++ b/tika-server/src/test/java/org/apache/tika/server/StackTraceOffTest.java @@ -38,6 +38,7 @@ import org.apache.tika.server.resource.TikaResource; import org.apache.tika.server.resource.UnpackerResource; import org.apache.tika.server.writer.CSVMessageBodyWriter; import org.apache.tika.server.writer.JSONMessageBodyWriter; +import org.apache.tika.server.writer.MetadataListMessageBodyWriter; import org.apache.tika.server.writer.TextMessageBodyWriter; import org.apache.tika.server.writer.XMPMessageBodyWriter; import org.junit.Assert; @@ -79,6 +80,7 @@ public class StackTraceOffTest extends CXFTestBase { providers.add(new CSVMessageBodyWriter()); providers.add(new XMPMessageBodyWriter()); providers.add(new TextMessageBodyWriter()); + providers.add(new MetadataListMessageBodyWriter()); sf.setProviders(providers); } @@ -115,24 +117,28 @@ public class StackTraceOffTest extends CXFTestBase { } @Test - public void test415() throws Exception { + public void testEmptyParser() throws Exception { + //As of Tika 1.23, we're no longer returning 415 for file types + //that don't have a parser //no stack traces for 415 for (String path : PATHS) { + Response response = WebClient .create(endPoint + path) - .type("blechdeblah/deblechdeblah") - .accept("*/*") - .put(ClassLoader.getSystemResourceAsStream(TEST_NULL)); - assertNotNull("null response: " + path, response); - assertEquals("bad type: " + path, 415, response.getStatus()); - String msg = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("should be empty: " + path, "", msg); + .accept("*:*") + .put(ClassLoader.getSystemResourceAsStream("testDigilite.fdf")); + if (path.equals("/unpack")) { + //"NO CONTENT" + assertEquals("bad type: " + path, 204, response.getStatus()); + } else { + assertEquals("bad type: " + path, 200, response.getStatus()); + assertNotNull("null response: " + path, response); + } } } //For now, make sure that non-complete document - //still returns BAD_REQUEST. We may want to + //still returtestXDP.xdpns BAD_REQUEST. We may want to //make MetadataResource return the same types of parse //exceptions as the others... @Test diff --git a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java b/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java index 24882f7..b284d16 100644 --- a/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java +++ b/tika-server/src/test/java/org/apache/tika/server/StackTraceTest.java @@ -37,6 +37,7 @@ import org.apache.tika.server.resource.TikaResource; import org.apache.tika.server.resource.UnpackerResource; import org.apache.tika.server.writer.CSVMessageBodyWriter; import org.apache.tika.server.writer.JSONMessageBodyWriter; +import org.apache.tika.server.writer.MetadataListMessageBodyWriter; import org.apache.tika.server.writer.TextMessageBodyWriter; import org.apache.tika.server.writer.XMPMessageBodyWriter; import org.junit.Assert; @@ -73,6 +74,7 @@ public class StackTraceTest extends CXFTestBase { providers.add(new CSVMessageBodyWriter()); providers.add(new XMPMessageBodyWriter()); providers.add(new TextMessageBodyWriter()); + providers.add(new MetadataListMessageBodyWriter()); sf.setProviders(providers); } @@ -111,22 +113,27 @@ public class StackTraceTest extends CXFTestBase { } @Test - public void test415() throws Exception { + public void testEmptyParser() throws Exception { + //As of Tika 1.23, we're no longer returning 415 for file types + //that don't have a parser //no stack traces for 415 for (String path : PATHS) { + Response response = WebClient .create(endPoint + path) - .type("blechdeblah/deblechdeblah") - .accept("*/*") - .put(ClassLoader.getSystemResourceAsStream(TEST_NULL)); - assertNotNull("null response: " + path, response); - assertEquals("bad type: " + path, 415, response.getStatus()); - String msg = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("should be empty: " + path, "", msg); + .accept("*:*") + .put(ClassLoader.getSystemResourceAsStream("testDigilite.fdf")); + if (path.equals("/unpack")) { + //"NO CONTENT" + assertEquals("bad type: " + path, 204, response.getStatus()); + } else { + assertEquals("bad type: " + path, 200, response.getStatus()); + assertNotNull("null response: " + path, response); + } } } + //For now, make sure that non-complete document //still returns BAD_REQUEST. We may want to //make MetadataResource return the same types of parse diff --git a/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java index 64b8ab9..350257c 100644 --- a/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java +++ b/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java @@ -141,13 +141,13 @@ public class UnpackerResourceTest extends CXFTestBase { } @Test - public void test415() throws Exception { + public void test204() throws Exception { Response response = WebClient.create(endPoint + UNPACKER_PATH) .type("xxx/xxx") .accept("*/*") .put(ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV)); - assertEquals(415, response.getStatus()); + assertEquals(204, response.getStatus()); } @Test
