Author: nick Date: Sat May 23 12:21:05 2015 New Revision: 1681337 URL: http://svn.apache.org/r1681337 Log: TIKA-1635 Added zlib parser support, left disabled for now pending a fix for a Commons Compress bug (COMPRESS-316)
Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java?rev=1681337&r1=1681336&r2=1681337&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java Sat May 23 12:21:05 2015 @@ -27,6 +27,7 @@ import org.apache.commons.compress.compr import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.compressors.gzip.GzipUtils; import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream; @@ -57,6 +58,8 @@ public class CompressorParser extends Ab private static final MediaType GZIP_ALT = MediaType.application("x-gzip"); private static final MediaType XZ = MediaType.application("x-xz"); private static final MediaType PACK = MediaType.application("application/x-java-pack200"); + // TODO Not yet supported by CompressorStreamFactory, see COMPRESS-316 + private static final MediaType ZLIB = MediaType.application("zlib"); private static final Set<MediaType> SUPPORTED_TYPES = MediaType.set(BZIP, BZIP2, GZIP, GZIP_ALT, XZ, PACK); @@ -73,6 +76,8 @@ public class CompressorParser extends Ab return GZIP; } else if (stream instanceof 
XZCompressorInputStream) { return XZ; + } else if (stream instanceof DeflateCompressorInputStream) { + return ZLIB; } else if (stream instanceof Pack200CompressorInputStream) { return PACK; } else { @@ -133,6 +138,8 @@ public class CompressorParser extends Ab name = name.substring(0, name.length() - 4); } else if (name.endsWith(".xz")) { name = name.substring(0, name.length() - 3); + } else if (name.endsWith(".zlib")) { + name = name.substring(0, name.length() - 5); } else if (name.endsWith(".pack")) { name = name.substring(0, name.length() - 5); } else if (name.length() > 0) { Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java?rev=1681337&view=auto ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java (added) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZlibParserTest.java Sat May 23 12:21:05 2015 @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.pkg; + +import static org.junit.Assert.assertEquals; + +import java.io.InputStream; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.junit.Ignore; +import org.junit.Test; +import org.xml.sax.ContentHandler; + +/** + * Test case for parsing zlib compressed + * + * Note - currently disabled, pending a fix for COMPRESS-316 + */ +public class ZlibParserTest extends AbstractPkgTest { + @Test + @Ignore + public void testZlibParsing() throws Exception { + Parser parser = new AutoDetectParser(); // Should auto-detect! + ContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + + InputStream stream = ZipParserTest.class.getResourceAsStream( + "/test-documents/testTXT.zlib"); + try { + parser.parse(stream, handler, metadata, recursingContext); + } finally { + stream.close(); + } + + assertEquals("application/zlib", metadata.get(Metadata.CONTENT_TYPE)); + String content = handler.toString(); + assertContains("Test d'indexation de Txt", content); + assertContains("http://www.apache.org", content); + } + + /** + * Tests that the ParseContext parser is correctly + * fired for all the embedded entries. + */ + @Test + @Ignore + public void testEmbedded() throws Exception { + Parser parser = new AutoDetectParser(); // Should auto-detect! 
+ ContentHandler handler = new BodyContentHandler(); + Metadata metadata = new Metadata(); + + InputStream stream = ZipParserTest.class.getResourceAsStream( + "/test-documents/testTXT.zlib"); + try { + parser.parse(stream, handler, metadata, trackingContext); + } finally { + stream.close(); + } + + // Should have found a single text document inside + assertEquals(1, tracker.filenames.size()); + assertEquals(1, tracker.mediatypes.size()); + assertEquals(1, tracker.modifiedAts.size()); + + // Won't have names, dates or types, as zlib doesn't have that + assertEquals(null, tracker.filenames.get(0)); + assertEquals(null, tracker.mediatypes.get(0)); + assertEquals(null, tracker.createdAts.get(0)); + assertEquals(null, tracker.modifiedAts.get(0)); + } +}