Author: maxcom
Date: Fri Mar 23 09:45:34 2012
New Revision: 1304247
URL: http://svn.apache.org/viewvc?rev=1304247&view=rev
Log:
New rewritten UnpackerResource for TIKA-593:
1) Support for TAR output (in addition to ZIP)
2) Fix for empty OLE attachments problem (similar to TIKA-877)
3) "/all" resource to get text + meta + embeddings in one request
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
Removed:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1304247&r1=1304246&r2=1304247&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
Fri Mar 23 09:45:34 2012
@@ -55,16 +55,22 @@ public class MetadataResource {
return new StreamingOutput() {
public void write(OutputStream outputStream) throws IOException,
WebApplicationException {
- CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream));
- for (String name : metadata.names()) {
- String[] values = metadata.getValues(name);
- ArrayList<String> list = new ArrayList<String>(values.length+1);
- list.add(name);
- list.addAll(Arrays.asList(values));
- writer.writeNext(list.toArray(values));
- }
- writer.close();
+ metadataToCsv(metadata, outputStream);
}
};
}
+
+ public static void metadataToCsv(Metadata metadata, OutputStream
outputStream) throws IOException {
+ CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream,
"UTF-8"));
+
+ for (String name : metadata.names()) {
+ String[] values = metadata.getValues(name);
+ ArrayList<String> list = new ArrayList<String>(values.length+1);
+ list.add(name);
+ list.addAll(Arrays.asList(values));
+ writer.writeNext(list.toArray(values));
+ }
+
+ writer.close();
+ }
}
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java?rev=1304247&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java
(added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TarWriter.java
Fri Mar 23 09:45:34 2012
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+import java.util.Map;
+
+@Provider
+@Produces("application/x-tar")
+public class TarWriter implements MessageBodyWriter<Map<String, byte[]>> {
+ private static void tarStoreBuffer(TarArchiveOutputStream zip, String name,
byte[] dataBuffer) throws IOException {
+ TarArchiveEntry entry = new TarArchiveEntry(name);
+
+ entry.setSize(dataBuffer.length);
+
+ zip.putArchiveEntry(entry);
+
+ zip.write(dataBuffer);
+
+ zip.closeArchiveEntry();
+ }
+
+ public boolean isWriteable(Class<?> type, Type genericType, Annotation[]
annotations, MediaType mediaType) {
+ return Map.class.isAssignableFrom(type);
+ }
+
+ public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type
genericType, Annotation[] annotations, MediaType mediaType) {
+ return -1;
+ }
+
+ public void writeTo(Map<String, byte[]> parts, Class<?> type, Type
genericType, Annotation[] annotations, MediaType mediaType,
MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws
IOException, WebApplicationException {
+ TarArchiveOutputStream zip = new TarArchiveOutputStream(entityStream);
+
+ for (Map.Entry<String, byte[]> entry : parts.entrySet()) {
+ tarStoreBuffer(zip, entry.getKey(), entry.getValue());
+ }
+
+ zip.close();
+ }
+}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1304247&r1=1304246&r2=1304247&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
Fri Mar 23 09:45:34 2012
@@ -20,19 +20,19 @@ package org.apache.tika.server;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.poi.poifs.filesystem.Ole10Native;
-import org.apache.poi.poifs.filesystem.Ole10NativeException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.util.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.OfficeParser;
+import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
@@ -41,17 +41,19 @@ import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.*;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.zip.ZipOutputStream;
+import javax.ws.rs.core.Context;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.Response;
+import javax.ws.rs.core.UriInfo;
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
-@Path("/unpacker{id:(/.*)?}")
+@Path("/")
public class UnpackerResource {
private static final Log logger = LogFactory.getLog(UnpackerResource.class);
+ public static final String TEXT_FILENAME = "__TEXT__";
+ private static final String META_FILENAME = "__METADATA__";
private final TikaConfig tikaConfig;
@@ -59,13 +61,34 @@ public class UnpackerResource {
tikaConfig = TikaConfig.getDefaultConfig();
}
+ @Path("unpacker{id:(/.*)?}")
@PUT
- @Produces("application/zip")
- public StreamingOutput getText(
+ @Produces({"application/zip", "application/x-tar"})
+ public Map<String, byte[]> unpack(
InputStream is,
@Context HttpHeaders httpHeaders,
@Context UriInfo info
) throws Exception {
+ return process(is, httpHeaders, info, false);
+ }
+
+ @Path("all{id:(/.*)?}")
+ @PUT
+ @Produces({"application/zip", "application/x-tar"})
+ public Map<String, byte[]> unpackAll(
+ InputStream is,
+ @Context HttpHeaders httpHeaders,
+ @Context UriInfo info
+ ) throws Exception {
+ return process(is, httpHeaders, info, true);
+ }
+
+ private Map<String, byte[]> process(
+ InputStream is,
+ @Context HttpHeaders httpHeaders,
+ @Context UriInfo info,
+ boolean saveAll
+ ) throws Exception {
Metadata metadata = new Metadata();
AutoDetectParser parser = TikaResource.createParser();
@@ -73,14 +96,21 @@ public class UnpackerResource {
TikaResource.fillMetadata(parser, metadata, httpHeaders);
TikaResource.logRequest(logger, info, metadata);
- ContentHandler ch = new DefaultHandler();
+ ContentHandler ch;
+ ByteArrayOutputStream text = new ByteArrayOutputStream();
+
+ if (saveAll) {
+ ch = new BodyContentHandler(new RichTextContentHandler(new
OutputStreamWriter(text, "UTF-8")));
+ } else {
+ ch = new DefaultHandler();
+ }
ParseContext pc = new ParseContext();
- ZipOutput zout = new ZipOutput();
+ Map<String, byte[]> files = new HashMap<String, byte[]>();
MutableInt count = new MutableInt();
- pc.set(EmbeddedDocumentExtractor.class, new
MyEmbeddedDocumentExtractor(count, zout));
+ pc.set(EmbeddedDocumentExtractor.class, new
MyEmbeddedDocumentExtractor(count, files));
try {
parser.parse(is, ch, metadata, pc);
@@ -89,20 +119,31 @@ public class UnpackerResource {
"%s: Unpacker failed",
info.getPath()
), ex);
+
+ throw ex;
}
- if (count.intValue() == 0) {
+ if (count.intValue() == 0 && !saveAll) {
throw new WebApplicationException(Response.Status.NO_CONTENT);
}
- return zout;
+ if (saveAll) {
+ files.put(TEXT_FILENAME, text.toByteArray());
+
+ ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
+ MetadataResource.metadataToCsv(metadata, metaStream);
+
+ files.put(META_FILENAME, metaStream.toByteArray());
+ }
+
+ return files;
}
private class MyEmbeddedDocumentExtractor implements
EmbeddedDocumentExtractor {
private final MutableInt count;
- private final ZipOutput zout;
+ private final Map<String, byte[]> zout;
- MyEmbeddedDocumentExtractor(MutableInt count, ZipOutput zout) {
+ MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) {
this.count = count;
this.zout = zout;
}
@@ -123,7 +164,7 @@ public class UnpackerResource {
name = Integer.toString(count.intValue());
}
- if (!name.contains(".")) {
+ if (!name.contains(".") && contentType!=null) {
try {
String ext =
tikaConfig.getMimeRepository().forName(contentType).getExtension();
@@ -159,17 +200,48 @@ public class UnpackerResource {
} else {
name += '.' + type.getExtension();
}
- }
+ }
final String finalName = name;
- zout.put(new PartExtractor<byte[]>() {
- public void extract(byte[] part, ZipOutputStream output) throws
IOException {
- ZipUtils.zipStoreBuffer(output, finalName, part);
+ if (data.length > 0) {
+ zout.put(finalName, data);
+
+ count.increment();
+ } else {
+ if (inputStream instanceof TikaInputStream) {
+ TikaInputStream tin = (TikaInputStream) inputStream;
+
+ if (tin.getOpenContainer()!=null && tin.getOpenContainer()
instanceof DirectoryEntry) {
+ POIFSFileSystem fs = new POIFSFileSystem();
+ copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
+ ByteArrayOutputStream bos2 = new ByteArrayOutputStream();
+ fs.writeFilesystem(bos2);
+ bos2.close();
+
+ zout.put(finalName, bos2.toByteArray());
+ }
}
- }, Collections.singletonList(data));
+ }
+ }
- count.increment();
+ protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir)
+ throws IOException {
+ for (Entry entry : sourceDir) {
+ if (entry instanceof DirectoryEntry) {
+ // Need to recurse
+ DirectoryEntry newDir = destDir.createDirectory(entry.getName());
+ copy((DirectoryEntry) entry, newDir);
+ } else {
+ // Copy entry
+ InputStream contents = new DocumentInputStream((DocumentEntry)
entry);
+ try {
+ destDir.createDocument(entry.getName(), contents);
+ } finally {
+ contents.close();
+ }
+ }
+ }
}
}
-}
+}
\ No newline at end of file
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java?rev=1304247&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
(added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java
Fri Mar 23 09:45:34 2012
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+import java.util.Map;
+import java.util.UUID;
+import java.util.zip.CRC32;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipOutputStream;
+
+@Provider
+@Produces("application/zip")
+public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> {
+ private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name,
byte[] dataBuffer) throws IOException {
+ ZipEntry zipEntry = new ZipEntry(name!=null?name:
UUID.randomUUID().toString());
+ zipEntry.setMethod(ZipOutputStream.STORED);
+
+ zipEntry.setSize(dataBuffer.length);
+ CRC32 crc32 = new CRC32();
+ crc32.update(dataBuffer);
+ zipEntry.setCrc(crc32.getValue());
+
+ try {
+ zip.putArchiveEntry(new ZipArchiveEntry(zipEntry));
+ } catch (ZipException ex) {
+ if (name!=null) {
+ zipStoreBuffer(zip, "x-"+name, dataBuffer);
+ return;
+ }
+ }
+
+ zip.write(dataBuffer);
+
+ zip.closeArchiveEntry();
+ }
+
+ public boolean isWriteable(Class<?> type, Type genericType, Annotation[]
annotations, MediaType mediaType) {
+ return Map.class.isAssignableFrom(type);
+ }
+
+ public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type
genericType, Annotation[] annotations, MediaType mediaType) {
+ return -1;
+ }
+
+ public void writeTo(Map<String, byte[]> parts, Class<?> type, Type
genericType, Annotation[] annotations, MediaType mediaType,
MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws
IOException, WebApplicationException {
+ ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream);
+
+ zip.setMethod(ZipArchiveOutputStream.STORED);
+
+ for (Map.Entry<String, byte[]> entry : parts.entrySet()) {
+ zipStoreBuffer(zip, entry.getKey(), entry.getValue());
+ }
+
+ zip.close();
+ }
+}
Modified:
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java?rev=1304247&r1=1304246&r2=1304247&view=diff
==============================================================================
---
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
(original)
+++
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
Fri Mar 23 09:45:34 2012
@@ -17,21 +17,27 @@
package org.apache.tika.server;
+import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.test.framework.JerseyTest;
import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.compress.archivers.ArchiveEntry;
+import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.tika.io.IOUtils;
import org.junit.Test;
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
public class UnpackerResourceTest extends JerseyTest {
private static final String UNPACKER_PATH = "/unpacker";
+ private static final String ALL_PATH = "/all";
private static final String TEST_DOC_WAV = "Doc1_ole.doc";
private static final String WAV1_MD5 = "bdd0a78a54968e362445364f95d8dc96";
@@ -74,12 +80,32 @@ public class UnpackerResourceTest extend
.type(APPLICATION_MSWORD)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
- ZipInputStream zip = new ZipInputStream(is);
+ ArchiveInputStream zip = new ZipArchiveInputStream(is);
- Map<String, String> data = readZip(zip);
+ Map<String, String> data = readArchive(zip);
assertEquals(WAV1_MD5, data.get(WAV1_NAME));
assertEquals(WAV2_MD5, data.get(WAV2_NAME));
+
+ assertFalse(data.containsKey(UnpackerResource.TEXT_FILENAME));
+ }
+
+ @Test
+ public void testDocWAVText() throws Exception {
+ InputStream is =
+ resource()
+ .path(ALL_PATH)
+ .type(APPLICATION_MSWORD)
+ .put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+ ArchiveInputStream zip = new ZipArchiveInputStream(is);
+
+ Map<String, String> data = readArchive(zip);
+
+ assertEquals(WAV1_MD5, data.get(WAV1_NAME));
+ assertEquals(WAV2_MD5, data.get(WAV2_NAME));
+
+ assertTrue(data.containsKey(UnpackerResource.TEXT_FILENAME));
}
@Test
@@ -90,9 +116,9 @@ public class UnpackerResourceTest extend
.type(APPLICATION_MSWORD)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
- ZipInputStream zip = new ZipInputStream(is);
+ ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
- Map<String, String> data = readZip(zip);
+ Map<String, String> data = readArchive(zip);
assertEquals(JPG_MD5, data.get(JPG_NAME));
}
@@ -105,9 +131,9 @@ public class UnpackerResourceTest extend
.type(APPLICATION_MSWORD)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream("2pic.doc"));
- ZipInputStream zip = new ZipInputStream(is);
+ ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
- Map<String, String> data = readZip(zip);
+ Map<String, String> data = readArchive(zip);
assertEquals(JPG2_MD5, data.get(JPG2_NAME));
}
@@ -119,15 +145,26 @@ public class UnpackerResourceTest extend
.path(UNPACKER_PATH)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOCX_IMAGE));
- ZipInputStream zip = new ZipInputStream(is);
+ ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
- Map<String, String> data = readZip(zip);
+ Map<String, String> data = readArchive(zip);
assertEquals(DOCX_IMAGE1_MD5, data.get(DOCX_IMAGE1_NAME));
assertEquals(DOCX_IMAGE2_MD5, data.get(DOCX_IMAGE2_NAME));
}
@Test
+ public void test415() throws Exception {
+ ClientResponse cr =
+ resource()
+ .path(UNPACKER_PATH)
+ .type("xxx/xxx")
+ .put(ClientResponse.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+ assertEquals(415, cr.getStatus());
+ }
+
+ @Test
public void testExeDOCX() throws Exception {
String TEST_DOCX_EXE = "2exe.docx";
InputStream is =
@@ -135,29 +172,14 @@ public class UnpackerResourceTest extend
.path(UNPACKER_PATH)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOCX_EXE));
- ZipInputStream zip = new ZipInputStream(is);
+ ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
- Map<String, String> data = readZip(zip);
+ Map<String, String> data = readArchive(zip);
assertEquals(DOCX_EXE1_MD5, data.get(DOCX_EXE1_NAME));
assertEquals(DOCX_EXE2_MD5, data.get(DOCX_EXE2_NAME));
}
-/*
- @Test
- public void testImageXSLX() throws Exception {
- InputStream is =
- webResource
- .path(UNPACKER_PATH)
- .put(InputStream.class,
ClassLoader.getSystemResourceAsStream("pic.xlsx"));
-
- ZipInputStream zip = new ZipInputStream(is);
- Map<String, String> data = readZip(zip);
-
- assertEquals(XSL_IMAGE1_MD5, data.get(XSLX_IMAGE1_NAME));
- assertEquals(XSL_IMAGE2_MD5, data.get(XSLX_IMAGE2_NAME));
- }
-*/
@Test
public void testImageXSL() throws Exception {
InputStream is =
@@ -165,19 +187,19 @@ public class UnpackerResourceTest extend
.path(UNPACKER_PATH)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream("pic.xls"));
- ZipInputStream zip = new ZipInputStream(is);
+ ZipArchiveInputStream zip = new ZipArchiveInputStream(is);
- Map<String, String> data = readZip(zip);
+ Map<String, String> data = readArchive(zip);
assertEquals(XSL_IMAGE1_MD5, data.get("0.jpg"));
assertEquals(XSL_IMAGE2_MD5, data.get("1.jpg"));
}
- private static Map<String, String> readZip(ZipInputStream zip) throws
IOException {
+ private static Map<String, String> readArchive(ArchiveInputStream zip)
throws IOException {
Map<String, String> data = new HashMap<String, String>();
while (true) {
- ZipEntry entry = zip.getNextEntry();
+ ArchiveEntry entry = zip.getNextEntry();
if (entry==null) {
break;
@@ -192,4 +214,55 @@ public class UnpackerResourceTest extend
return data;
}
+
+ private static String readArchiveText(ArchiveInputStream zip) throws
IOException {
+ while (true) {
+ ArchiveEntry entry = zip.getNextEntry();
+
+ if (entry==null) {
+ break;
+ }
+
+ if (!entry.getName().equals(UnpackerResource.TEXT_FILENAME)) {
+ continue;
+ }
+
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+ IOUtils.copy(zip, bos);
+
+ return bos.toString("UTF-8");
+ }
+
+ return null;
+ }
+
+ @Test
+ public void testTarDocPicture() throws Exception {
+ InputStream is =
+ resource()
+ .path(UNPACKER_PATH)
+ .type(APPLICATION_MSWORD)
+ .accept("application/x-tar")
+ .put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
+
+ ArchiveInputStream zip = new TarArchiveInputStream(is);
+
+ Map<String, String> data = readArchive(zip);
+
+ assertEquals(JPG_MD5, data.get(JPG_NAME));
+ }
+
+ @Test
+ public void testText() throws IOException {
+ InputStream is
+ = resource()
+ .path(ALL_PATH)
+ .header(CONTENT_TYPE, APPLICATION_XML)
+ .put(InputStream.class,
ClassLoader.getSystemResourceAsStream("test.doc"));
+ String responseMsg = readArchiveText(new ZipArchiveInputStream(is));
+
+ assertNotNull(responseMsg);
+ assertTrue(responseMsg.contains("test"));
+ }
}