Author: jukka
Date: Thu Sep 4 10:47:45 2008
New Revision: 692173
URL: http://svn.apache.org/viewvc?rev=692173&view=rev
Log:
TIKA-149: Parser for zip files
Added a test case that checks auto-detection of zip streams and parsing of all
zip entries.
Added:
incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/
incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/ZipParserTest.java
(with props)
Added:
incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/ZipParserTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/ZipParserTest.java?rev=692173&view=auto
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/ZipParserTest.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/ZipParserTest.java Thu Sep 4 10:47:45 2008
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.zip;
+
+import java.io.InputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Test case for parsing zip files.
+ *
+ * <p>Feeds the <code>test-documents.zip</code> classpath resource to an
+ * {@link AutoDetectParser} and checks that the reported content type is
+ * <code>application/zip</code> and that the extracted text contains the
+ * expected entry names and text snippets.
+ */
+public class ZipParserTest extends TestCase {
+
+    /** Classpath location of the zip archive used as the test fixture. */
+    private static final String TEST_ARCHIVE =
+        "/test-documents/test-documents.zip";
+
+    /**
+     * Parses the test archive with the auto-detecting parser and verifies
+     * the content type metadata and the extracted text.
+     *
+     * @throws Exception if the archive cannot be read or parsed
+     */
+    public void testZipParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        InputStream stream =
+            ZipParserTest.class.getResourceAsStream(TEST_ARCHIVE);
+        // getResourceAsStream returns null when the resource is missing;
+        // fail with a clear message instead of a NullPointerException.
+        assertNotNull("Test resource not found: " + TEST_ARCHIVE, stream);
+        try {
+            parser.parse(stream, handler, metadata);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("application/zip", metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertContains(content, "testEXCEL.xls");
+        assertContains(content, "Sample Excel Worksheet");
+        assertContains(content, "testHTML.html");
+        assertContains(content, "Test Indexation Html");
+        assertContains(content, "testOpenOffice2.odt");
+        assertContains(content, "This is a sample Open Office document");
+        assertContains(content, "testPDF.pdf");
+        assertContains(content, "Apache Tika");
+        assertContains(content, "testPPT.ppt");
+        assertContains(content, "Sample Powerpoint Slide");
+        assertContains(content, "testRTF.rtf");
+        assertContains(content, "indexation Word");
+        assertContains(content, "testTXT.txt");
+        assertContains(content, "Test d'indexation de Txt");
+        assertContains(content, "testWORD.doc");
+        assertContains(content, "This is a sample Microsoft Word Document");
+        assertContains(content, "testXML.xml");
+        assertContains(content, "Rida Benjelloun");
+    }
+
+    /**
+     * Asserts that the extracted text contains the given string, naming the
+     * missing string in the failure message.
+     *
+     * @param content  text produced by the content handler
+     * @param expected substring that must occur in <code>content</code>
+     */
+    private static void assertContains(String content, String expected) {
+        assertTrue(
+            "Extracted text does not contain: " + expected,
+            content.contains(expected));
+    }
+
+}
Propchange:
incubator/tika/trunk/src/test/java/org/apache/tika/parser/zip/ZipParserTest.java
------------------------------------------------------------------------------
svn:eol-style = native