Author: jukka
Date: Sat Sep 12 21:02:54 2009
New Revision: 814236

URL: http://svn.apache.org/viewvc?rev=814236&view=rev
Log:
TIKA-269: Ease-of-use facade for Tika

Use the new facade to simplify some parser tests.

Modified:
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java Sat Sep 12 21:02:54 2009
@@ -16,15 +16,10 @@
  */
 package org.apache.tika.parser.asm;
 
-import java.io.InputStream;
+import junit.framework.TestCase;
 
+import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-
-import junit.framework.TestCase;
 
 /**
  * Test case for parsing Java class files.
@@ -32,24 +27,16 @@
 public class ClassParserTest extends TestCase {
 
     public void testClassParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
+        String path = "/test-documents/AutoDetectParser.class";
         Metadata metadata = new Metadata();
-
-        InputStream stream = ClassParserTest.class.getResourceAsStream(
-                "/test-documents/AutoDetectParser.class");
-        try {
-            parser.parse(stream, handler, metadata);
-        } finally {
-            stream.close();
-        }
+        String content = Tika.parseToString(
+                ClassParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("AutoDetectParser", metadata.get(Metadata.TITLE));
         assertEquals(
                 "AutoDetectParser.class",
                 metadata.get(Metadata.RESOURCE_NAME_KEY));
 
-        String content = handler.toString();
         assertTrue(content.contains("package org.apache.tika.parser;"));
         assertTrue(content.contains(
                 "class AutoDetectParser extends CompositeParser"));

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java Sat Sep 12 21:02:54 2009
@@ -16,61 +16,56 @@
  */
 package org.apache.tika.parser.audio;
 
-import java.io.InputStream;
-
 import junit.framework.TestCase;
 
+import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.Parser;
-import org.xml.sax.helpers.DefaultHandler;
 
 public class AudioParserTest extends TestCase {
 
-    private final Parser parser = new AudioParser();
-
     public void testWAV() throws Exception {
+        String path = "/test-documents/testWAV.wav";
         Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
-        InputStream stream = getClass().getResourceAsStream(
-                "/test-documents/testWAV.wav");
-
-        parser.parse(stream, new DefaultHandler(), metadata);
+        String content = Tika.parseToString(
+                AudioParserTest.class.getResourceAsStream(path), metadata);
 
+        assertEquals("audio/x-wav", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("44100.0", metadata.get("samplerate"));
         assertEquals("2", metadata.get("channels"));
         assertEquals("16", metadata.get("bits"));
         assertEquals("PCM_SIGNED", metadata.get("encoding"));
 
+        assertEquals("", content);
     }
 
     public void testAIFF() throws Exception {
+        String path = "/test-documents/testAIFF.aif";
         Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
-        InputStream stream = getClass().getResourceAsStream(
-                "/test-documents/testAIFF.aif");
-
-        parser.parse(stream, new DefaultHandler(), metadata);
+        String content = Tika.parseToString(
+                AudioParserTest.class.getResourceAsStream(path), metadata);
 
+        assertEquals("audio/x-aiff", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("44100.0", metadata.get("samplerate"));
         assertEquals("2", metadata.get("channels"));
         assertEquals("16", metadata.get("bits"));
         assertEquals("PCM_SIGNED", metadata.get("encoding"));
 
+        assertEquals("", content);
     }
 
     public void testAU() throws Exception {
+        String path = "/test-documents/testAU.au";
         Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
-        InputStream stream = getClass().getResourceAsStream(
-                "/test-documents/testAU.au");
-
-        parser.parse(stream, new DefaultHandler(), metadata);
+        String content = Tika.parseToString(
+                AudioParserTest.class.getResourceAsStream(path), metadata);
 
+        assertEquals("audio/basic", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("44100.0", metadata.get("samplerate"));
         assertEquals("2", metadata.get("channels"));
         assertEquals("16", metadata.get("bits"));
         assertEquals("PCM_SIGNED", metadata.get("encoding"));
 
+        assertEquals("", content);
     }
 
 }

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java Sat Sep 12 21:02:54 2009
@@ -16,32 +16,24 @@
  */
 package org.apache.tika.parser.audio;
 
-import java.io.InputStream;
-
 import junit.framework.TestCase;
 
+import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
 
 public class MidiParserTest extends TestCase {
 
-    private final Parser parser = new MidiParser();
-
     public void testMID() throws Exception {
+        String path = "/test-documents/testMID.mid";
         Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "audio/midi");
-        InputStream stream = getClass().getResourceAsStream(
-                "/test-documents/testMID.mid");
-
-        ContentHandler handler = new BodyContentHandler();
-        parser.parse(stream, handler, metadata);
+        String content = Tika.parseToString(
+                MidiParserTest.class.getResourceAsStream(path), metadata);
 
+        assertEquals("audio/midi", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("2", metadata.get("tracks"));
         assertEquals("0", metadata.get("patches"));
         assertEquals("PPQ", metadata.get("divisionType"));
 
-        assertTrue(handler.toString().contains("Untitled"));
+        assertTrue(content.contains("Untitled"));
     }
 }

Modified: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=814236&r1=814235&r2=814236&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Sat Sep 12 21:02:54 2009
@@ -20,13 +20,13 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
+import java.util.HashMap;
 
 import junit.framework.TestCase;
 
+import org.apache.tika.Tika;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.TeeContentHandler;
 import org.xml.sax.Attributes;
@@ -36,31 +36,26 @@
 
 public class HtmlParserTest extends TestCase {
 
-    private Parser parser = new HtmlParser();
-
-    private static InputStream getStream(String name) {
-        return Thread.currentThread().getContextClassLoader()
-                .getResourceAsStream(name);
-    }
-
     public void testParseAscii() throws Exception {
+        String path = "/test-documents/testHTML.html";
         final StringWriter href = new StringWriter();
-
         ContentHandler body = new BodyContentHandler();
-        ContentHandler link = new DefaultHandler() {
-            @Override
-            public void startElement(
-                    String u, String l, String n, Attributes a)
-                    throws SAXException {
-                if ("a".equals(l)) {
-                    href.append(a.getValue("href"));
-                }
-            }
-        };
         Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testHTML.html");
+        InputStream stream = HtmlParserTest.class.getResourceAsStream(path);
         try {
-            parser.parse(stream, new TeeContentHandler(body, link), metadata);
+            ContentHandler link = new DefaultHandler() {
+                @Override
+                public void startElement(
+                        String u, String l, String n, Attributes a)
+                        throws SAXException {
+                    if ("a".equals(l)) {
+                        href.append(a.getValue("href"));
+                    }
+                }
+            };
+            new HtmlParser().parse(
+                    stream, new TeeContentHandler(body, link),
+                    metadata, new HashMap<String, Object>());
         } finally {
             stream.close();
         }
@@ -69,6 +64,7 @@
                 "Title : Test Indexation Html", metadata.get(Metadata.TITLE));
         assertEquals("Tika Developers", metadata.get("Author"));
         assertEquals("5", metadata.get("refresh"));
+
         assertEquals("http://www.apache.org/", href.toString());
 
         String content = body.toString();
@@ -81,13 +77,10 @@
     }
 
     public void XtestParseUTF8() throws IOException, SAXException, TikaException {
-        ContentHandler handler = new BodyContentHandler();
+        String path = "/test-documents/testXHTML_utf8.html";
         Metadata metadata = new Metadata();
-
-        parser.parse(
-                getStream("test-documents/testHTML_utf8.html"),
-                handler, metadata);
-        String content = handler.toString();
+        String content = Tika.parseToString(
+                HtmlParserTest.class.getResourceAsStream(path), metadata);
 
         assertTrue("Did not contain expected text:"
                 + "Title : Tilte with UTF-8 chars öäå", content
@@ -102,21 +95,14 @@
     }
 
     public void testXhtmlParsing() throws Exception {
-        Parser parser = new AutoDetectParser(); // Should auto-detect!
-        ContentHandler handler = new BodyContentHandler();
+        String path = "/test-documents/testXHTML.html";
         Metadata metadata = new Metadata();
-
-        InputStream stream = HtmlParserTest.class.getResourceAsStream(
-                "/test-documents/testXHTML.html");
-        try {
-            parser.parse(stream, handler, metadata);
-        } finally {
-            stream.close();
-        }
+        String content = Tika.parseToString(
+                HtmlParserTest.class.getResourceAsStream(path), metadata);
 
         assertEquals("application/xhtml+xml", metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("XHTML test document", metadata.get(Metadata.TITLE));
-        String content = handler.toString();
+
         assertEquals("Tika Developers", metadata.get("Author"));
         assertEquals("5", metadata.get("refresh"));
         assertTrue(content.contains("ability of Apache Tika"));
@@ -125,13 +111,11 @@
     }
 
     public void testParseEmpty() throws Exception {
-        Metadata metadata = new Metadata();
-        StringWriter writer = new StringWriter();
-        parser.parse(
+        ContentHandler handler = new BodyContentHandler();
+        new HtmlParser().parse(
                 new ByteArrayInputStream(new byte[0]),
-                new BodyContentHandler(writer), metadata);
-        String content = writer.toString();
-        assertEquals("", content);
+                handler,  new Metadata(), new HashMap<String, Object>());
+        assertEquals("", handler.toString());
     }
 
     /**
@@ -140,11 +124,8 @@
      */
     public void testCharactersDirectlyUnderBodyElement() throws Exception {
         String test = "<html><body>test</body></html>";
-        ContentHandler handler = new BodyContentHandler();
-        parser.parse(
-                new ByteArrayInputStream(test.getBytes("UTF-8")),
-                handler, new Metadata());
-        String content = handler.toString();
+        String content = Tika.parseToString(
+                new ByteArrayInputStream(test.getBytes("UTF-8")));
         assertEquals("test", content);
     }
 
@@ -155,11 +136,8 @@
     public void testWhitespaceBetweenTableCells() throws Exception {
         String test =
             "<html><body><table><tr><td>a</td><td>b</td></table></body></html>";
-        ContentHandler handler = new BodyContentHandler();
-        parser.parse(
-                new ByteArrayInputStream(test.getBytes("UTF-8")),
-                handler, new Metadata());
-        String content = handler.toString();
+        String content = Tika.parseToString(
+                new ByteArrayInputStream(test.getBytes("UTF-8")));
         assertTrue(content.contains("a"));
         assertTrue(content.contains("b"));
         assertFalse(content.contains("ab"));


Reply via email to