Author: jukka
Date: Fri Dec 27 03:31:22 2013
New Revision: 1553619

URL: http://svn.apache.org/r1553619
Log:
TIKA-1110: Incorrect declared SUPPORTED_TYPES in ChmParser

Fix the XHTML output of ChmParser and add a test case for proper integration 
with AutoDetectParser

Removed:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java
Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java?rev=1553619&r1=1553618&r2=1553619&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java Fri Dec 27 03:31:22 2013
@@ -28,6 +28,7 @@ import org.apache.tika.metadata.Metadata
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -50,13 +51,16 @@ public class ChmParser extends AbstractP
             Metadata metadata, ParseContext context) throws IOException,
             SAXException, TikaException {
         CHMDocumentInformation chmInfo = CHMDocumentInformation.load(stream);
-        metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");
-        extractMetadata(chmInfo, metadata);
-        CHM2XHTML.process(chmInfo, handler);
-    }
 
-    private void extractMetadata(CHMDocumentInformation chmInfo,
-            Metadata metadata) throws TikaException, IOException {
+        // metadata
+        metadata.set(Metadata.CONTENT_TYPE, "application/vnd.ms-htmlhelp");
         chmInfo.getCHMDocInformation(metadata);
+
+        // content
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.characters(chmInfo.getText());
+        xhtml.endDocument();
     }
+
 }

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=1553619&r1=1553618&r2=1553619&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Fri Dec 27 03:31:22 2013
@@ -49,6 +49,7 @@ public class AutoDetectParserTest {
     private static final String KEYNOTE    = "application/vnd.apple.keynote";
     private static final String PAGES      = "application/vnd.apple.pages";
     private static final String NUMBERS    = "application/vnd.apple.numbers";
+    private static final String CHM        = "application/vnd.ms-htmlhelp";
     private static final String RTF        = "application/rtf";
     private static final String PLAINTEXT  = "text/plain; charset=ISO-8859-1";
     private static final String UTF8TEXT   = "text/plain; charset=UTF-8";
@@ -165,6 +166,11 @@ public class AutoDetectParserTest {
     }
 
     @Test
+    public void testChm() throws Exception {
+        assertAutoDetect("testChm.chm", CHM, "If you do not specify a window type or a window name, the main window is used.");
+    }
+
+    @Test
     public void testEpub() throws Exception {
         assertAutoDetect(
                 "testEPUB.epub", "application/epub+zip",


Reply via email to