Author: kkrugler
Date: Thu Aug 12 22:54:57 2010
New Revision: 985028

URL: http://svn.apache.org/viewvc?rev=985028&view=rev
Log:
TIKA-478: Fix up missing end </body> and </html> tags for documents with no
real content.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=985028&r1=985027&r2=985028&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Thu Aug 12 22:54:57 2010
@@ -188,8 +188,9 @@ public class XHTMLContentHandler extends
     public void endDocument() throws SAXException {
         lazyEndHead();
         
-        endElement("body");
-        endElement("html");
+        super.endElement(XHTML, "body", "body");
+        super.endElement(XHTML, "html", "html");
+        
         endPrefixMapping("");
         super.endDocument();
     }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java?rev=985028&r1=985027&r2=985028&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java Thu Aug 12 22:54:57 2010
@@ -444,7 +444,12 @@ public class HtmlParserTest extends Test
         
         // link element should be in <head> section
         assertTrue(Pattern.matches("(?s)<html.*<head>.*<link .*</head>.*$", 
result));
+        
+        // There should be ending elements.
+        assertTrue(Pattern.matches("(?s).*</body>.*</html>$", result));
+
     }
 
 
+
 }


Reply via email to