Author: jukka
Date: Sun Aug 21 12:56:21 2011
New Revision: 1159979

URL: http://svn.apache.org/viewvc?rev=1159979&view=rev
Log:
TIKA-692: TikaCLI -x or -h on a Word doc sometimes adds newline after </b> tag

Automatically pretty-print the <head> section generated by the 
XHTMLContentHandler

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=1159979&r1=1159978&r2=1159979&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Sun Aug 21 12:56:21 2011
@@ -128,7 +128,9 @@ public class XHTMLContentHandler extends
             // Call directly, so we don't go through our startElement(), which will
             // ignore these elements.
             super.startElement(XHTML, "html", "html", EMPTY_ATTRIBUTES);
+            newline();
             super.startElement(XHTML, "head", "head", EMPTY_ATTRIBUTES);
+            newline();
         }
     }
 
@@ -165,6 +167,7 @@ public class XHTMLContentHandler extends
                         attributes.addAttribute("", "content", "content", "CDATA", value);
                         super.startElement(XHTML, "meta", "meta", attributes);
                         super.endElement(XHTML, "meta", "meta");
+                        newline();
                     }
                 }
             }
@@ -175,10 +178,11 @@ public class XHTMLContentHandler extends
                 char[] titleChars = title.toCharArray();
                 super.characters(titleChars, 0, titleChars.length);
             }
-            
             super.endElement(XHTML, "title", "title");
+            newline();
             
             super.endElement(XHTML, "head", "head");
+            newline();
             
             if (useFrameset) {
                 super.startElement(XHTML, "frameset", "frameset", EMPTY_ATTRIBUTES);


Reply via email to