Author: jukka
Date: Tue Mar 18 17:17:25 2008
New Revision: 638656
URL: http://svn.apache.org/viewvc?rev=638656&view=rev
Log:
TIKA-131: Lazy XHTML prefix generation
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
Modified: incubator/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=638656&r1=638655&r2=638656&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Tue Mar 18 17:17:25 2008
@@ -29,6 +29,9 @@
12. TIKA-130 - self-or-descendant axis does not match self in streaming XPath
(Jukka Zitting)
+13. TIKA-131 - Lazy XHTML prefix generation (Jukka Zitting)
+
+
Release 0.1-incubating - 12/27/2007
1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=638656&r1=638655&r2=638656&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Tue Mar 18 17:17:25 2008
@@ -17,6 +17,7 @@
package org.apache.tika.sax;
import org.apache.tika.metadata.Metadata;
+import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
@@ -38,14 +39,29 @@
*/
private final Metadata metadata;
+ /**
+ * Flag to indicate whether the document element has been started.
+ */
+ private boolean started = false;
+
public XHTMLContentHandler(ContentHandler handler, Metadata metadata) {
super(handler);
this.metadata = metadata;
}
/**
- * Starts an XHTML document by setting up the namespace mappings and
- * writing following header:
+ * Starts an XHTML document by setting up the namespace mappings.
+ * The standard XHTML prefix is generated lazily when the first
+ * element is started.
+ */
+ @Override
+ public void startDocument() throws SAXException {
+ super.startDocument();
+ startPrefixMapping("", XHTML);
+ }
+
+ /**
+ * Generates the following XHTML prefix when called for the first time:
* <pre>
* <html>
* <head>
@@ -54,19 +70,20 @@
* <body>
* </pre>
*/
- public void startDocument() throws SAXException {
- super.startDocument();
- startPrefixMapping("", XHTML);
- startElement("html");
- startElement("head");
- startElement("title");
- String title = metadata.get(Metadata.TITLE);
- if (title != null && title.length() > 0) {
- characters(title);
+ private void lazyStartDocument() throws SAXException {
+ if (!started) {
+ started = true;
+ startElement("html");
+ startElement("head");
+ startElement("title");
+ String title = metadata.get(Metadata.TITLE);
+ if (title != null && title.length() > 0) {
+ characters(title);
+ }
+ endElement("title");
+ endElement("head");
+ startElement("body");
}
- endElement("title");
- endElement("head");
- startElement("body");
}
/**
@@ -77,11 +94,21 @@
* </html>
* </pre>
*/
+ @Override
public void endDocument() throws SAXException {
+ lazyStartDocument();
endElement("body");
endElement("html");
endPrefixMapping("");
super.endDocument();
+ }
+
+ @Override
+ public void startElement(
+ String uri, String local, String name, Attributes attributes)
+ throws SAXException {
+ lazyStartDocument();
+ super.startElement(uri, local, name, attributes);
}
public void startElement(String name) throws SAXException {