Author: jukka
Date: Sun Jan 25 20:25:45 2009
New Revision: 737578
URL: http://svn.apache.org/viewvc?rev=737578&view=rev
Log:
TIKA-190: wrong handling of ignorableWhitespace/characters in
SafeContentHandler and WriteOutContentHandler
Patch by Uwe Schindler.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SafeContentHandler.java
lucene/tika/trunk/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/sax/SafeContentHandler.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/SafeContentHandler.java?rev=737578&r1=737577&r2=737578&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/sax/SafeContentHandler.java
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/sax/SafeContentHandler.java
Sun Jan 25 20:25:45 2009
@@ -70,7 +70,7 @@
private final Output ignorableWhitespaceOutput = new Output() {
public void write(char[] ch, int start, int length)
throws SAXException {
- SafeContentHandler.super.characters(ch, start, length);
+ SafeContentHandler.super.ignorableWhitespace(ch, start, length);
}
};
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java?rev=737578&r1=737577&r2=737578&view=diff
==============================================================================
---
lucene/tika/trunk/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
(original)
+++
lucene/tika/trunk/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
Sun Jan 25 20:25:45 2009
@@ -78,6 +78,20 @@
}
}
+
+ /**
+ * Writes the given ignorable characters to the given character stream.
+ */
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException {
+ try {
+ writer.write(ch, start, length);
+ } catch (IOException e) {
+ throw new SAXException("Error writing out character content", e);
+ }
+ }
+
/**
* Flushes the character stream so that no characters are forgotten
* in internal buffers.