Repository: tika
Updated Branches:
  refs/heads/master 636060eb6 -> 8d29f7a62


TIKA-2030 - add processing for <text:s/> element in odt, thanks to David Pilato 
for identifying this.


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c0320f14
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c0320f14
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c0320f14

Branch: refs/heads/master
Commit: c0320f14194608d31b9ffaae9250f28c46017b75
Parents: 95b2cd1
Author: tballison <talli...@mitre.org>
Authored: Fri Jul 8 14:15:50 2016 -0400
Committer: tballison <talli...@mitre.org>
Committed: Fri Jul 8 14:15:50 2016 -0400

----------------------------------------------------------------------
 .../parser/odf/OpenDocumentContentParser.java   |   3 +++
 .../test-documents/testOpenOffice2.odt          | Bin 26448 -> 27554 bytes
 2 files changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c0320f14/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
index a32d406..b40ed27 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
@@ -68,6 +68,7 @@ public class OpenDocumentContentParser extends AbstractParser {
 
     private static final class OpenDocumentElementMappingContentHandler extends
             ElementMappingContentHandler {
+        private static final char[] SPACE = new char[]{ ' '};
         private final ContentHandler handler;
         private final BitSet textNodeStack = new BitSet();
         private int nodeDepth = 0;
@@ -283,6 +284,8 @@ public class OpenDocumentContentParser extends AbstractParser {
                     startList(attrs.getValue(TEXT_NS, "style-name"));
                 } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
                     startSpan(attrs.getValue(TEXT_NS, "style-name"));
+                } else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
+                    handler.characters(SPACE, 0, 1);
                 } else {
                     super.startElement(namespaceURI, localName, qName, attrs);
                 }

http://git-wip-us.apache.org/repos/asf/tika/blob/c0320f14/tika-parsers/src/test/resources/test-documents/testOpenOffice2.odt
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testOpenOffice2.odt b/tika-parsers/src/test/resources/test-documents/testOpenOffice2.odt
index bc31925..f6c72b6 100644
Binary files a/tika-parsers/src/test/resources/test-documents/testOpenOffice2.odt and b/tika-parsers/src/test/resources/test-documents/testOpenOffice2.odt differ

Reply via email to