Author: tpalsulich
Date: Mon Apr 13 16:23:49 2015
New Revision: 1673236

URL: http://svn.apache.org/r1673236
Log:
TIKA-1600. Reformat ODF Parser files and move OpenDocumentParserTest tests to 
ODFParserTest.

Removed:
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/OpenDocumentParserTest.java
Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java?rev=1673236&r1=1673235&r2=1673236&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
 Mon Apr 13 16:23:49 2015
@@ -36,13 +36,13 @@ import org.xml.sax.helpers.AttributesImp
 public class NSNormalizerContentHandler extends ContentHandlerDecorator {
 
     private static final String OLD_NS =
-        "http://openoffice.org/2000/";
+            "http://openoffice.org/2000/";
 
     private static final String NEW_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:";
+            "urn:oasis:names:tc:opendocument:xmlns:";
 
     private static final String DTD_PUBLIC_ID =
-        "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
+            "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
 
     public NSNormalizerContentHandler(ContentHandler handler) {
         super(handler);

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java?rev=1673236&r1=1673235&r2=1673236&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
 Mon Apr 13 16:23:49 2015
@@ -16,7 +16,11 @@
  */
 package org.apache.tika.parser.odf;
 
-import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
+import javax.xml.XMLConstants;
+import javax.xml.namespace.QName;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -27,12 +31,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
 
-import javax.xml.XMLConstants;
-import javax.xml.namespace.QName;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.CloseShieldInputStream;
 import org.apache.tika.metadata.Metadata;
@@ -50,6 +48,8 @@ import org.xml.sax.SAXNotRecognizedExcep
 import org.xml.sax.helpers.AttributesImpl;
 import org.xml.sax.helpers.DefaultHandler;
 
+import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
+
 /**
  * Parser for ODF <code>content.xml</code> files.
  */
@@ -62,115 +62,115 @@ public class OpenDocumentContentParser e
         public boolean bold;
         public boolean underlined;
     }
-    
+
     private static class ListStyle implements Style {
         public boolean ordered;
-        
+
         public String getTag() {
             return ordered ? "ol" : "ul";
         }
     }
 
     private static final class OpenDocumentElementMappingContentHandler extends
-                       ElementMappingContentHandler {
-               private final ContentHandler handler;
-               private final BitSet textNodeStack = new BitSet();
-               private int nodeDepth = 0;
-               private int completelyFiltered = 0;
-               private Stack<String> headingStack = new Stack<String>();
-               private Map<String, TextStyle> textStyleMap = new 
HashMap<String, TextStyle>();
+            ElementMappingContentHandler {
+        private final ContentHandler handler;
+        private final BitSet textNodeStack = new BitSet();
+        private int nodeDepth = 0;
+        private int completelyFiltered = 0;
+        private Stack<String> headingStack = new Stack<String>();
+        private Map<String, TextStyle> textStyleMap = new HashMap<String, TextStyle>();
         private Map<String, ListStyle> listStyleMap = new HashMap<String, ListStyle>();
         private TextStyle textStyle;
         private TextStyle lastTextStyle;
         private Stack<ListStyle> listStyleStack = new Stack<ListStyle>();
         private ListStyle listStyle;
 
-               private OpenDocumentElementMappingContentHandler(ContentHandler 
handler,
-                               Map<QName, TargetElement> mappings) {
-                       super(handler, mappings);
-                       this.handler = handler;
-               }
-
-               @Override
-               public void characters(char[] ch, int start, int length)
-                       throws SAXException {
-                   // only forward content of tags from text:-namespace
-                   if (completelyFiltered == 0 && nodeDepth > 0
-                           && textNodeStack.get(nodeDepth - 1)) {
-                       lazyEndSpan();
-                       super.characters(ch,start,length);
-                   }
-               }
-
-               // helper for checking tags which need complete filtering
-               // (with sub-tags)
-               private boolean needsCompleteFiltering(
-                       String namespaceURI, String localName) {
-                   if (TEXT_NS.equals(namespaceURI)) {
-                       return localName.endsWith("-template")
-                           || localName.endsWith("-style");
-                   }
+        private OpenDocumentElementMappingContentHandler(ContentHandler handler,
+                                                         Map<QName, TargetElement> mappings) {
+            super(handler, mappings);
+            this.handler = handler;
+        }
+
+        @Override
+        public void characters(char[] ch, int start, int length)
+                throws SAXException {
+            // only forward content of tags from text:-namespace
+            if (completelyFiltered == 0 && nodeDepth > 0
+                    && textNodeStack.get(nodeDepth - 1)) {
+                lazyEndSpan();
+                super.characters(ch, start, length);
+            }
+        }
+
+        // helper for checking tags which need complete filtering
+        // (with sub-tags)
+        private boolean needsCompleteFiltering(
+                String namespaceURI, String localName) {
+            if (TEXT_NS.equals(namespaceURI)) {
+                return localName.endsWith("-template")
+                        || localName.endsWith("-style");
+            }
             return TABLE_NS.equals(namespaceURI) && 
"covered-table-cell".equals(localName);
-               }
+        }
+
+        // map the heading level to <hX> HTML tags
+        private String getXHTMLHeaderTagName(Attributes atts) {
+            String depthStr = atts.getValue(TEXT_NS, "outline-level");
+            if (depthStr == null) {
+                return "h1";
+            }
+
+            int depth = Integer.parseInt(depthStr);
+            if (depth >= 6) {
+                return "h6";
+            } else if (depth <= 1) {
+                return "h1";
+            } else {
+                return "h" + depth;
+            }
+        }
 
-               // map the heading level to <hX> HTML tags
-               private String getXHTMLHeaderTagName(Attributes atts) {
-                   String depthStr = atts.getValue(TEXT_NS, "outline-level");
-                   if (depthStr == null) {
-                       return "h1";
-                   }
-
-                   int depth = Integer.parseInt(depthStr);
-                   if (depth >= 6) {
-                       return "h6";
-                   } else if (depth <= 1) {
-                       return "h1";
-                   } else {
-                       return "h" + depth;
-                   }
-               }
-
-               /**
-                * Check if a node is a text node
-                */
-               private boolean isTextNode(String namespaceURI, String 
localName) {
-                   if (TEXT_NS.equals(namespaceURI) && 
!localName.equals("page-number") && !localName.equals("page-count")) {
-                       return true;
-                   }
-                   if (SVG_NS.equals(namespaceURI)) {
-                       return "title".equals(localName) ||
-                               "desc".equals(localName);
-                   }
-                   return false;
-               }
-
-               private void startList(String name) throws SAXException {
-                   String elementName = "ul";
-                   if (name != null) {
-                       ListStyle style = listStyleMap.get(name);
-                       elementName = style != null ? style.getTag() : "ul";
-                   listStyleStack.push(style);
-                   }
+        /**
+         * Check if a node is a text node
+         */
+        private boolean isTextNode(String namespaceURI, String localName) {
+            if (TEXT_NS.equals(namespaceURI) && 
!localName.equals("page-number") && !localName.equals("page-count")) {
+                return true;
+            }
+            if (SVG_NS.equals(namespaceURI)) {
+                return "title".equals(localName) ||
+                        "desc".equals(localName);
+            }
+            return false;
+        }
+
+        private void startList(String name) throws SAXException {
+            String elementName = "ul";
+            if (name != null) {
+                ListStyle style = listStyleMap.get(name);
+                elementName = style != null ? style.getTag() : "ul";
+                listStyleStack.push(style);
+            }
             handler.startElement(XHTML, elementName, elementName, 
EMPTY_ATTRIBUTES);
-               }
+        }
 
-               private void endList() throws SAXException {
+        private void endList() throws SAXException {
             String elementName = "ul";
             if (!listStyleStack.isEmpty()) {
                 ListStyle style = listStyleStack.pop();
                 elementName = style != null ? style.getTag() : "ul";
             }
             handler.endElement(XHTML, elementName, elementName);
-               }
+        }
 
-               private void startSpan(String name) throws SAXException {
-                   if (name == null) {
-                       return;
-                   }
+        private void startSpan(String name) throws SAXException {
+            if (name == null) {
+                return;
+            }
 
             TextStyle style = textStyleMap.get(name);
             if (style == null) {
-              return;
+                return;
             }
 
             // End tags that refer to no longer valid styles
@@ -197,17 +197,17 @@ public class OpenDocumentContentParser e
 
             textStyle = style;
             lastTextStyle = null;
-               }
+        }
 
-               private void endSpan() throws SAXException {
-                   lastTextStyle = textStyle;
-                   textStyle = null;
-               }
-               
-               private void lazyEndSpan() throws SAXException {
-                   if (lastTextStyle == null) {
-                       return;
-                   }
+        private void endSpan() throws SAXException {
+            lastTextStyle = textStyle;
+            textStyle = null;
+        }
+
+        private void lazyEndSpan() throws SAXException {
+            if (lastTextStyle == null) {
+                return;
+            }
 
             if (lastTextStyle.underlined) {
                 handler.endElement(XHTML, "u", "u");
@@ -220,175 +220,175 @@ public class OpenDocumentContentParser e
             }
 
             lastTextStyle = null;
-               }
+        }
+
+        @Override
+        public void startElement(
+                String namespaceURI, String localName, String qName,
+                Attributes attrs) throws SAXException {
+            // keep track of current node type. If it is a text node,
+            // a bit at the current depth its set in textNodeStack.
+            // characters() checks the top bit to determine, if the
+            // actual node is a text node to print out nodeDepth contains
+            // the depth of the current node and also marks top of stack.
+            assert nodeDepth >= 0;
 
-               @Override
-               public void startElement(
-                       String namespaceURI, String localName, String qName,
-                       Attributes attrs) throws SAXException {
-                   // keep track of current node type. If it is a text node,
-                   // a bit at the current depth its set in textNodeStack.
-                   // characters() checks the top bit to determine, if the
-                   // actual node is a text node to print out nodeDepth 
contains
-                   // the depth of the current node and also marks top of 
stack.
-                   assert nodeDepth >= 0;
-
-                   // Set styles
-                   if (STYLE_NS.equals(namespaceURI) && 
"style".equals(localName)) {
-                       String family = attrs.getValue(STYLE_NS, "family");
-                       if ("text".equals(family)) {
-                           textStyle = new TextStyle();
-                       String name = attrs.getValue(STYLE_NS, "name");
-                           textStyleMap.put(name, textStyle);
-                       }
-                   } else if (TEXT_NS.equals(namespaceURI) && 
"list-style".equals(localName)) {
-                       listStyle = new ListStyle();
+            // Set styles
+            if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
+                String family = attrs.getValue(STYLE_NS, "family");
+                if ("text".equals(family)) {
+                    textStyle = new TextStyle();
+                    String name = attrs.getValue(STYLE_NS, "name");
+                    textStyleMap.put(name, textStyle);
+                }
+            } else if (TEXT_NS.equals(namespaceURI) && 
"list-style".equals(localName)) {
+                listStyle = new ListStyle();
                 String name = attrs.getValue(STYLE_NS, "name");
                 listStyleMap.put(name, listStyle);
-                   } else if (textStyle != null && 
STYLE_NS.equals(namespaceURI)
-                           && "text-properties".equals(localName)) {
-                       String fontStyle = 
attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
-                       if ("italic".equals(fontStyle) || 
"oblique".equals(fontStyle)) {
-                           textStyle.italic = true;
-                       }
-                       String fontWeight = 
attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
-                       if ("bold".equals(fontWeight) || 
"bolder".equals(fontWeight)
-                               || (fontWeight!=null && 
Character.isDigit(fontWeight.charAt(0))
-                               && Integer.valueOf(fontWeight) > 500)) {
-                           textStyle.bold = true;
-                       }
-                       String underlineStyle = attrs.getValue(STYLE_NS, 
"text-underline-style");
-                       if (underlineStyle != null) {
-                           textStyle.underlined = true;
-                       }
-                   } else if (listStyle != null && 
TEXT_NS.equals(namespaceURI)) {
-                       if ("list-level-style-bullet".equals(localName)) {
-                           listStyle.ordered = false;
-                       } else if ("list-level-style-number".equals(localName)) 
{
-                           listStyle.ordered = true;
-                       }
-                   }
-
-                   textNodeStack.set(nodeDepth++, 
-                           isTextNode(namespaceURI, localName));
-                   // filter *all* content of some tags
-                   assert completelyFiltered >= 0;
-
-                   if (needsCompleteFiltering(namespaceURI, localName)) {
-                       completelyFiltered++;
-                   }
-                   // call next handler if no filtering
-                   if (completelyFiltered == 0) {
-                       // special handling of text:h, that are directly passed
-                       // to incoming handler
-                       if (TEXT_NS.equals(namespaceURI) && 
"h".equals(localName)) {
-                           final String el = 
headingStack.push(getXHTMLHeaderTagName(attrs));
-                           handler.startElement(XHTMLContentHandler.XHTML, el, 
el, EMPTY_ATTRIBUTES);
-                       } else if (TEXT_NS.equals(namespaceURI) && 
"list".equals(localName)) {
-                           startList(attrs.getValue(TEXT_NS, "style-name"));
+            } else if (textStyle != null && STYLE_NS.equals(namespaceURI)
+                    && "text-properties".equals(localName)) {
+                String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, 
"font-style");
+                if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) 
{
+                    textStyle.italic = true;
+                }
+                String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, 
"font-weight");
+                if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)
+                        || (fontWeight != null && 
Character.isDigit(fontWeight.charAt(0))
+                        && Integer.valueOf(fontWeight) > 500)) {
+                    textStyle.bold = true;
+                }
+                String underlineStyle = attrs.getValue(STYLE_NS, 
"text-underline-style");
+                if (underlineStyle != null) {
+                    textStyle.underlined = true;
+                }
+            } else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
+                if ("list-level-style-bullet".equals(localName)) {
+                    listStyle.ordered = false;
+                } else if ("list-level-style-number".equals(localName)) {
+                    listStyle.ordered = true;
+                }
+            }
+
+            textNodeStack.set(nodeDepth++,
+                    isTextNode(namespaceURI, localName));
+            // filter *all* content of some tags
+            assert completelyFiltered >= 0;
+
+            if (needsCompleteFiltering(namespaceURI, localName)) {
+                completelyFiltered++;
+            }
+            // call next handler if no filtering
+            if (completelyFiltered == 0) {
+                // special handling of text:h, that are directly passed
+                // to incoming handler
+                if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
+                    final String el = 
headingStack.push(getXHTMLHeaderTagName(attrs));
+                    handler.startElement(XHTMLContentHandler.XHTML, el, el, 
EMPTY_ATTRIBUTES);
+                } else if (TEXT_NS.equals(namespaceURI) && 
"list".equals(localName)) {
+                    startList(attrs.getValue(TEXT_NS, "style-name"));
                 } else if (TEXT_NS.equals(namespaceURI) && 
"span".equals(localName)) {
                     startSpan(attrs.getValue(TEXT_NS, "style-name"));
-                       } else {
-                           super.startElement(namespaceURI, localName, qName, 
attrs);
-                       }
-                   }
-               }
-
-               @Override
-               public void endElement(
-                       String namespaceURI, String localName, String qName)
-                       throws SAXException {
+                } else {
+                    super.startElement(namespaceURI, localName, qName, attrs);
+                }
+            }
+        }
+
+        @Override
+        public void endElement(
+                String namespaceURI, String localName, String qName)
+                throws SAXException {
             if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
                 textStyle = null;
             } else if (TEXT_NS.equals(namespaceURI) && 
"list-style".equals(localName)) {
                 listStyle = null;
             }
 
-                   // call next handler if no filtering
-                   if (completelyFiltered == 0) {
-                       // special handling of text:h, that are directly passed
-                       // to incoming handler
-                       if (TEXT_NS.equals(namespaceURI) && 
"h".equals(localName)) {
-                           final String el = headingStack.pop();
-                           handler.endElement(XHTMLContentHandler.XHTML, el, 
el);
-                       } else if (TEXT_NS.equals(namespaceURI) && 
"list".equals(localName)) {
-                           endList();
+            // call next handler if no filtering
+            if (completelyFiltered == 0) {
+                // special handling of text:h, that are directly passed
+                // to incoming handler
+                if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
+                    final String el = headingStack.pop();
+                    handler.endElement(XHTMLContentHandler.XHTML, el, el);
+                } else if (TEXT_NS.equals(namespaceURI) && 
"list".equals(localName)) {
+                    endList();
                 } else if (TEXT_NS.equals(namespaceURI) && 
"span".equals(localName)) {
                     endSpan();
-                       } else {
-                           if (TEXT_NS.equals(namespaceURI) && 
"p".equals(localName)) {
-                               lazyEndSpan();
-                           }
-                           super.endElement(namespaceURI,localName,qName);
-                       }
-
-                       // special handling of tabulators
-                       if (TEXT_NS.equals(namespaceURI)
-                               && ("tab-stop".equals(localName)
-                                       || "tab".equals(localName))) {
-                           this.characters(TAB, 0, TAB.length);
-                       }
-                   }
-
-                   // revert filter for *all* content of some tags
-                   if (needsCompleteFiltering(namespaceURI,localName)) {
-                       completelyFiltered--;
-                   }
-                   assert completelyFiltered >= 0;
-
-                   // reduce current node depth
-                   nodeDepth--;
-                   assert nodeDepth >= 0;
-               }
-
-               @Override
-               public void startPrefixMapping(String prefix, String uri) {
-                   // remove prefix mappings as they should not occur in XHTML
-               }
-
-               @Override
-               public void endPrefixMapping(String prefix) {
-                   // remove prefix mappings as they should not occur in XHTML
-               }
-       }
+                } else {
+                    if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) 
{
+                        lazyEndSpan();
+                    }
+                    super.endElement(namespaceURI, localName, qName);
+                }
+
+                // special handling of tabulators
+                if (TEXT_NS.equals(namespaceURI)
+                        && ("tab-stop".equals(localName)
+                        || "tab".equals(localName))) {
+                    this.characters(TAB, 0, TAB.length);
+                }
+            }
+
+            // revert filter for *all* content of some tags
+            if (needsCompleteFiltering(namespaceURI, localName)) {
+                completelyFiltered--;
+            }
+            assert completelyFiltered >= 0;
+
+            // reduce current node depth
+            nodeDepth--;
+            assert nodeDepth >= 0;
+        }
 
-       public static final String TEXT_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
+        @Override
+        public void startPrefixMapping(String prefix, String uri) {
+            // remove prefix mappings as they should not occur in XHTML
+        }
+
+        @Override
+        public void endPrefixMapping(String prefix) {
+            // remove prefix mappings as they should not occur in XHTML
+        }
+    }
+
+    public static final String TEXT_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
 
     public static final String TABLE_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
 
     public static final String STYLE_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:style:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:style:1.0";
 
     public static final String FORMATTING_OBJECTS_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
 
     public static final String OFFICE_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
 
     public static final String SVG_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
 
     public static final String PRESENTATION_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
 
     public static final String DRAW_NS =
-        "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
+            "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
 
     public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
 
-    protected static final char[] TAB = new char[] { '\t' };
+    protected static final char[] TAB = new char[]{'\t'};
 
     private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
 
     /**
      * Mappings between ODF tag names and XHTML tag names
      * (including attributes). All other tag names/attributes are ignored
-     * and left out from event stream. 
+     * and left out from event stream.
      */
     private static final HashMap<QName, TargetElement> MAPPINGS =
-        new HashMap<QName, TargetElement>();
+            new HashMap<QName, TargetElement>();
 
     static {
         // general mappings of text:-tags
@@ -426,9 +426,9 @@ public class OpenDocumentContentParser e
         MAPPINGS.put(
                 new QName(TEXT_NS, "span"),
                 new TargetElement(XHTML, "span"));
-        
-        final HashMap<QName,QName> aAttsMapping =
-            new HashMap<QName,QName>();
+
+        final HashMap<QName, QName> aAttsMapping =
+                new HashMap<QName, QName>();
         aAttsMapping.put(
                 new QName(XLINK_NS, "href"),
                 new QName("href"));
@@ -448,8 +448,8 @@ public class OpenDocumentContentParser e
                 new QName(TABLE_NS, "table-row"),
                 new TargetElement(XHTML, "tr"));
         // special mapping for rowspan/colspan attributes
-        final HashMap<QName,QName> tableCellAttsMapping =
-            new HashMap<QName,QName>();
+        final HashMap<QName, QName> tableCellAttsMapping =
+                new HashMap<QName, QName>();
         tableCellAttsMapping.put(
                 new QName(TABLE_NS, "number-columns-spanned"),
                 new QName("colspan"));
@@ -479,8 +479,8 @@ public class OpenDocumentContentParser e
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         parseInternal(stream,
-                      new XHTMLContentHandler(handler,metadata),
-                      metadata, context);
+                new XHTMLContentHandler(handler, metadata),
+                metadata, context);
     }
 
     void parseInternal(
@@ -496,7 +496,7 @@ public class OpenDocumentContentParser e
             factory.setNamespaceAware(true);
             try {
                 factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, 
true);
-            } catch (SAXNotRecognizedException e){
+            } catch (SAXNotRecognizedException e) {
                 // TIKA-329: Some XML parsers do not support the secure-processing
                 // feature, even though it's required by JAXP in Java 5. Ignoring
                 // the exception is fine here, deployments without this feature
@@ -513,4 +513,3 @@ public class OpenDocumentContentParser e
     }
 
 }
-       
\ No newline at end of file

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java?rev=1673236&r1=1673235&r2=1673236&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
 Mon Apr 13 16:23:49 2015
@@ -50,33 +50,33 @@ public class OpenDocumentMetaParser exte
      * Serial version UID
      */
     private static final long serialVersionUID = -8739250869531737584L;
-   
-    private static final String META_NS = 
"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"; 
+
+    private static final String META_NS = 
"urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
     private static final XPathParser META_XPATH = new XPathParser("meta", 
META_NS);
-    
-    /** 
-     * @see OfficeOpenXMLCore#SUBJECT 
+
+    /**
+     * @see OfficeOpenXMLCore#SUBJECT
      * @deprecated use OfficeOpenXMLCore#SUBJECT
      */
     @Deprecated
-    private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR 
= 
-        Property.composite(Office.INITIAL_AUTHOR, 
-            new Property[] { Property.externalText("initial-creator") });
-    
+    private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR 
=
+            Property.composite(Office.INITIAL_AUTHOR,
+                    new Property[]{Property.externalText("initial-creator")});
+
     private static ContentHandler getDublinCoreHandler(
             Metadata metadata, Property property, String element) {
         return new ElementMetadataHandler(
                 DublinCore.NAMESPACE_URI_DC, element,
                 metadata, property);
     }
-    
+
     private static ContentHandler getMeta(
             ContentHandler ch, Metadata md, Property property, String element) 
{
         Matcher matcher = new CompositeMatcher(
                 META_XPATH.parse("//meta:" + element),
                 META_XPATH.parse("//meta:" + element + "//text()"));
         ContentHandler branch =
-            new MatchingContentHandler(new MetadataHandler(md, property), 
matcher);
+                new MatchingContentHandler(new MetadataHandler(md, property), 
matcher);
         return new TeeContentHandler(ch, branch);
     }
 
@@ -87,27 +87,29 @@ public class OpenDocumentMetaParser exte
                 META_XPATH.parse("//meta:user-defined//text()"));
         // eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
         ContentHandler branch = new MatchingContentHandler(
-              new AttributeDependantMetadataHandler(md, "meta:name", 
Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
-              matcher);
+                new AttributeDependantMetadataHandler(md, "meta:name", 
Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
+                matcher);
         return new TeeContentHandler(ch, branch);
     }
 
-    @Deprecated private static ContentHandler getStatistic(
+    @Deprecated
+    private static ContentHandler getStatistic(
             ContentHandler ch, Metadata md, String name, String attribute) {
         Matcher matcher =
-            META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
+                META_XPATH.parse("//meta:document-statistic/@meta:" + 
attribute);
         ContentHandler branch = new MatchingContentHandler(
-              new AttributeMetadataHandler(META_NS, attribute, md, name), 
matcher);
+                new AttributeMetadataHandler(META_NS, attribute, md, name), 
matcher);
         return new TeeContentHandler(ch, branch);
     }
+
     private static ContentHandler getStatistic(
-          ContentHandler ch, Metadata md, Property property, String attribute) 
{
-      Matcher matcher =
-          META_XPATH.parse("//meta:document-statistic/@meta:"+attribute);
-      ContentHandler branch = new MatchingContentHandler(
-            new AttributeMetadataHandler(META_NS, attribute, md, property), 
matcher);
-      return new TeeContentHandler(ch, branch);
-  }
+            ContentHandler ch, Metadata md, Property property, String 
attribute) {
+        Matcher matcher =
+                META_XPATH.parse("//meta:document-statistic/@meta:" + 
attribute);
+        ContentHandler branch = new MatchingContentHandler(
+                new AttributeMetadataHandler(META_NS, attribute, md, 
property), matcher);
+        return new TeeContentHandler(ch, branch);
+    }
 
     protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
         // We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
@@ -123,48 +125,48 @@ public class OpenDocumentMetaParser exte
                 getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, 
"identifier"),
                 getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, 
"language"),
                 getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights"));
-        
+
         // Process the OO Meta Attributes
         ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
         // ODF uses dc:date for modified
         ch = new TeeContentHandler(ch, new ElementMetadataHandler(
                 DublinCore.NAMESPACE_URI_DC, "date",
                 md, TikaCoreProperties.MODIFIED));
-        
+
         // ODF uses dc:subject for description
         ch = new TeeContentHandler(ch, new ElementMetadataHandler(
                 DublinCore.NAMESPACE_URI_DC, "subject",
                 md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
         ch = getMeta(ch, md, 
TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
-        
-        ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), 
"editing-duration");        
+
+        ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), 
"editing-duration");
         ch = getMeta(ch, md, Property.externalText("editing-cycles"), 
"editing-cycles");
         ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, 
"initial-creator");
         ch = getMeta(ch, md, Property.externalText("generator"), "generator");
-        
+
         // Process the user defined Meta Attributes
         ch = getUserDefined(ch, md);
-        
+
         // Process the OO Statistics Attributes
-        ch = getStatistic(ch, md, Office.OBJECT_COUNT,  "object-count");
-        ch = getStatistic(ch, md, Office.IMAGE_COUNT,   "image-count");
-        ch = getStatistic(ch, md, Office.PAGE_COUNT,    "page-count");
-        ch = getStatistic(ch, md, PagedText.N_PAGES,    "page-count");
-        ch = getStatistic(ch, md, Office.TABLE_COUNT,   "table-count");
+        ch = getStatistic(ch, md, Office.OBJECT_COUNT, "object-count");
+        ch = getStatistic(ch, md, Office.IMAGE_COUNT, "image-count");
+        ch = getStatistic(ch, md, Office.PAGE_COUNT, "page-count");
+        ch = getStatistic(ch, md, PagedText.N_PAGES, "page-count");
+        ch = getStatistic(ch, md, Office.TABLE_COUNT, "table-count");
         ch = getStatistic(ch, md, Office.PARAGRAPH_COUNT, "paragraph-count");
-        ch = getStatistic(ch, md, Office.WORD_COUNT,      "word-count");
+        ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
         ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
-        
+
         // Legacy, Tika-1.0 style attributes
         // TODO Remove these in Tika 2.0
-        ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT,  "object-count");
-        ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT,   "image-count");
-        ch = getStatistic(ch, md, MSOffice.PAGE_COUNT,    "page-count");
-        ch = getStatistic(ch, md, MSOffice.TABLE_COUNT,   "table-count");
+        ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
+        ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
+        ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
+        ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
         ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
-        ch = getStatistic(ch, md, MSOffice.WORD_COUNT,      "word-count");
+        ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
         ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
-        
+
         // Legacy Statistics Attributes, replaced with real keys above
         // TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
         ch = getStatistic(ch, md, "nbPage", "page-count");
@@ -174,12 +176,12 @@ public class OpenDocumentMetaParser exte
         ch = getStatistic(ch, md, "nbTab", "table-count");
         ch = getStatistic(ch, md, "nbObject", "object-count");
         ch = getStatistic(ch, md, "nbImg", "image-count");
-        
+
         // Normalise the rest
         ch = new NSNormalizerContentHandler(ch);
         return ch;
     }
-    
+
     @Override
     public void parse(
             InputStream stream, ContentHandler handler,
@@ -188,10 +190,10 @@ public class OpenDocumentMetaParser exte
         super.parse(stream, handler, metadata, context);
         // Copy subject to description for OO2
         String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
-        if (odfSubject != null && !odfSubject.equals("") && 
+        if (odfSubject != null && !odfSubject.equals("") &&
                 (metadata.get(TikaCoreProperties.DESCRIPTION) == null || 
metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
             metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
         }
     }
-    
+
 }

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java?rev=1673236&r1=1673235&r2=1673236&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
 Mon Apr 13 16:23:49 2015
@@ -46,47 +46,49 @@ import org.xml.sax.helpers.DefaultHandle
  */
 public class OpenDocumentParser extends AbstractParser {
 
-    /** Serial version UID */
+    /**
+     * Serial version UID
+     */
     private static final long serialVersionUID = -6410276875438618287L;
 
     private static final Set<MediaType> SUPPORTED_TYPES =
-        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
-                MediaType.application("vnd.sun.xml.writer"),
-                MediaType.application("vnd.oasis.opendocument.text"),
-                MediaType.application("vnd.oasis.opendocument.graphics"),
-                MediaType.application("vnd.oasis.opendocument.presentation"),
-                MediaType.application("vnd.oasis.opendocument.spreadsheet"),
-                MediaType.application("vnd.oasis.opendocument.chart"),
-                MediaType.application("vnd.oasis.opendocument.image"),
-                MediaType.application("vnd.oasis.opendocument.formula"),
-                MediaType.application("vnd.oasis.opendocument.text-master"),
-                MediaType.application("vnd.oasis.opendocument.text-web"),
-                MediaType.application("vnd.oasis.opendocument.text-template"),
-                
MediaType.application("vnd.oasis.opendocument.graphics-template"),
-                
MediaType.application("vnd.oasis.opendocument.presentation-template"),
-                
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
-                MediaType.application("vnd.oasis.opendocument.chart-template"),
-                MediaType.application("vnd.oasis.opendocument.image-template"),
-                
MediaType.application("vnd.oasis.opendocument.formula-template"),
-                MediaType.application("x-vnd.oasis.opendocument.text"),
-                MediaType.application("x-vnd.oasis.opendocument.graphics"),
-                MediaType.application("x-vnd.oasis.opendocument.presentation"),
-                MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
-                MediaType.application("x-vnd.oasis.opendocument.chart"),
-                MediaType.application("x-vnd.oasis.opendocument.image"),
-                MediaType.application("x-vnd.oasis.opendocument.formula"),
-                MediaType.application("x-vnd.oasis.opendocument.text-master"),
-                MediaType.application("x-vnd.oasis.opendocument.text-web"),
-                
MediaType.application("x-vnd.oasis.opendocument.text-template"),
-                
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
-                
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
-                
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
-                
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
-                
MediaType.application("x-vnd.oasis.opendocument.image-template"),
-                
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
+            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                    MediaType.application("vnd.sun.xml.writer"),
+                    MediaType.application("vnd.oasis.opendocument.text"),
+                    MediaType.application("vnd.oasis.opendocument.graphics"),
+                    
MediaType.application("vnd.oasis.opendocument.presentation"),
+                    
MediaType.application("vnd.oasis.opendocument.spreadsheet"),
+                    MediaType.application("vnd.oasis.opendocument.chart"),
+                    MediaType.application("vnd.oasis.opendocument.image"),
+                    MediaType.application("vnd.oasis.opendocument.formula"),
+                    
MediaType.application("vnd.oasis.opendocument.text-master"),
+                    MediaType.application("vnd.oasis.opendocument.text-web"),
+                    
MediaType.application("vnd.oasis.opendocument.text-template"),
+                    
MediaType.application("vnd.oasis.opendocument.graphics-template"),
+                    
MediaType.application("vnd.oasis.opendocument.presentation-template"),
+                    
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
+                    
MediaType.application("vnd.oasis.opendocument.chart-template"),
+                    
MediaType.application("vnd.oasis.opendocument.image-template"),
+                    
MediaType.application("vnd.oasis.opendocument.formula-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.text"),
+                    MediaType.application("x-vnd.oasis.opendocument.graphics"),
+                    
MediaType.application("x-vnd.oasis.opendocument.presentation"),
+                    
MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
+                    MediaType.application("x-vnd.oasis.opendocument.chart"),
+                    MediaType.application("x-vnd.oasis.opendocument.image"),
+                    MediaType.application("x-vnd.oasis.opendocument.formula"),
+                    
MediaType.application("x-vnd.oasis.opendocument.text-master"),
+                    MediaType.application("x-vnd.oasis.opendocument.text-web"),
+                    
MediaType.application("x-vnd.oasis.opendocument.text-template"),
+                    
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
+                    
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
+                    
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
+                    
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
+                    
MediaType.application("x-vnd.oasis.opendocument.image-template"),
+                    
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
 
     private static final String META_NAME = "meta.xml";
-    
+
     private Parser meta = new OpenDocumentMetaParser();
 
     private Parser content = new OpenDocumentContentParser();
@@ -126,7 +128,7 @@ public class OpenDocumentParser extends
             if (container instanceof ZipFile) {
                 zipFile = (ZipFile) container;
             } else if (tis.hasFile()) {
-                zipFile = new ZipFile(tis.getFile());                
+                zipFile = new ZipFile(tis.getFile());
             } else {
                 zipStream = new ZipInputStream(stream);
             }
@@ -139,9 +141,9 @@ public class OpenDocumentParser extends
 
         // As we don't know which of the metadata or the content
         //  we'll hit first, catch the endDocument call initially
-        EndDocumentShieldingContentHandler handler = 
-          new EndDocumentShieldingContentHandler(xhtml);
-        
+        EndDocumentShieldingContentHandler handler =
+                new EndDocumentShieldingContentHandler(xhtml);
+
         // If we can, process the metadata first, then the
         //  rest of the file afterwards
         // Only possible to guarantee that when opened from a file not a stream
@@ -153,7 +155,7 @@ public class OpenDocumentParser extends
             Enumeration<? extends ZipEntry> entries = zipFile.entries();
             while (entries.hasMoreElements()) {
                 entry = entries.nextElement();
-                if (! META_NAME.equals(entry.getName())) {
+                if (!META_NAME.equals(entry.getName())) {
                     handleZipEntry(entry, zipFile.getInputStream(entry), 
metadata, context, handler);
                 }
             }
@@ -165,18 +167,18 @@ public class OpenDocumentParser extends
             } while (entry != null);
             zipStream.close();
         }
-        
+
         // Only now call the end document
-        if(handler.getEndDocumentWasCalled()) {
-           handler.reallyEndDocument();
+        if (handler.getEndDocumentWasCalled()) {
+            handler.reallyEndDocument();
         }
     }
-    
-    private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata 
metadata, 
-            ParseContext context, EndDocumentShieldingContentHandler handler)
+
+    private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata 
metadata,
+                                ParseContext context, 
EndDocumentShieldingContentHandler handler)
             throws IOException, SAXException, TikaException {
         if (entry == null) return;
-        
+
         if (entry.getName().equals("mimetype")) {
             String type = IOUtils.toString(zip, IOUtils.UTF_8.name());
             metadata.set(Metadata.CONTENT_TYPE, type);

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java?rev=1673236&r1=1673235&r2=1673236&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
 Mon Apr 13 16:23:49 2015
@@ -374,6 +374,7 @@ public class ODFParserTest extends TikaT
         }
     }
 
+    // TIKA-1063: Test basic style support.
     @Test
     public void testODTStyles() throws Exception {
         String xml = getXML("testStyles.odt").xml;
@@ -384,4 +385,27 @@ public class ODFParserTest extends TikaT
         assertContains("<ul>\t<li><p>First</p>", xml);
         assertContains("</ul>", xml);
     }
+
+    //TIKA-1600: Test that null pointer doesn't break parsing.
+    @Test
+    public void testNullStylesInODTFooter() throws Exception {
+        Parser parser = new OpenDocumentParser();
+        InputStream input = ODFParserTest.class.getResourceAsStream("/test-documents/testODT-TIKA-6000.odt");
+        try {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            parser.parse(input, handler, metadata, new ParseContext());
+
+            assertEquals("application/vnd.oasis.opendocument.text", metadata.get(Metadata.CONTENT_TYPE));
+
+            String content = handler.toString();
+
+            assertContains("Utilisation de ce document", content);
+            assertContains("Copyright and License", content);
+            assertContains("Changer la langue", content);
+            assertContains("La page d’accueil permet de faire une recherche simple", content);
+        } finally {
+            input.close();
+        }
+    }
 }


Reply via email to