Author: jukka
Date: Tue Dec  2 15:34:44 2008
New Revision: 722673

URL: http://svn.apache.org/viewvc?rev=722673&view=rev
Log:
TIKA-172: New Open Document Parser that emits structured XHTML content

Use spaces instead of tabs for indentation.

Modified:
    
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/NSNormalizerContentHandler.java
    
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
    
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
    
lucene/tika/trunk/src/main/java/org/apache/tika/sax/ElementMappingContentHandler.java
    lucene/tika/trunk/src/main/java/org/apache/tika/sax/TextContentHandler.java

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/NSNormalizerContentHandler.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/NSNormalizerContentHandler.java?rev=722673&r1=722672&r2=722673&view=diff
==============================================================================
--- 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/NSNormalizerContentHandler.java
 (original)
+++ 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/NSNormalizerContentHandler.java
 Tue Dec  2 15:34:44 2008
@@ -37,50 +37,50 @@
  */
 public class NSNormalizerContentHandler extends ContentHandlerDecorator {
 
-       public NSNormalizerContentHandler(ContentHandler handler) {
-               super(handler);
-       }
-
-       private final String mapOldNS(String ns) {
-               if (ns==null) return null;
-               if (ns.startsWith("http://openoffice.org/2000/"))
-                       
ns="urn:oasis:names:tc:opendocument:xmlns:"+ns.substring(27)+":1.0";
-               return ns;
-       }
-       
-       @Override
-       public void startElement(String namespaceURI, String localName, String 
qName, Attributes atts) throws SAXException {
-               AttributesImpl natts = new AttributesImpl();
-               for (int i = 0; i < atts.getLength(); i++) {
-                       natts.addAttribute(
-                               mapOldNS(atts.getURI(i)), atts.getLocalName(i), 
atts.getQName(i),
-                               atts.getType(i), atts.getValue(i)
-                       );
-               }
-               super.startElement(mapOldNS(namespaceURI),localName,qName,atts);
-       }
-       
-       @Override
-       public void endElement(String namespaceURI, String localName, String 
qName) throws SAXException {
-               super.endElement(mapOldNS(namespaceURI),localName,qName);
-       }
-       
-       @Override
-       public void startPrefixMapping(String prefix, String uri) throws 
SAXException {
-               super.startPrefixMapping(prefix,mapOldNS(uri));
-       }
-
-       /** do not load any DTDs (may be requested by parser). Fake the DTD by 
returning a empty string as InputSource */
-       @Override
-       public InputSource resolveEntity(String publicId, String systemId) 
throws IOException,SAXException {
-               if (
-                       "-//OpenOffice.org//DTD OfficeDocument 
1.0//EN".equals(publicId) ||
-                       (systemId!=null && 
systemId.toLowerCase().endsWith(".dtd"))
-               ) {
-                       return new InputSource(new StringReader(""));
-               } else {
-                       return super.resolveEntity(publicId,systemId);
-               }
-       }
-       
+    public NSNormalizerContentHandler(ContentHandler handler) {
+        super(handler);
+    }
+
+    private final String mapOldNS(String ns) {
+        if (ns==null) return null;
+        if (ns.startsWith("http://openoffice.org/2000/"))
+            
ns="urn:oasis:names:tc:opendocument:xmlns:"+ns.substring(27)+":1.0";
+        return ns;
+    }
+
+    @Override
+    public void startElement(String namespaceURI, String localName, String 
qName, Attributes atts) throws SAXException {
+        AttributesImpl natts = new AttributesImpl();
+        for (int i = 0; i < atts.getLength(); i++) {
+            natts.addAttribute(
+                    mapOldNS(atts.getURI(i)), atts.getLocalName(i), 
atts.getQName(i),
+                    atts.getType(i), atts.getValue(i)
+            );
+        }
+        super.startElement(mapOldNS(namespaceURI),localName,qName,atts);
+    }
+
+    @Override
+    public void endElement(String namespaceURI, String localName, String 
qName) throws SAXException {
+        super.endElement(mapOldNS(namespaceURI),localName,qName);
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri) throws 
SAXException {
+        super.startPrefixMapping(prefix,mapOldNS(uri));
+    }
+
+    /** do not load any DTDs (may be requested by parser). Fake the DTD by 
returning a empty string as InputSource */
+    @Override
+    public InputSource resolveEntity(String publicId, String systemId) throws 
IOException,SAXException {
+        if (
+                "-//OpenOffice.org//DTD OfficeDocument 
1.0//EN".equals(publicId) ||
+                (systemId!=null && systemId.toLowerCase().endsWith(".dtd"))
+        ) {
+            return new InputSource(new StringReader(""));
+        } else {
+            return super.resolveEntity(publicId,systemId);
+        }
+    }
+
 }

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java?rev=722673&r1=722672&r2=722673&view=diff
==============================================================================
--- 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
 (original)
+++ 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
 Tue Dec  2 15:34:44 2008
@@ -47,146 +47,146 @@
  */
 public class OpenOfficeContentParser implements Parser {
 
-       public static final String 
TEXT_NS="urn:oasis:names:tc:opendocument:xmlns:text:1.0";
-       public static final String 
TABLE_NS="urn:oasis:names:tc:opendocument:xmlns:table:1.0";
-       public static final String XLINK_NS="http://www.w3.org/1999/xlink";
-       
-       protected static final char[] TAB=new char[]{'\t'};
-
-       /**
-        * Mappings between OpenDocument tag names and XHTML tag names 
(including attributes).
-        * All other tag names/attributes are ignored and left out from event 
stream. 
-        */
-       private static final HashMap<QName,TargetElement> MAPPINGS=new 
HashMap<QName,TargetElement>();
-       static {
-               // general mappings of text:-tags
-               MAPPINGS.put(new QName(TEXT_NS,"p"), new 
TargetElement(XHTML,"p"));
-               // text:h-tags are mapped specifically in 
startElement/endElement
-               MAPPINGS.put(new QName(TEXT_NS,"line-break"), new 
TargetElement(XHTML,"br"));
-               MAPPINGS.put(new QName(TEXT_NS,"list"), new 
TargetElement(XHTML,"ul"));
-               MAPPINGS.put(new QName(TEXT_NS,"list-item"), new 
TargetElement(XHTML,"li"));
-               MAPPINGS.put(new QName(TEXT_NS,"note"), new 
TargetElement(XHTML,"div"));
-               MAPPINGS.put(new QName(TEXT_NS,"span"), new 
TargetElement(XHTML,"span"));
-               MAPPINGS.put(new QName(TEXT_NS,"a"),new TargetElement(XHTML,"a",
-                       Collections.singletonMap(new QName(XLINK_NS,"href"), 
new QName("href"))
-               ));
-               
-               // create HTML tables from table:-tags
-               MAPPINGS.put(new QName(TABLE_NS,"table"), new 
TargetElement(XHTML,"table"));
-               // repeating of rows is ignored; for columns, see below!
-               MAPPINGS.put(new QName(TABLE_NS,"table-row"), new 
TargetElement(XHTML,"tr"));
-               // special mapping for rowspan/colspan attributes
-               final HashMap<QName,QName> tableCellAttsMapping=new 
HashMap<QName,QName>();
-               tableCellAttsMapping.put(new 
QName(TABLE_NS,"number-columns-spanned"),new QName("colspan"));
-               tableCellAttsMapping.put(new 
QName(TABLE_NS,"number-rows-spanned"),new QName("rowspan"));
-               /* TODO: The following is not correct, the cell should be 
repeated not spanned!
-                * Code generates a HTML cell, spanning all repeated columns, 
to make the cell look correct.
-                * Problems may occur when both spanning and repeating is 
given, which is not allowed by spec.
-                * Cell spanning instead of repeating  is not a problem, 
because OpenOffice uses it
-                * only for empty cells.
-                */
-               tableCellAttsMapping.put(new 
QName(TABLE_NS,"number-columns-repeated"),new QName("colspan"));
-               MAPPINGS.put(new QName(TABLE_NS,"table-cell"), new 
TargetElement(XHTML,"td",tableCellAttsMapping));
-       }
-               
-       public void parse(InputStream stream, ContentHandler handler, Metadata 
metadata)
-               throws IOException, SAXException, TikaException {
-
-               final XHTMLContentHandler xhtml = new 
XHTMLContentHandler(handler,metadata);
-               final DefaultHandler dh = new 
ElementMappingContentHandler(xhtml, MAPPINGS) {
-                       private final BitSet textNodeStack=new BitSet();
-                       private int nodeDepth=0,completelyFiltered=0;
-                       private Stack<String> headingStack=new Stack<String>();
-                       
-                       @Override
-                       public void characters(char[] ch, int start, int 
length) throws SAXException {
-                               // only forward content of tags from 
text:-namespace
-                               if (completelyFiltered==0 && nodeDepth>0 && 
textNodeStack.get(nodeDepth-1))
-                                       super.characters(ch,start,length);
-                       }
-                       
-                       // helper for checking tags which need complete 
filtering (with sub-tags)
-                       private final boolean needsCompleteFiltering(String 
namespaceURI, String localName) {
-                               return (
-                                       (TEXT_NS.equals(namespaceURI) && 
(localName.endsWith("-template") || localName.endsWith("-style"))) ||
-                                       (TABLE_NS.equals(namespaceURI) && 
"covered-table-cell".equals(localName))
-                               );
-                       }
-                       
-                       // map the heading level to <hX> HTML tags
-                       private final String getXHTMLHeaderTagName(Attributes 
atts) {
-                               final String 
depthStr=atts.getValue(TEXT_NS,"outline-level");
-                               if (depthStr==null) return "h1";
-                               int depth=Integer.parseInt(depthStr);
-                               if (depth>6) depth=6;
-                               if (depth<1) depth=1;
-                               return "h"+depth;
-                       }
-                       
-                       @Override
-                       public void startElement(String namespaceURI, String 
localName, String qName, Attributes atts) throws SAXException {
-                               // keep track of current node type. If it is a 
text node, a bit at the current depth ist set in textNodeStack.
-                               // characters() checks the top bit to 
determine, if the actual node is a text node to print out
-                               // nodeDepth contains the depth of the current 
node and also marks top of stack.
-                               assert nodeDepth>=0;
-                               textNodeStack.set(nodeDepth++, 
TEXT_NS.equals(namespaceURI));
-                               // filter *all* content of some tags
-                               assert completelyFiltered>=0;
-                               if 
(needsCompleteFiltering(namespaceURI,localName)) completelyFiltered++;
-                               // call next handler if no filtering
-                               if (completelyFiltered==0) {
-                                       // special handling of text:h, that are 
directly passed to xhtml handler
-                                       if (TEXT_NS.equals(namespaceURI) && 
"h".equals(localName)) {
-                                               
xhtml.startElement(headingStack.push(getXHTMLHeaderTagName(atts)));
-                                       } else {
-                                               
super.startElement(namespaceURI,localName,qName,atts);
-                                       }
-                               }
-                       }
-                       
-                       @Override
-                       public void endElement(String namespaceURI, String 
localName, String qName) throws SAXException {
-                               // call next handler if no filtering
-                               if (completelyFiltered==0) {
-                                       // special handling of text:h, that are 
directly passed to xhtml handler
-                                       if (TEXT_NS.equals(namespaceURI) && 
"h".equals(localName)) {
-                                               
xhtml.endElement(headingStack.pop());
-                                       } else {
-                                               
super.endElement(namespaceURI,localName,qName);
-                                       }
-                                       // special handling of tabulators
-                                       if (TEXT_NS.equals(namespaceURI) && 
("tab-stop".equals(localName) || "tab".equals(localName)))
-                                               
this.characters(TAB,0,TAB.length);
-                               }
-                               // revert filter for *all* content of some tags
-                               if 
(needsCompleteFiltering(namespaceURI,localName)) completelyFiltered--;
-                               assert completelyFiltered>=0;
-                               // reduce current node depth
-                               nodeDepth--;
-                               assert nodeDepth>=0;
-                       }
-                       
-                       @Override
-                       public void startPrefixMapping(String prefix, String 
uri) throws SAXException {
-                               // remove prefix mappings as they should not 
occur in XHTML
-                       }
-                       
-                       @Override
-                       public void endPrefixMapping(String prefix) throws 
SAXException {
-                               // remove prefix mappings as they should not 
occur in XHTML
-                       }
-                       
-               };
-               
-               try {
-                       SAXParserFactory factory = 
SAXParserFactory.newInstance();
-                       factory.setValidating(false);
-                       factory.setNamespaceAware(true);
-                       SAXParser parser = factory.newSAXParser();
-                       parser.parse(new CloseShieldInputStream(stream),new 
NSNormalizerContentHandler(dh));
-               } catch (ParserConfigurationException e) {
-                       throw new TikaException("XML parser configuration 
error", e);
-               }
-       }
+    public static final String 
TEXT_NS="urn:oasis:names:tc:opendocument:xmlns:text:1.0";
+    public static final String 
TABLE_NS="urn:oasis:names:tc:opendocument:xmlns:table:1.0";
+    public static final String XLINK_NS="http://www.w3.org/1999/xlink";
+
+    protected static final char[] TAB=new char[]{'\t'};
+
+    /**
+     * Mappings between OpenDocument tag names and XHTML tag names (including 
attributes).
+     * All other tag names/attributes are ignored and left out from event 
stream. 
+     */
+    private static final HashMap<QName,TargetElement> MAPPINGS=new 
HashMap<QName,TargetElement>();
+    static {
+        // general mappings of text:-tags
+        MAPPINGS.put(new QName(TEXT_NS,"p"), new TargetElement(XHTML,"p"));
+        // text:h-tags are mapped specifically in startElement/endElement
+        MAPPINGS.put(new QName(TEXT_NS,"line-break"), new 
TargetElement(XHTML,"br"));
+        MAPPINGS.put(new QName(TEXT_NS,"list"), new TargetElement(XHTML,"ul"));
+        MAPPINGS.put(new QName(TEXT_NS,"list-item"), new 
TargetElement(XHTML,"li"));
+        MAPPINGS.put(new QName(TEXT_NS,"note"), new 
TargetElement(XHTML,"div"));
+        MAPPINGS.put(new QName(TEXT_NS,"span"), new 
TargetElement(XHTML,"span"));
+        MAPPINGS.put(new QName(TEXT_NS,"a"),new TargetElement(XHTML,"a",
+                Collections.singletonMap(new QName(XLINK_NS,"href"), new 
QName("href"))
+        ));
+
+        // create HTML tables from table:-tags
+        MAPPINGS.put(new QName(TABLE_NS,"table"), new 
TargetElement(XHTML,"table"));
+        // repeating of rows is ignored; for columns, see below!
+        MAPPINGS.put(new QName(TABLE_NS,"table-row"), new 
TargetElement(XHTML,"tr"));
+        // special mapping for rowspan/colspan attributes
+        final HashMap<QName,QName> tableCellAttsMapping=new 
HashMap<QName,QName>();
+        tableCellAttsMapping.put(new 
QName(TABLE_NS,"number-columns-spanned"),new QName("colspan"));
+        tableCellAttsMapping.put(new QName(TABLE_NS,"number-rows-spanned"),new 
QName("rowspan"));
+        /* TODO: The following is not correct, the cell should be repeated not 
spanned!
+         * Code generates a HTML cell, spanning all repeated columns, to make 
the cell look correct.
+         * Problems may occur when both spanning and repeating is given, which 
is not allowed by spec.
+         * Cell spanning instead of repeating  is not a problem, because 
OpenOffice uses it
+         * only for empty cells.
+         */
+        tableCellAttsMapping.put(new 
QName(TABLE_NS,"number-columns-repeated"),new QName("colspan"));
+        MAPPINGS.put(new QName(TABLE_NS,"table-cell"), new 
TargetElement(XHTML,"td",tableCellAttsMapping));
+    }
+
+    public void parse(InputStream stream, ContentHandler handler, Metadata 
metadata)
+    throws IOException, SAXException, TikaException {
+
+        final XHTMLContentHandler xhtml = new 
XHTMLContentHandler(handler,metadata);
+        final DefaultHandler dh = new ElementMappingContentHandler(xhtml, 
MAPPINGS) {
+            private final BitSet textNodeStack=new BitSet();
+            private int nodeDepth=0,completelyFiltered=0;
+            private Stack<String> headingStack=new Stack<String>();
+
+            @Override
+            public void characters(char[] ch, int start, int length) throws 
SAXException {
+                // only forward content of tags from text:-namespace
+                if (completelyFiltered==0 && nodeDepth>0 && 
textNodeStack.get(nodeDepth-1))
+                    super.characters(ch,start,length);
+            }
+
+            // helper for checking tags which need complete filtering (with 
sub-tags)
+            private final boolean needsCompleteFiltering(String namespaceURI, 
String localName) {
+                return (
+                        (TEXT_NS.equals(namespaceURI) && 
(localName.endsWith("-template") || localName.endsWith("-style"))) ||
+                        (TABLE_NS.equals(namespaceURI) && 
"covered-table-cell".equals(localName))
+                );
+            }
+
+            // map the heading level to <hX> HTML tags
+            private final String getXHTMLHeaderTagName(Attributes atts) {
+                final String depthStr=atts.getValue(TEXT_NS,"outline-level");
+                if (depthStr==null) return "h1";
+                int depth=Integer.parseInt(depthStr);
+                if (depth>6) depth=6;
+                if (depth<1) depth=1;
+                return "h"+depth;
+            }
+
+            @Override
+            public void startElement(String namespaceURI, String localName, 
String qName, Attributes atts) throws SAXException {
+                // keep track of current node type. If it is a text node, a 
bit at the current depth ist set in textNodeStack.
+                // characters() checks the top bit to determine, if the actual 
node is a text node to print out
+                // nodeDepth contains the depth of the current node and also 
marks top of stack.
+                assert nodeDepth>=0;
+                textNodeStack.set(nodeDepth++, TEXT_NS.equals(namespaceURI));
+                // filter *all* content of some tags
+                assert completelyFiltered>=0;
+                if (needsCompleteFiltering(namespaceURI,localName)) 
completelyFiltered++;
+                // call next handler if no filtering
+                if (completelyFiltered==0) {
+                    // special handling of text:h, that are directly passed to 
xhtml handler
+                    if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) 
{
+                        
xhtml.startElement(headingStack.push(getXHTMLHeaderTagName(atts)));
+                    } else {
+                        super.startElement(namespaceURI,localName,qName,atts);
+                    }
+                }
+            }
+
+            @Override
+            public void endElement(String namespaceURI, String localName, 
String qName) throws SAXException {
+                // call next handler if no filtering
+                if (completelyFiltered==0) {
+                    // special handling of text:h, that are directly passed to 
xhtml handler
+                    if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) 
{
+                        xhtml.endElement(headingStack.pop());
+                    } else {
+                        super.endElement(namespaceURI,localName,qName);
+                    }
+                    // special handling of tabulators
+                    if (TEXT_NS.equals(namespaceURI) && 
("tab-stop".equals(localName) || "tab".equals(localName)))
+                        this.characters(TAB,0,TAB.length);
+                }
+                // revert filter for *all* content of some tags
+                if (needsCompleteFiltering(namespaceURI,localName)) 
completelyFiltered--;
+                assert completelyFiltered>=0;
+                // reduce current node depth
+                nodeDepth--;
+                assert nodeDepth>=0;
+            }
+
+            @Override
+            public void startPrefixMapping(String prefix, String uri) throws 
SAXException {
+                // remove prefix mappings as they should not occur in XHTML
+            }
+
+            @Override
+            public void endPrefixMapping(String prefix) throws SAXException {
+                // remove prefix mappings as they should not occur in XHTML
+            }
+
+        };
+
+        try {
+            SAXParserFactory factory = SAXParserFactory.newInstance();
+            factory.setValidating(false);
+            factory.setNamespaceAware(true);
+            SAXParser parser = factory.newSAXParser();
+            parser.parse(new CloseShieldInputStream(stream),new 
NSNormalizerContentHandler(dh));
+        } catch (ParserConfigurationException e) {
+            throw new TikaException("XML parser configuration error", e);
+        }
+    }
 
 }

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java?rev=722673&r1=722672&r2=722673&view=diff
==============================================================================
--- 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
 (original)
+++ 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
 Tue Dec  2 15:34:44 2008
@@ -65,7 +65,7 @@
         dh = getStatistic(dh, md, "nbPara", "paragraph-count");
         dh = getStatistic(dh, md, "nbWord", "word-count");
         dh = getStatistic(dh, md, "nbCharacter", "character-count");
-               dh = new NSNormalizerContentHandler(dh);
+        dh = new NSNormalizerContentHandler(dh);
         return dh;
     }
 

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/sax/ElementMappingContentHandler.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/ElementMappingContentHandler.java?rev=722673&r1=722672&r2=722673&view=diff
==============================================================================
--- 
lucene/tika/trunk/src/main/java/org/apache/tika/sax/ElementMappingContentHandler.java
 (original)
+++ 
lucene/tika/trunk/src/main/java/org/apache/tika/sax/ElementMappingContentHandler.java
 Tue Dec  2 15:34:44 2008
@@ -34,82 +34,82 @@
  */
 public class ElementMappingContentHandler extends ContentHandlerDecorator {
 
-       private final Map<QName,TargetElement> mappings;
+    private final Map<QName,TargetElement> mappings;
+
+    public ElementMappingContentHandler(ContentHandler handler, 
Map<QName,TargetElement> mappings) {
+        super(handler);
+        this.mappings=mappings;
+    }
+
+    @Override
+    public void startElement(String namespaceURI, String localName, String 
qName, Attributes atts) throws SAXException {
+        final TargetElement mapping=mappings.get(new 
QName(namespaceURI,localName));
+        if (mapping!=null) {
+            final QName tag=mapping.getMappedTagName();
+            
super.startElement(tag.getNamespaceURI(),tag.getLocalPart(),getQNameAsString(tag),mapping.mapAttributes(atts));
+        }
+    }
+
+    @Override
+    public void endElement(String namespaceURI, String localName, String 
qName) throws SAXException {
+        final TargetElement mapping=mappings.get(new 
QName(namespaceURI,localName));
+        if (mapping!=null) {
+            final QName tag=mapping.getMappedTagName();
+            
super.endElement(tag.getNamespaceURI(),tag.getLocalPart(),getQNameAsString(tag));
+        }
+    }
+
+    protected static final String getQNameAsString(final QName qname) {
+        final StringBuilder qn=new StringBuilder(qname.getPrefix());
+        if (qn.length()>0) qn.append(':');
+        return qn.append(qname.getLocalPart()).toString();
+    }
+
+    public static class TargetElement {
+
+        /** Creates an TargetElement, attributes of this element will be 
mapped as specified */
+        public TargetElement(QName mappedTagName, Map<QName,QName> 
attributesMapping) {
+            this.mappedTagName=mappedTagName;
+            this.attributesMapping=attributesMapping;
+        }
+
+        /** A shortcut that automatically creates the QName object */
+        public TargetElement(String mappedTagURI, String mappedTagLocalName, 
Map<QName,QName> attributesMapping) {
+            this(new QName(mappedTagURI,mappedTagLocalName), 
attributesMapping);
+        }
+
+        /** Creates an TargetElement with no attributes, all attributes will 
be deleted from SAX stream */
+        public TargetElement(QName mappedTagName) {
+            this(mappedTagName, Collections.<QName,QName>emptyMap());
+        }
+
+        /** A shortcut that automatically creates the QName object */
+        public TargetElement(String mappedTagURI, String mappedTagLocalName) {
+            this(mappedTagURI, mappedTagLocalName, 
Collections.<QName,QName>emptyMap());
+        }
+
+        public QName getMappedTagName() {
+            return mappedTagName;
+        }
+
+        public Map<QName,QName> getAttributesMapping() {
+            return attributesMapping;
+        }
+
+        public Attributes mapAttributes(final Attributes atts) {
+            final AttributesImpl natts = new AttributesImpl();
+            for (int i = 0; i < atts.getLength(); i++) {
+                QName name=attributesMapping.get(new QName(atts.getURI(i), 
atts.getLocalName(i)));
+                if (name!=null) natts.addAttribute(
+                        name.getNamespaceURI(), name.getLocalPart(), 
getQNameAsString(name),
+                        atts.getType(i), atts.getValue(i)
+                );
+            }
+            return natts;
+        }
+
+        private final QName mappedTagName;
+        private final Map<QName,QName> attributesMapping;
+    }
 
-       public ElementMappingContentHandler(ContentHandler handler, 
Map<QName,TargetElement> mappings) {
-               super(handler);
-               this.mappings=mappings;
-       }
-
-       @Override
-       public void startElement(String namespaceURI, String localName, String 
qName, Attributes atts) throws SAXException {
-               final TargetElement mapping=mappings.get(new 
QName(namespaceURI,localName));
-               if (mapping!=null) {
-                       final QName tag=mapping.getMappedTagName();
-                       
super.startElement(tag.getNamespaceURI(),tag.getLocalPart(),getQNameAsString(tag),mapping.mapAttributes(atts));
-               }
-       }
-
-       @Override
-       public void endElement(String namespaceURI, String localName, String 
qName) throws SAXException {
-               final TargetElement mapping=mappings.get(new 
QName(namespaceURI,localName));
-               if (mapping!=null) {
-                       final QName tag=mapping.getMappedTagName();
-                       
super.endElement(tag.getNamespaceURI(),tag.getLocalPart(),getQNameAsString(tag));
-               }
-       }
-       
-       protected static final String getQNameAsString(final QName qname) {
-               final StringBuilder qn=new StringBuilder(qname.getPrefix());
-               if (qn.length()>0) qn.append(':');
-               return qn.append(qname.getLocalPart()).toString();
-       }
-
-       public static class TargetElement {
-       
-               /** Creates an TargetElement, attributes of this element will 
be mapped as specified */
-               public TargetElement(QName mappedTagName, Map<QName,QName> 
attributesMapping) {
-                       this.mappedTagName=mappedTagName;
-                       this.attributesMapping=attributesMapping;
-               }
-               
-               /** A shortcut that automatically creates the QName object */
-               public TargetElement(String mappedTagURI, String 
mappedTagLocalName, Map<QName,QName> attributesMapping) {
-                       this(new QName(mappedTagURI,mappedTagLocalName), 
attributesMapping);
-               }
-               
-               /** Creates an TargetElement with no attributes, all attributes 
will be deleted from SAX stream */
-               public TargetElement(QName mappedTagName) {
-                       this(mappedTagName, 
Collections.<QName,QName>emptyMap());
-               }
-               
-               /** A shortcut that automatically creates the QName object */
-               public TargetElement(String mappedTagURI, String 
mappedTagLocalName) {
-                       this(mappedTagURI, mappedTagLocalName, 
Collections.<QName,QName>emptyMap());
-               }
-               
-               public QName getMappedTagName() {
-                       return mappedTagName;
-               }
-               
-               public Map<QName,QName> getAttributesMapping() {
-                       return attributesMapping;
-               }
-               
-               public Attributes mapAttributes(final Attributes atts) {
-                       final AttributesImpl natts = new AttributesImpl();
-                       for (int i = 0; i < atts.getLength(); i++) {
-                               QName name=attributesMapping.get(new 
QName(atts.getURI(i), atts.getLocalName(i)));
-                               if (name!=null) natts.addAttribute(
-                                       name.getNamespaceURI(), 
name.getLocalPart(), getQNameAsString(name),
-                                       atts.getType(i), atts.getValue(i)
-                               );
-                       }
-                       return natts;
-               }
-               
-               private final QName mappedTagName;
-               private final Map<QName,QName> attributesMapping;
-       }
-       
 }

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/sax/TextContentHandler.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/TextContentHandler.java?rev=722673&r1=722672&r2=722673&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/sax/TextContentHandler.java 
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/sax/TextContentHandler.java 
Tue Dec  2 15:34:44 2008
@@ -47,7 +47,7 @@
     }
 
     @Override
-       public String toString() {
+    public String toString() {
         return delegate.toString();
     }
 


Reply via email to