stax: FOMBuilder.java FOMStAXFilter.java

veithen Sun, 12 Aug 2012 10:39:04 -0700

Author: veithen
Date: Sun Aug 12 17:38:13 2012
New Revision: 1372155

URL: http://svn.apache.org/viewvc?rev=1372155&view=rev
Log:
Started to decouple Abdera from the internals of the Axiom builder by moving 
the filtering logic (for whitespace, comments, etc.) out of FOMBuilder into an 
XMLStreamReader wrapper.


Added:
    
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
   (with props)
Modified:
    
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java

Modified: 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
URL: 
http://svn.apache.org/viewvc/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java?rev=1372155&r1=1372154&r2=1372155&view=diff
==============================================================================
--- 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
 (original)
+++ 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
 Sun Aug 12 17:38:13 2012
@@ -21,7 +21,6 @@ import java.util.Map;
 
 import javax.xml.namespace.QName;
 import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 
 import org.apache.abdera.filter.ParseFilter;
@@ -32,15 +31,12 @@ import org.apache.abdera.model.Text;
 import org.apache.abdera.parser.ParseException;
 import org.apache.abdera.parser.ParserOptions;
 import org.apache.abdera.util.Constants;
-import org.apache.axiom.om.OMConstants;
 import org.apache.axiom.om.OMContainer;
 import org.apache.axiom.om.OMDocument;
 import org.apache.axiom.om.OMElement;
 import org.apache.axiom.om.OMException;
-import org.apache.axiom.om.OMFactory;
 import org.apache.axiom.om.OMNamespace;
 import org.apache.axiom.om.OMNode;
-import org.apache.axiom.om.OMText;
 import org.apache.axiom.om.impl.OMContainerEx;
 import org.apache.axiom.om.impl.OMNodeEx;
 import org.apache.axiom.om.impl.builder.StAXOMBuilder;
@@ -52,27 +48,15 @@ public class FOMBuilder extends StAXOMBu
     private final FOMFactory fomfactory;
     private final ParserOptions parserOptions;
     private boolean indoc = false;
-    private int depthInSkipElement = 0;
-    private boolean ignoreWhitespace = false;
-    private boolean ignoreComments = false;
-    private boolean ignorePI = false;
 
     public FOMBuilder(FOMFactory factory, XMLStreamReader parser, 
ParserOptions parserOptions) {
-        super(factory, parser);
+        super(factory, new FOMStAXFilter(parser, parserOptions));
         this.document = (OMDocument)factory.newDocument();
         this.parserOptions = parserOptions;
         this.fomfactory = factory;
         String enc = parser.getCharacterEncodingScheme();
         document.setCharsetEncoding(enc != null ? enc : "utf-8");
         document.setXMLVersion(parser.getVersion() != null ? 
parser.getVersion() : "1.0");
-        if (parserOptions != null) {
-            ParseFilter parseFilter = parserOptions.getParseFilter();
-            if (parseFilter != null) {
-                ignoreWhitespace = parseFilter.getIgnoreWhitespace();
-                ignoreComments = parseFilter.getIgnoreComments();
-                ignorePI = parseFilter.getIgnoreProcessingInstructions();
-            }
-        }
     }
 
     public ParserOptions getParserOptions() {
@@ -132,32 +116,6 @@ public class FOMBuilder extends StAXOMBu
             : true;
     }
 
-    private OMNode applyTextFilter(int type) {
-        if (parserOptions != null) {
-            ParseFilter parseFilter = parserOptions.getParseFilter();
-            if (parseFilter != null) {
-                if (parser.isWhiteSpace() && parseFilter.getIgnoreWhitespace())
-                    return createOMText("", type);
-            }
-        }
-        return createOMText(type);
-    }
-
-    private int getNextElementToParse() throws XMLStreamException {
-        int token = parser.next();
-        if (depthInSkipElement == 0 && token != 
XMLStreamConstants.START_ELEMENT) {
-            return token;
-        } else if (token == XMLStreamConstants.START_ELEMENT && 
isAcceptableToParse(parser.getName(), false)
-            && depthInSkipElement == 0) {
-            return token;
-        } else if (token == XMLStreamConstants.START_ELEMENT) {
-            depthInSkipElement++;
-        } else if (token == XMLStreamConstants.END_ELEMENT) { // otherwise 
skip like crazy
-            depthInSkipElement--;
-        }
-        return getNextElementToParse();
-    }
-
     /**
      * Method next.
      * 
@@ -169,7 +127,7 @@ public class FOMBuilder extends StAXOMBu
             if (done) {
                 throw new OMException();
             }
-            int token = getNextElementToParse();
+            int token = parser.next();
             if (!cache) {
                 return token;
             }
@@ -183,10 +141,10 @@ public class FOMBuilder extends StAXOMBu
                     document.setStandalone(parser.isStandalone() ? YES : NO);
                     break;
                 case XMLStreamConstants.CHARACTERS:
-                    lastNode = applyTextFilter(XMLStreamConstants.CHARACTERS);
+                    lastNode = createOMText(XMLStreamConstants.CHARACTERS);
                     break;
                 case XMLStreamConstants.CDATA:
-                    lastNode = applyTextFilter(XMLStreamConstants.CDATA);
+                    lastNode = createOMText(XMLStreamConstants.CDATA);
                     break;
                 case XMLStreamConstants.END_ELEMENT:
                     endElement();
@@ -196,39 +154,13 @@ public class FOMBuilder extends StAXOMBu
                     ((OMContainerEx)this.document).setComplete(true);
                     break;
                 case XMLStreamConstants.SPACE:
-                    if (!ignoreWhitespace)
-                        lastNode = createOMText(XMLStreamConstants.SPACE);
+                    lastNode = createOMText(XMLStreamConstants.SPACE);
                     break;
                 case XMLStreamConstants.COMMENT:
-                    if (!ignoreComments)
-                        createComment();
-                    break;
-                case XMLStreamConstants.DTD:
-                    // Current StAX cursor model implementations 
inconsistently handle DTDs.
-                    // Woodstox, for instance, does not provide a means of 
getting to the complete
-                    // doctype declaration (which is actually valid according 
to the spec, which
-                    // is broken). The StAX reference impl returns the 
complete doctype declaration
-                    // despite the fact that doing so is apparently against 
the spec. We can get
-                    // to the complete declaration in Woodstox if we want to 
use their proprietary
-                    // extension APIs. It's unclear how other Stax impls 
handle this. So.. for now,
-                    // we're just going to ignore the DTD. The DTD will still 
be processed as far
-                    // as entities are concerned, but we will not be able to 
reserialize the parsed
-                    // document with the DTD. Since very few folks actually 
use DTD's in feeds
-                    // right now (and we should likely be encouraging folks 
not to do so), this
-                    // shouldn't be that big of a problem
-                    // if (!parserOptions.getIgnoreDoctype())
-                    // createDTD();
+                    createComment();
                     break;
                 case XMLStreamConstants.PROCESSING_INSTRUCTION:
-                    if (!ignorePI)
-                        createPI();
-                    break;
-                case XMLStreamConstants.ENTITY_REFERENCE:
-                    String val = parserOptions.resolveEntity(super.getName());
-                    if (val == null)
-                        throw new ParseException("Unresolved undeclared 
entity: " + super.getName());
-                    else
-                        lastNode = createOMText(val, 
XMLStreamConstants.CHARACTERS);
+                    createPI();
                     break;
                 default:
                     throw new ParseException();
@@ -306,50 +238,4 @@ public class FOMBuilder extends StAXOMBu
     public FOMFactory getFactory() {
         return fomfactory;
     }
-
-    /**
-     * Method createOMText.
-     * 
-     * @return Returns OMNode.
-     * @throws OMException
-     */
-    protected OMNode createOMText(String value, int textType) throws 
OMException {
-        OMNode node = null;
-        if (lastNode == null) {
-            return null;
-        } else if (!lastNode.isComplete()) {
-            node = createOMText(value, (OMElement)lastNode, textType);
-        } else {
-            OMContainer parent = lastNode.getParent();
-            if (!(parent instanceof OMDocument)) {
-                node = createOMText(value, (OMElement)parent, textType);
-            }
-        }
-        return node;
-    }
-
-    /**
-     * This method will check whether the text can be optimizable using 
IS_BINARY flag. If that is set then we try to
-     * get the data handler.
-     * 
-     * @param omElement
-     * @param textType
-     * @return omNode
-     */
-    private OMNode createOMText(String value, OMElement omElement, int 
textType) {
-        try {
-            // TODO:Check on this. I'm not sure it's actually used
-            // if (isDataHandlerAware && Boolean.TRUE == 
parser.getProperty(OMConstants.IS_BINARY)) {
-            if (Boolean.TRUE == parser.getProperty(OMConstants.IS_BINARY)) {
-                Object dataHandler = 
parser.getProperty(OMConstants.DATA_HANDLER);
-                OMText text = new FOMTextValue(dataHandler, true, 
(OMFactory)this);
-                omElement.addChild(text);
-                return text;
-            } else {
-                return new FOMTextValue(omElement, value, textType, 
(OMFactory)this.fomfactory);
-            }
-        } catch (IllegalArgumentException e) {
-            return new FOMTextValue(omElement, value, textType, 
(OMFactory)this.fomfactory);
-        }
-    }
 }

Added: 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
URL: 
http://svn.apache.org/viewvc/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java?rev=1372155&view=auto
==============================================================================
--- 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
 (added)
+++ 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
 Sun Aug 12 17:38:13 2012
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.abdera.parser.stax;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.abdera.filter.ParseFilter;
+import org.apache.abdera.parser.ParseException;
+import org.apache.abdera.parser.ParserOptions;
+import org.apache.axiom.om.OMContainer;
+import org.apache.axiom.util.stax.wrapper.XMLStreamReaderWrapper;
+
+/**
+ * {@link XMLStreamReader} wrapper that implements the various filters and transformations that can
+ * be configured using {@link ParseFilter}.
+ * <p>
+ * The design of Apache Axiom is based on the assumption that no filtering or transformation is done
+ * inside the builder. Among other things, this assumption ensures that
+ * {@link OMContainer#getXMLStreamReaderWithoutCaching()} produces consistent results. One may argue
+ * that for Abdera this is less important because
+ * {@link OMContainer#getXMLStreamReaderWithoutCaching()} is not exposed by the Abdera API. However,
+ * attempting to do filtering and transformation in the builder results in strong coupling between
+ * Abdera and Axiom because {@link FOMBuilder} would depend on the internal implementation details
+ * of the Axiom builder. To avoid this we do all filtering/transformation upfront.
+ */
+class FOMStAXFilter extends XMLStreamReaderWrapper {
+    private final ParserOptions parserOptions;
+    private boolean ignoreWhitespace = false;
+    private boolean ignoreComments = false;
+    private boolean ignorePI = false;
+    private int depthInSkipElement;
+    private int altEventType;
+    private String altText;
+    
+    FOMStAXFilter(XMLStreamReader parent, ParserOptions parserOptions) {
+        super(parent);
+        this.parserOptions = parserOptions;
+        if (parserOptions != null) {
+            ParseFilter parseFilter = parserOptions.getParseFilter();
+            if (parseFilter != null) {
+                ignoreWhitespace = parseFilter.getIgnoreWhitespace();
+                ignoreComments = parseFilter.getIgnoreComments();
+                ignorePI = parseFilter.getIgnoreProcessingInstructions();
+            }
+        }
+        resetEvent();
+    }
+
+    private void resetEvent() {
+        altEventType = -1;
+        altText = null;
+    }
+    
+    private boolean isAcceptableToParse(QName qname, boolean attribute) {
+        if (parserOptions == null)
+            return true;
+        ParseFilter filter = parserOptions.getParseFilter();
+        return (filter != null) ? (!attribute) ? filter.acceptable(qname) : 
filter.acceptable(getName(), qname)
+            : true;
+    }
+
+    @Override
+    public int next() throws XMLStreamException {
+        resetEvent();
+        while (true) {
+            int eventType = super.next();
+            if (depthInSkipElement > 0) {
+                switch (eventType) {
+                    case START_ELEMENT:
+                        depthInSkipElement++;
+                        break;
+                    case END_ELEMENT:
+                        depthInSkipElement--;
+                        break;
+                }
+            } else {
+                switch (eventType) {
+                    case DTD:
+                        // Current StAX cursor model implementations inconsistently handle DTDs.
+                        // Woodstox, for instance, does not provide a means of getting to the complete
+                        // doctype declaration (which is actually valid according to the spec, which
+                        // is broken). The StAX reference impl returns the complete doctype declaration
+                        // despite the fact that doing so is apparently against the spec. We can get
+                        // to the complete declaration in Woodstox if we want to use their proprietary
+                        // extension APIs. It's unclear how other StAX impls handle this. So, for now,
+                        // we're just going to ignore the DTD. The DTD will still be processed as far
+                        // as entities are concerned, but we will not be able to reserialize the parsed
+                        // document with the DTD. Since very few folks actually use DTDs in feeds
+                        // right now (and we should likely be encouraging folks not to do so), this
+                        // shouldn't be that big of a problem.
+                        continue;
+                    case START_ELEMENT:
+                        if (!isAcceptableToParse(getName(), false)) {
+                            depthInSkipElement = 1;
+                            continue;
+                        }
+                        break;
+                    case SPACE:
+                        if (ignoreWhitespace) {
+                            continue;
+                        }
+                        break;
+                    case COMMENT:
+                        if (ignoreComments) {
+                            continue;
+                        }
+                        break;
+                    case PROCESSING_INSTRUCTION:
+                        if (ignorePI) {
+                            continue;
+                        }
+                        break;
+                    case CHARACTERS:
+                    case CDATA:
+                        if (ignoreWhitespace && isWhiteSpace()) {
+                            continue;
+                        }
+                        break;
+                    case ENTITY_REFERENCE:
+                        String val = 
parserOptions.resolveEntity(getLocalName());
+                        if (val == null) {
+                            throw new ParseException("Unresolved undeclared 
entity: " + getLocalName());
+                        } else {
+                            altEventType = CHARACTERS;
+                            altText = val;
+                        }
+                        break;
+                }
+                return altEventType != -1 ? altEventType : eventType;
+            }
+        }
+    }
+
+    @Override
+    public int getEventType() {
+        return altEventType != -1 ? altEventType : super.getEventType();
+    }
+
+    @Override
+    public String getText() {
+        return altText != null ? altText : super.getText();
+    }
+}

Propchange: 
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

svn commit: r1372155 - in /abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax: FOMBuilder.java FOMStAXFilter.java

Reply via email to