Author: veithen
Date: Sun Aug 12 17:38:13 2012
New Revision: 1372155
URL: http://svn.apache.org/viewvc?rev=1372155&view=rev
Log:
Started to decouple Abdera from the internals of the Axiom builder by moving
the filtering logic (for whitespace, comments, etc.) out of FOMBuilder into an
XMLStreamReader wrapper.
Added:
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
(with props)
Modified:
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
Modified:
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
URL:
http://svn.apache.org/viewvc/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java?rev=1372155&r1=1372154&r2=1372155&view=diff
==============================================================================
---
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
(original)
+++
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMBuilder.java
Sun Aug 12 17:38:13 2012
@@ -21,7 +21,6 @@ import java.util.Map;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.abdera.filter.ParseFilter;
@@ -32,15 +31,12 @@ import org.apache.abdera.model.Text;
import org.apache.abdera.parser.ParseException;
import org.apache.abdera.parser.ParserOptions;
import org.apache.abdera.util.Constants;
-import org.apache.axiom.om.OMConstants;
import org.apache.axiom.om.OMContainer;
import org.apache.axiom.om.OMDocument;
import org.apache.axiom.om.OMElement;
import org.apache.axiom.om.OMException;
-import org.apache.axiom.om.OMFactory;
import org.apache.axiom.om.OMNamespace;
import org.apache.axiom.om.OMNode;
-import org.apache.axiom.om.OMText;
import org.apache.axiom.om.impl.OMContainerEx;
import org.apache.axiom.om.impl.OMNodeEx;
import org.apache.axiom.om.impl.builder.StAXOMBuilder;
@@ -52,27 +48,15 @@ public class FOMBuilder extends StAXOMBu
private final FOMFactory fomfactory;
private final ParserOptions parserOptions;
private boolean indoc = false;
- private int depthInSkipElement = 0;
- private boolean ignoreWhitespace = false;
- private boolean ignoreComments = false;
- private boolean ignorePI = false;
public FOMBuilder(FOMFactory factory, XMLStreamReader parser,
ParserOptions parserOptions) {
- super(factory, parser);
+ super(factory, new FOMStAXFilter(parser, parserOptions));
this.document = (OMDocument)factory.newDocument();
this.parserOptions = parserOptions;
this.fomfactory = factory;
String enc = parser.getCharacterEncodingScheme();
document.setCharsetEncoding(enc != null ? enc : "utf-8");
document.setXMLVersion(parser.getVersion() != null ?
parser.getVersion() : "1.0");
- if (parserOptions != null) {
- ParseFilter parseFilter = parserOptions.getParseFilter();
- if (parseFilter != null) {
- ignoreWhitespace = parseFilter.getIgnoreWhitespace();
- ignoreComments = parseFilter.getIgnoreComments();
- ignorePI = parseFilter.getIgnoreProcessingInstructions();
- }
- }
}
public ParserOptions getParserOptions() {
@@ -132,32 +116,6 @@ public class FOMBuilder extends StAXOMBu
: true;
}
- private OMNode applyTextFilter(int type) {
- if (parserOptions != null) {
- ParseFilter parseFilter = parserOptions.getParseFilter();
- if (parseFilter != null) {
- if (parser.isWhiteSpace() && parseFilter.getIgnoreWhitespace())
- return createOMText("", type);
- }
- }
- return createOMText(type);
- }
-
- private int getNextElementToParse() throws XMLStreamException {
- int token = parser.next();
- if (depthInSkipElement == 0 && token !=
XMLStreamConstants.START_ELEMENT) {
- return token;
- } else if (token == XMLStreamConstants.START_ELEMENT &&
isAcceptableToParse(parser.getName(), false)
- && depthInSkipElement == 0) {
- return token;
- } else if (token == XMLStreamConstants.START_ELEMENT) {
- depthInSkipElement++;
- } else if (token == XMLStreamConstants.END_ELEMENT) { // otherwise
skip like crazy
- depthInSkipElement--;
- }
- return getNextElementToParse();
- }
-
/**
* Method next.
*
@@ -169,7 +127,7 @@ public class FOMBuilder extends StAXOMBu
if (done) {
throw new OMException();
}
- int token = getNextElementToParse();
+ int token = parser.next();
if (!cache) {
return token;
}
@@ -183,10 +141,10 @@ public class FOMBuilder extends StAXOMBu
document.setStandalone(parser.isStandalone() ? YES : NO);
break;
case XMLStreamConstants.CHARACTERS:
- lastNode = applyTextFilter(XMLStreamConstants.CHARACTERS);
+ lastNode = createOMText(XMLStreamConstants.CHARACTERS);
break;
case XMLStreamConstants.CDATA:
- lastNode = applyTextFilter(XMLStreamConstants.CDATA);
+ lastNode = createOMText(XMLStreamConstants.CDATA);
break;
case XMLStreamConstants.END_ELEMENT:
endElement();
@@ -196,39 +154,13 @@ public class FOMBuilder extends StAXOMBu
((OMContainerEx)this.document).setComplete(true);
break;
case XMLStreamConstants.SPACE:
- if (!ignoreWhitespace)
- lastNode = createOMText(XMLStreamConstants.SPACE);
+ lastNode = createOMText(XMLStreamConstants.SPACE);
break;
case XMLStreamConstants.COMMENT:
- if (!ignoreComments)
- createComment();
- break;
- case XMLStreamConstants.DTD:
- // Current StAX cursor model implementations
inconsistently handle DTDs.
- // Woodstox, for instance, does not provide a means of
getting to the complete
- // doctype declaration (which is actually valid according
to the spec, which
- // is broken). The StAX reference impl returns the
complete doctype declaration
- // despite the fact that doing so is apparently against
the spec. We can get
- // to the complete declaration in Woodstox if we want to
use their proprietary
- // extension APIs. It's unclear how other Stax impls
handle this. So.. for now,
- // we're just going to ignore the DTD. The DTD will still
be processed as far
- // as entities are concerned, but we will not be able to
reserialize the parsed
- // document with the DTD. Since very few folks actually
use DTD's in feeds
- // right now (and we should likely be encouraging folks
not to do so), this
- // shouldn't be that big of a problem
- // if (!parserOptions.getIgnoreDoctype())
- // createDTD();
+ createComment();
break;
case XMLStreamConstants.PROCESSING_INSTRUCTION:
- if (!ignorePI)
- createPI();
- break;
- case XMLStreamConstants.ENTITY_REFERENCE:
- String val = parserOptions.resolveEntity(super.getName());
- if (val == null)
- throw new ParseException("Unresolved undeclared
entity: " + super.getName());
- else
- lastNode = createOMText(val,
XMLStreamConstants.CHARACTERS);
+ createPI();
break;
default:
throw new ParseException();
@@ -306,50 +238,4 @@ public class FOMBuilder extends StAXOMBu
public FOMFactory getFactory() {
return fomfactory;
}
-
- /**
- * Method createOMText.
- *
- * @return Returns OMNode.
- * @throws OMException
- */
- protected OMNode createOMText(String value, int textType) throws
OMException {
- OMNode node = null;
- if (lastNode == null) {
- return null;
- } else if (!lastNode.isComplete()) {
- node = createOMText(value, (OMElement)lastNode, textType);
- } else {
- OMContainer parent = lastNode.getParent();
- if (!(parent instanceof OMDocument)) {
- node = createOMText(value, (OMElement)parent, textType);
- }
- }
- return node;
- }
-
- /**
- * This method will check whether the text can be optimizable using
IS_BINARY flag. If that is set then we try to
- * get the data handler.
- *
- * @param omElement
- * @param textType
- * @return omNode
- */
- private OMNode createOMText(String value, OMElement omElement, int
textType) {
- try {
- // TODO:Check on this. I'm not sure it's actually used
- // if (isDataHandlerAware && Boolean.TRUE ==
parser.getProperty(OMConstants.IS_BINARY)) {
- if (Boolean.TRUE == parser.getProperty(OMConstants.IS_BINARY)) {
- Object dataHandler =
parser.getProperty(OMConstants.DATA_HANDLER);
- OMText text = new FOMTextValue(dataHandler, true,
(OMFactory)this);
- omElement.addChild(text);
- return text;
- } else {
- return new FOMTextValue(omElement, value, textType,
(OMFactory)this.fomfactory);
- }
- } catch (IllegalArgumentException e) {
- return new FOMTextValue(omElement, value, textType,
(OMFactory)this.fomfactory);
- }
- }
}
Added:
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
URL:
http://svn.apache.org/viewvc/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java?rev=1372155&view=auto
==============================================================================
---
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
(added)
+++
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
Sun Aug 12 17:38:13 2012
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.abdera.parser.stax;
+
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.abdera.filter.ParseFilter;
+import org.apache.abdera.parser.ParseException;
+import org.apache.abdera.parser.ParserOptions;
+import org.apache.axiom.om.OMContainer;
+import org.apache.axiom.util.stax.wrapper.XMLStreamReaderWrapper;
+
+/**
+ * {@link XMLStreamReader} wrapper that implements the various filters and
transformations that can
+ * be configured using {@link ParseFilter}.
+ * <p>
+ * The design of Apache Axiom is based on the assumption that no filtering or
transformation is done
+ * inside the builder. Among other things, this assumption ensures that
+ * {@link OMContainer#getXMLStreamReaderWithoutCaching()} produces consistent
results. One may argue
+ * that for Abdera this is less important because
+ * {@link OMContainer#getXMLStreamReaderWithoutCaching()} is not exposed by
the Abdera API. However,
+ * attempting to do filtering and transformation in the builder results in
strong coupling between
+ * Abdera and Axiom because {@link FOMBuilder} would depend on the internal
implementation details
+ * of the Axiom builder. To avoid this we do all filtering/transformation
upfront.
+ */
+class FOMStAXFilter extends XMLStreamReaderWrapper {
+ private final ParserOptions parserOptions;
+ private boolean ignoreWhitespace = false;
+ private boolean ignoreComments = false;
+ private boolean ignorePI = false;
+ private int depthInSkipElement;
+ private int altEventType;
+ private String altText;
+
+ FOMStAXFilter(XMLStreamReader parent, ParserOptions parserOptions) {
+ super(parent);
+ this.parserOptions = parserOptions;
+ if (parserOptions != null) {
+ ParseFilter parseFilter = parserOptions.getParseFilter();
+ if (parseFilter != null) {
+ ignoreWhitespace = parseFilter.getIgnoreWhitespace();
+ ignoreComments = parseFilter.getIgnoreComments();
+ ignorePI = parseFilter.getIgnoreProcessingInstructions();
+ }
+ }
+ resetEvent();
+ }
+
+ private void resetEvent() {
+ altEventType = -1;
+ altText = null;
+ }
+
+ private boolean isAcceptableToParse(QName qname, boolean attribute) {
+ if (parserOptions == null)
+ return true;
+ ParseFilter filter = parserOptions.getParseFilter();
+ return (filter != null) ? (!attribute) ? filter.acceptable(qname) :
filter.acceptable(getName(), qname)
+ : true;
+ }
+
+ @Override
+ public int next() throws XMLStreamException {
+ resetEvent();
+ while (true) {
+ int eventType = super.next();
+ if (depthInSkipElement > 0) {
+ switch (eventType) {
+ case START_ELEMENT:
+ depthInSkipElement++;
+ break;
+ case END_ELEMENT:
+ depthInSkipElement--;
+ break;
+ }
+ } else {
+ switch (eventType) {
+ case DTD:
+ // Current StAX cursor model implementations
inconsistently handle DTDs.
+ // Woodstox, for instance, does not provide a means of
getting to the complete
+ // doctype declaration (which is actually valid
according to the spec, which
+ // is broken). The StAX reference impl returns the
complete doctype declaration
+ // despite the fact that doing so is apparently
against the spec. We can get
+ // to the complete declaration in Woodstox if we want
to use their proprietary
+ // extension APIs. It's unclear how other Stax impls
handle this. So.. for now,
+ // we're just going to ignore the DTD. The DTD will
still be processed as far
+ // as entities are concerned, but we will not be able
to reserialize the parsed
+ // document with the DTD. Since very few folks
actually use DTD's in feeds
+ // right now (and we should likely be encouraging
folks not to do so), this
+ // shouldn't be that big of a problem
+ continue;
+ case START_ELEMENT:
+ if (!isAcceptableToParse(getName(), false)) {
+ depthInSkipElement = 1;
+ continue;
+ }
+ break;
+ case SPACE:
+ if (ignoreWhitespace) {
+ continue;
+ }
+ break;
+ case COMMENT:
+ if (ignoreComments) {
+ continue;
+ }
+ break;
+ case PROCESSING_INSTRUCTION:
+ if (ignorePI) {
+ continue;
+ }
+ break;
+ case CHARACTERS:
+ case CDATA:
+ if (ignoreWhitespace && isWhiteSpace()) {
+ continue;
+ }
+ break;
+ case ENTITY_REFERENCE:
+ String val =
parserOptions.resolveEntity(getLocalName());
+ if (val == null) {
+ throw new ParseException("Unresolved undeclared
entity: " + getLocalName());
+ } else {
+ altEventType = CHARACTERS;
+ altText = val;
+ }
+ break;
+ }
+ return altEventType != -1 ? altEventType : eventType;
+ }
+ }
+ }
+
+ @Override
+ public int getEventType() {
+ return altEventType != -1 ? altEventType : super.getEventType();
+ }
+
+ @Override
+ public String getText() {
+ return altText != null ? altText : super.getText();
+ }
+}
Propchange:
abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMStAXFilter.java
------------------------------------------------------------------------------
svn:eol-style = native