Author: vsiveton
Date: Fri Nov 7 07:05:39 2008
New Revision: 712147
URL: http://svn.apache.org/viewvc?rev=712147&view=rev
Log:
DOXIA-263: Improve validation of input documents
o add validation before processing xml
Added:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
(with props)
Modified:
maven/doxia/doxia/trunk/doxia-core/pom.xml
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
Modified: maven/doxia/doxia/trunk/doxia-core/pom.xml
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/pom.xml?rev=712147&r1=712146&r2=712147&view=diff
==============================================================================
--- maven/doxia/doxia/trunk/doxia-core/pom.xml (original)
+++ maven/doxia/doxia/trunk/doxia-core/pom.xml Fri Nov 7 07:05:39 2008
@@ -49,14 +49,13 @@
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-container-default</artifactId>
</dependency>
-
- <!-- test -->
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.8.1</version>
- <scope>test</scope>
</dependency>
+
+ <!-- test -->
<dependency>
<groupId>org.apache.maven.scm</groupId>
<artifactId>maven-scm-api</artifactId>
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=712147&r1=712146&r2=712147&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
Fri Nov 7 07:05:39 2008
@@ -19,21 +19,33 @@
* under the License.
*/
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.Reader;
+import java.io.StringReader;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import javax.xml.XMLConstants;
+
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventAttributeSet;
+import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.XMLReaderFactory;
/**
* An abstract class that defines some convenience methods for
<code>XML</code> parsers.
@@ -46,14 +58,20 @@
extends AbstractParser
implements XmlMarkup
{
- /** Entity pattern for HTML entity, i.e. &nbsp; */
+ /** Entity pattern for HTML entity, i.e. &nbsp; see
http://www.w3.org/TR/REC-xml/#NT-EntityDecl */
private static final Pattern PATTERN_ENTITY_1 =
Pattern.compile(
"<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
- /** Entity pattern for Unicode entity, i.e. &#38; */
+ /** Entity pattern for Unicode entity, i.e. &#38; see
http://www.w3.org/TR/REC-xml/#NT-EntityDecl */
private static final Pattern PATTERN_ENTITY_2 =
Pattern.compile(
"<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&#x?[0-9a-fA-F]{1,4};)(\\s)*\"(\\s)*>"
);
+ /**
+ * Matches a DOCTYPE declaration, see http://www.w3.org/TR/REC-xml/#NT-doctypedecl.
+ * Group 1 captures the text between <code>&lt;!DOCTYPE</code> and the next <code>&gt;</code>.
+ */
+ private static final Pattern PATTERN_DOCTYPE = Pattern.compile(
".*<!DOCTYPE([^>]*)>.*" );
+
+ /**
+ * Matches a start tag whose name uses the ASCII subset of http://www.w3.org/TR/REC-xml/#NT-Name.
+ * Group 1 captures the tag name, group 2 the remainder of the tag up to the next <code>&gt;</code>.
+ */
+ private static final Pattern PATTERN_TAG = Pattern.compile(
".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*" );
+
private boolean ignorable;
private boolean collapsible;
@@ -62,10 +80,34 @@
private Map entities;
+ private boolean validate = true;
+
+ /** lazy xmlReader to validate xml content*/
+ private XMLReader xmlReader;
+
/** {@inheritDoc} */
public void parse( Reader source, Sink sink )
throws ParseException
{
+ // 1 first parsing if validation is required
+ if ( isValidate() )
+ {
+ String content;
+ try
+ {
+ content = IOUtil.toString( new BufferedReader( source ) );
+ }
+ catch ( IOException e )
+ {
+ throw new ParseException( "Error reading the model: " +
e.getMessage(), e );
+ }
+
+ validate( content );
+
+ source = new StringReader( content );
+ }
+
+ // 2 second parsing to process
try
{
XmlPullParser parser = new MXParser();
@@ -426,4 +468,107 @@
return entities;
}
+
+ /**
+ * Tells whether {@link #parse(Reader, Sink)} validates the XML content before processing it.
+ *
+ * @return <code>true</code> if XML content will be validated, <code>false</code> otherwise.
+ */
+ public boolean isValidate()
+ {
+ return validate;
+ }
+
+ /**
+ * Enables or disables the validation of the XML content done by {@link #parse(Reader, Sink)}.
+ *
+ * @param validate <code>true</code> to validate the content before it is processed,
+ * <code>false</code> to skip the validation.
+ * @see #parse(Reader, Sink)
+ */
+ public void setValidate( boolean validate )
+ {
+ this.validate = validate;
+ }
+
+ // ----------------------------------------------------------------------
+ // Private methods
+ // ----------------------------------------------------------------------
+
+ /**
+  * Validate an XML content with SAX.
+  * <p>
+  * The content is handed to the reader returned by {@link #getXmlReader()} only when it contains a
+  * DOCTYPE declaration or, failing that, when the tag found by <code>PATTERN_TAG</code> mentions the
+  * XML Schema instance namespace. Otherwise nothing is parsed and the method simply returns.
+  * </p>
+  *
+  * @param content a not null xml content
+  * @throws ParseException if the content cannot be read or if the SAX reader reports a problem.
+  */
+ private void validate( String content )
+     throws ParseException
+ {
+     // 1 if there's a doctype
+     boolean hasDoctype = PATTERN_DOCTYPE.matcher( content ).find();
+
+     // 2 if no doctype, check for an xmlns instance
+     boolean hasXsd = false;
+     if ( !hasDoctype )
+     {
+         Matcher matcher = PATTERN_TAG.matcher( content );
+         if ( matcher.find() )
+         {
+             String attributes = matcher.group( 2 );
+             hasXsd = attributes.indexOf( XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI ) != -1;
+         }
+     }
+
+     // 3 validate content if doctype or xsd
+     if ( !hasDoctype && !hasXsd )
+     {
+         return;
+     }
+
+     try
+     {
+         getLog().info( "Validating the content..." );
+
+         // The content is already decoded text: give the parser a character stream so that it is not
+         // re-encoded with the platform charset and then decoded again with the document encoding.
+         getXmlReader().parse( new InputSource( new StringReader( content ) ) );
+     }
+     catch ( IOException e )
+     {
+         throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+     }
+     catch ( SAXException e )
+     {
+         // Also covers SAXNotRecognizedException and SAXNotSupportedException, both SAXException subclasses.
+         throw new ParseException( "Error validating the model: " + e.getMessage(), e );
+     }
+ }
+
+ /**
+  * Returns the SAX reader used for validation, creating it on first use.
+  * <p>
+  * The reader is a Xerces <code>SAXParser</code> with the validation and schema validation features
+  * switched on and a {@link MessagesErrorHandler} bound to the current log.
+  * </p>
+  *
+  * @return an xmlReader instance.
+  * @throws SAXException if the reader cannot be created or configured.
+  */
+ private XMLReader getXmlReader()
+     throws SAXException
+ {
+     if ( xmlReader == null )
+     {
+         XMLReader reader = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" );
+         reader.setFeature( "http://xml.org/sax/features/validation", true );
+         reader.setFeature( "http://apache.org/xml/features/validation/schema", true );
+         reader.setErrorHandler( new MessagesErrorHandler( getLog() ) );
+
+         // Cache the reader only once it is fully configured, so that a failure above cannot leave a
+         // non-validating reader behind for the next call.
+         xmlReader = reader;
+     }
+
+     return xmlReader;
+ }
}
Added:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java?rev=712147&view=auto
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
(added)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
Fri Nov 7 07:05:39 2008
@@ -0,0 +1,127 @@
+package org.apache.maven.doxia.parser;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.maven.doxia.logging.Log;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Convenience class to beautify SAXParseException messages.
+ * <p>
+ * Each reported problem is formatted as a multi-line report (public id, system id, line, column and
+ * message). Warnings are written to the supplied log when warnings are enabled; errors and fatal
+ * errors are rethrown as a {@link SAXException} whose message is the report.
+ * </p>
+ *
+ * @author <a href="mailto:[EMAIL PROTECTED]">Vincent Siveton</a>
+ * @version $Id$
+ */
+class MessagesErrorHandler
+    extends DefaultHandler
+{
+    /** The vm line separator */
+    private static final String EOL = System.getProperty( "line.separator" );
+
+    private static final int TYPE_UNKNOWN = 0;
+
+    private static final int TYPE_WARNING = 1;
+
+    private static final int TYPE_ERROR = 2;
+
+    private static final int TYPE_FATAL = 3;
+
+    /** The log receiving the warnings. */
+    private final Log log;
+
+    /**
+     * @param log the log used to report warnings.
+     */
+    public MessagesErrorHandler( Log log )
+    {
+        this.log = log;
+    }
+
+    /** {@inheritDoc} */
+    public void warning( SAXParseException e )
+        throws SAXException
+    {
+        processException( TYPE_WARNING, e );
+    }
+
+    /** {@inheritDoc} */
+    public void error( SAXParseException e )
+        throws SAXException
+    {
+        processException( TYPE_ERROR, e );
+    }
+
+    /** {@inheritDoc} */
+    public void fatalError( SAXParseException e )
+        throws SAXException
+    {
+        processException( TYPE_FATAL, e );
+    }
+
+    // ----------------------------------------------------------------------
+    // Private methods
+    // ----------------------------------------------------------------------
+
+    /**
+     * Formats the given exception and either logs it (warning) or rethrows it (anything else).
+     *
+     * @param type one of the <code>TYPE_*</code> constants.
+     * @param e the exception reported by the SAX parser.
+     * @throws SAXException for every type but <code>TYPE_WARNING</code>; the given exception is kept
+     * as its cause.
+     */
+    private void processException( int type, SAXParseException e )
+        throws SAXException
+    {
+        // A warning is only ever logged: nothing to format when the log discards warnings.
+        if ( type == TYPE_WARNING && !log.isWarnEnabled() )
+        {
+            return;
+        }
+
+        StringBuffer message = new StringBuffer();
+
+        switch ( type )
+        {
+            case TYPE_WARNING:
+                message.append( "Warning:" );
+                break;
+
+            case TYPE_ERROR:
+                message.append( "Error:" );
+                break;
+
+            case TYPE_FATAL:
+                message.append( "Fatal error:" );
+                break;
+
+            case TYPE_UNKNOWN:
+            default:
+                message.append( "Unknown:" );
+                break;
+        }
+
+        message.append( EOL );
+        message.append( " Public ID: " ).append( e.getPublicId() ).append( EOL );
+        message.append( " System ID: " ).append( e.getSystemId() ).append( EOL );
+        message.append( " Line number: " ).append( e.getLineNumber() ).append( EOL );
+        message.append( " Column number: " ).append( e.getColumnNumber() ).append( EOL );
+        message.append( " Message: " ).append( e.getMessage() ).append( EOL );
+
+        if ( type == TYPE_WARNING )
+        {
+            log.warn( message.toString() );
+            return;
+        }
+
+        // Keep the parser exception as the cause so that its stack trace and location are not lost.
+        throw new SAXException( message.toString(), e );
+    }
+}
\ No newline at end of file
Propchange:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/MessagesErrorHandler.java
------------------------------------------------------------------------------
svn:keywords = Author Date Id Revision