Author: vsiveton
Date: Sun Nov 2 16:51:25 2008
New Revision: 709994
URL: http://svn.apache.org/viewvc?rev=709994&view=rev
Log:
DOXIA-250: Xml parser should handle entities defined in doctype
o fixed the AbstractXmlParser to handle defined entities
o added a test case
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java?rev=709994&r1=709993&r2=709994&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/AbstractXmlParser.java
Sun Nov 2 16:51:25 2008
@@ -21,12 +21,15 @@
import java.io.IOException;
import java.io.Reader;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventAttributeSet;
-
import org.codehaus.plexus.util.StringUtils;
import org.codehaus.plexus.util.xml.pull.MXParser;
import org.codehaus.plexus.util.xml.pull.XmlPullParser;
@@ -43,12 +46,22 @@
extends AbstractParser
implements XmlMarkup
{
+ /** Entity pattern for HTML entity, i.e. &nbsp; */
+ private static final Pattern PATTERN_ENTITY_1 =
+ Pattern.compile(
"<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
+
+ /** Entity pattern for Unicode entity, i.e. &#38; */
+ private static final Pattern PATTERN_ENTITY_2 =
+ Pattern.compile(
"<!ENTITY(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&#x?[0-9a-fA-F]{1,4};)(\\s)*\"(\\s)*>"
);
+
private boolean ignorable;
private boolean collapsible;
private boolean trimmable;
+ private Map entities;
+
/** {@inheritDoc} */
public void parse( Reader source, Sink sink )
throws ParseException
@@ -65,8 +78,8 @@
}
catch ( XmlPullParserException ex )
{
- throw new ParseException( "Error parsing the model: " +
ex.getMessage(), ex, ex.getLineNumber(), ex
- .getColumnNumber() );
+ throw new ParseException( "Error parsing the model: " +
ex.getMessage(), ex, ex.getLineNumber(),
+ ex.getColumnNumber() );
}
catch ( MacroExecutionException ex )
{
@@ -180,7 +193,44 @@
}
else if ( eventType == XmlPullParser.DOCDECL )
{
- // nop
+ String text = parser.getText();
+ int entitiesCount = StringUtils.countMatches( text, "<!ENTITY"
);
+ // entities defined in a local doctype
+ if ( entitiesCount > 0 )
+ {
+ int start = text.indexOf( "<" );
+ int end = text.lastIndexOf( ">" );
+ if ( start != -1 && end != -1 )
+ {
+ text = text.substring( start, end + 1 );
+ for ( int i = 0; i < entitiesCount; i++ )
+ {
+ String tmp = text.substring( text.indexOf( "<" ),
text.indexOf( ">" ) + 1 );
+ Matcher matcher = PATTERN_ENTITY_1.matcher( tmp );
+ if ( matcher.find() && matcher.groupCount() == 7 )
+ {
+ String entityName = matcher.group( 2 );
+ String entityValue = matcher.group( 5 );
+
+ parser.defineEntityReplacementText(
entityName, entityValue );
+ getLocalEntities().put( entityName,
entityValue );
+ }
+ else
+ {
+ matcher = PATTERN_ENTITY_2.matcher( text );
+ if ( matcher.find() && matcher.groupCount() ==
7 )
+ {
+ String entityName = matcher.group( 2 );
+ String entityValue = matcher.group( 5 );
+
+ parser.defineEntityReplacementText(
entityName, entityValue );
+ getLocalEntities().put( entityName,
entityValue );
+ }
+ }
+ text = StringUtils.replace( text, tmp, "" ).trim();
+ }
+ }
+ }
}
try
@@ -355,4 +405,25 @@
return text;
}
+
+ /**
+ * Return the defined entities in a local doctype, i.e.:
+ * <pre>
+ * <!DOCTYPE foo [
+ * <!ENTITY bar "&#x160;">
+ * <!ENTITY bar1 "&#x161;">
+ * ]>
+ * </pre>
+ *
+ * @return a map of the defined entities in a local doctype.
+ */
+ protected Map getLocalEntities()
+ {
+ if ( entities == null )
+ {
+ entities = new LinkedHashMap();
+ }
+
+ return entities;
+ }
}
Modified:
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java?rev=709994&r1=709993&r2=709994&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
(original)
+++
maven/doxia/doxia/trunk/doxia-core/src/main/java/org/apache/maven/doxia/parser/XhtmlBaseParser.java
Sun Nov 2 16:51:25 2008
@@ -698,7 +698,14 @@
}
else
{
- sink.text( text );
+ if ( getLocalEntities().containsKey( textChars ) )
+ {
+ sink.rawText( text );
+ }
+ else
+ {
+ sink.text( text );
+ }
}
}
Modified:
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
URL:
http://svn.apache.org/viewvc/maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java?rev=709994&r1=709993&r2=709994&view=diff
==============================================================================
---
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
(original)
+++
maven/doxia/doxia/trunk/doxia-modules/doxia-module-xhtml/src/test/java/org/apache/maven/doxia/module/xhtml/XhtmlParserTest.java
Sun Nov 2 16:51:25 2008
@@ -19,10 +19,12 @@
* under the License.
*/
+import java.io.StringWriter;
import java.util.Iterator;
import org.apache.maven.doxia.parser.AbstractParserTest;
import org.apache.maven.doxia.parser.Parser;
+import org.apache.maven.doxia.sink.Sink;
import org.apache.maven.doxia.sink.SinkEventElement;
import org.apache.maven.doxia.sink.SinkEventTestingSink;
@@ -104,4 +106,32 @@
assertFalse( it.hasNext() );
}
+ /**
+ * @throws Exception if any
+ */
+ public void testDoxia250()
+ throws Exception
+ {
+ StringBuffer sb = new StringBuffer();
+ sb.append( "<!DOCTYPE test [" ).append( EOL );
+ sb.append( "<!ENTITY " ).append( EOL ).append( " foo " ).append(
EOL ).append( " \" " )
+ .append( EOL ).append( " &#x159; " ).append( EOL ).append( "
\">" ).append( EOL );
+ sb.append( "<!ENTITY " ).append( EOL ).append( " foo1 "
).append( EOL ).append( " \" " )
+ .append( EOL ).append( " &nbsp; " ).append( EOL ).append( "
\">" ).append( EOL );
+ sb.append( "<!ENTITY " ).append( EOL ).append( " foo2 "
).append( EOL ).append( " \" " )
+ .append( EOL ).append( " &#x161; " ).append( EOL ).append( "
\">" ).append( EOL );
+ sb.append( "]>" ).append( EOL );
+ sb.append( "<html><body>&foo;&foo1;&foo2;</body></html>" );
+
+ String text = sb.toString();
+ StringWriter w = new StringWriter();
+ Sink sink = new XhtmlSink( w );
+ // Should fail when fixing DOXIA-263 I guess.
+ ( (XhtmlParser) createParser() ).parse( text.toString(), sink );
+ String result = w.toString();
+
+ assertTrue( result.indexOf( "&#x159;" ) != -1 );
+ assertTrue( result.indexOf( "&nbsp;" ) != -1 );
+ assertTrue( result.indexOf( "&#x161;" ) != -1 );
+ }
}