ytalwar 2005/03/23 09:54:05
Modified: java/src/org/apache/xalan/templates ElemAttribute.java
ElemElement.java ElemPI.java
java/src/org/apache/xalan/xsltc/compiler VariableBase.java
DecimalFormatting.java XslAttribute.java xpath.lex
XslElement.java ApplyTemplates.java Output.java
ProcessingInstruction.java Key.java WithParam.java
AttributeSet.java Template.java CallTemplate.java
java/src/org/apache/xalan/processor XSLTAttributeDef.java
java/src/org/apache/xalan/xsltc/compiler/util Util.java
java/src/org/apache/xml/utils QName.java
java/src/org/apache/xml/serializer WriterToUTF8Buffered.java
java/src/org/apache/xalan/xsltc/runtime BasisLibrary.java
Added: java/src/org/apache/xml/utils XML11Char.java
Log:
This resolves UTF-8 character support and QName character support for XML
1.1 in XALANJ-2070.
Henry Zongaro and Brian Minchau helped in fixing this part of the JIRA issue.
A new class org.apache.xml.utils.XML11Char has been included to support XML
1.1 characters.
Also a reference to org.apache.xml.utils.XMLChar has been replaced with a
reference to
org.apache.xml.utils.XML11Char in almost all the places in Xalan.
org.apache.xml.serializer.WriterToUTF8Buffered has been updated to support
UTF-8 characters that can be represented in four bytes.
Revision Changes Path
1.30 +3 -3
xml-xalan/java/src/org/apache/xalan/templates/ElemAttribute.java
Index: ElemAttribute.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/templates/ElemAttribute.java,v
retrieving revision 1.29
retrieving revision 1.30
diff -u -r1.29 -r1.30
--- ElemAttribute.java 11 Feb 2005 06:18:14 -0000 1.29
+++ ElemAttribute.java 23 Mar 2005 17:54:04 -0000 1.30
@@ -25,7 +25,7 @@
import org.apache.xml.serializer.NamespaceMappings;
import org.apache.xml.serializer.SerializationHandler;
import org.apache.xml.utils.QName;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
import org.xml.sax.SAXException;
@@ -158,7 +158,7 @@
return false;
if(nodeName.equals("xmlns"))
return false;
- return XMLChar.isValidQName(nodeName);
+ return XML11Char.isXML11ValidQName(nodeName);
}
/**
1.39 +3 -3
xml-xalan/java/src/org/apache/xalan/templates/ElemElement.java
Index: ElemElement.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/templates/ElemElement.java,v
retrieving revision 1.38
retrieving revision 1.39
diff -u -r1.38 -r1.39
--- ElemElement.java 15 Dec 2004 17:35:34 -0000 1.38
+++ ElemElement.java 23 Mar 2005 17:54:04 -0000 1.39
@@ -24,7 +24,7 @@
import org.apache.xalan.transformer.TransformerImpl;
import org.apache.xml.serializer.SerializationHandler;
import org.apache.xml.utils.QName;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
import org.apache.xpath.XPathContext;
import org.xml.sax.SAXException;
@@ -214,7 +214,7 @@
String nodeNamespace = "";
// Only validate if an AVT was used.
- if ((nodeName != null) && (!m_name_avt.isSimple()) &&
(!XMLChar.isValidQName(nodeName)))
+ if ((nodeName != null) && (!m_name_avt.isSimple()) &&
(!XML11Char.isXML11ValidQName(nodeName)))
{
transformer.getMsgMgr().warn(
this, XSLTErrorResources.WG_ILLEGAL_ATTRIBUTE_VALUE,
1.23 +3 -3 xml-xalan/java/src/org/apache/xalan/templates/ElemPI.java
Index: ElemPI.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/templates/ElemPI.java,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -r1.22 -r1.23
--- ElemPI.java 23 Jan 2005 00:27:29 -0000 1.22
+++ ElemPI.java 23 Mar 2005 17:54:04 -0000 1.23
@@ -22,7 +22,7 @@
import org.apache.xalan.res.XSLTErrorResources;
import org.apache.xalan.transformer.TransformerImpl;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
import org.apache.xpath.XPathContext;
/**
@@ -145,7 +145,7 @@
// Only check if an avt was used (ie. this wasn't checked at compose
time.)
// Ignore processing instruction, if invalid
- else if ((!m_name_atv.isSimple()) && (!XMLChar.isValidNCName(piName)))
+ else if ((!m_name_atv.isSimple()) &&
(!XML11Char.isXML11ValidNCName(piName)))
{
transformer.getMsgMgr().warn(
this, XSLTErrorResources.WG_PROCESSINGINSTRUCTION_NOTVALID_NCNAME,
1.24 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/VariableBase.java
Index: VariableBase.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/VariableBase.java,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -r1.23 -r1.24
--- VariableBase.java 24 Feb 2004 02:57:28 -0000 1.23
+++ VariableBase.java 23 Mar 2005 17:54:04 -0000 1.24
@@ -34,7 +34,7 @@
import org.apache.xalan.xsltc.compiler.util.NodeSetType;
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -230,7 +230,7 @@
String name = getAttribute("name");
if (name.length() > 0) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
name, this);
parser.reportError(Constants.ERROR, err);
}
1.15 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/DecimalFormatting.java
Index: DecimalFormatting.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/DecimalFormatting.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- DecimalFormatting.java 24 Feb 2004 03:55:47 -0000 1.14
+++ DecimalFormatting.java 23 Mar 2005 17:54:04 -0000 1.15
@@ -31,7 +31,7 @@
import org.apache.xalan.xsltc.compiler.util.MethodGenerator;
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -59,7 +59,7 @@
// Get the name of these decimal formatting symbols
final String name = getAttribute("name");
if (name.length() > 0) {
- if (!XMLChar.isValidQName(name)){
+ if (!XML11Char.isXML11ValidQName(name)){
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
name, this);
parser.reportError(Constants.ERROR, err);
}
1.25 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/XslAttribute.java
Index: XslAttribute.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/XslAttribute.java,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- XslAttribute.java 24 Feb 2004 03:55:48 -0000 1.24
+++ XslAttribute.java 23 Mar 2005 17:54:04 -0000 1.25
@@ -36,10 +36,10 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
import org.apache.xml.serializer.ElemDesc;
import org.apache.xml.serializer.SerializationHandler;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -91,7 +91,7 @@
_isLiteral = Util.isLiteral(name);
if (_isLiteral) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
reportError(this, parser, ErrorMsg.ILLEGAL_ATTR_NAME_ERR,
name);
return;
}
1.11 +9 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/xpath.lex
Index: xpath.lex
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/xpath.lex,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- xpath.lex 24 Feb 2004 19:15:03 -0000 1.10
+++ xpath.lex 23 Mar 2005 17:54:04 -0000 1.11
@@ -53,9 +53,15 @@
Extender=[\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035\u309D-\u309E\u30FC-\u30FE]
-NCName=({Letter}|"_")({NCNameChar})*
+NCName=({Letter}|"_"|{NCNameStartChar})({NCNameChar})*
-NCNameChar={Letter}|{Digit}|"."|"-"|"_"|{CombiningChar}|{Extender}
+NCNameChar={Letter}|{Digit}|"."|"-"|"_"|{CombiningChar}|{Extender}|{NCNameStartChar}|
\u00B7 | [\u0300-\u036F] | [\u203F-\u2040] | [\u0130-\u0136]
+
+NCNameStartChar=[\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]
| \u005F |({HighSurrogate}{LowSurrogate})
+
+HighSurrogate=[\uD800-\uDBFF]
+
+LowSurrogate=[\uDC00-\uDFFF]
%%
1.24 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/XslElement.java
Index: XslElement.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/XslElement.java,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -r1.23 -r1.24
--- XslElement.java 24 Feb 2004 03:55:48 -0000 1.23
+++ XslElement.java 23 Mar 2005 17:54:04 -0000 1.24
@@ -33,7 +33,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -85,7 +85,7 @@
// Optimize compilation when name is known at compile time
_isLiteralName = Util.isLiteral(name);
if (_isLiteralName) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
ErrorMsg msg = new ErrorMsg(ErrorMsg.ILLEGAL_ELEM_NAME_ERR,
name, this);
parser.reportError(WARNING, msg);
1.22 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/ApplyTemplates.java
Index: ApplyTemplates.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/ApplyTemplates.java,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- ApplyTemplates.java 24 Feb 2004 03:55:47 -0000 1.21
+++ ApplyTemplates.java 23 Mar 2005 17:54:04 -0000 1.22
@@ -36,7 +36,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -73,7 +73,7 @@
}
if (mode.length() > 0) {
- if (!XMLChar.isValidQName(mode)) {
+ if (!XML11Char.isXML11ValidQName(mode)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
mode, this);
parser.reportError(Constants.ERROR, err);
}
1.28 +4 -4
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Output.java
Index: Output.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Output.java,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- Output.java 10 Dec 2004 18:46:42 -0000 1.27
+++ Output.java 23 Mar 2005 17:54:04 -0000 1.28
@@ -35,7 +35,7 @@
import org.apache.xalan.xsltc.compiler.util.MethodGenerator;
import org.apache.xalan.xsltc.compiler.util.Util;
import org.apache.xml.serializer.Encodings;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -167,7 +167,7 @@
if ((_method.equals("xml"))||
(_method.equals("html"))||
(_method.equals("text"))||
- ((XMLChar.isValidQName(_method)&&(_method.indexOf(":") >
0)))) {
+
((XML11Char.isXML11ValidQName(_method)&&(_method.indexOf(":") > 0)))) {
outputProperties.setProperty(OutputKeys.METHOD, _method);
} else {
reportError(this, parser, ErrorMsg.INVALID_METHOD_IN_OUTPUT,
_method);
@@ -243,7 +243,7 @@
// Make sure to store names in expanded form
while (tokens.hasMoreTokens()) {
String qname = tokens.nextToken();
- if (!XMLChar.isValidQName(qname)) {
+ if (!XML11Char.isXML11ValidQName(qname)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
qname, this);
parser.reportError(Constants.ERROR, err);
}
1.12 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/ProcessingInstruction.java
Index: ProcessingInstruction.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/ProcessingInstruction.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- ProcessingInstruction.java 24 Feb 2004 03:55:48 -0000 1.11
+++ ProcessingInstruction.java 23 Mar 2005 17:54:04 -0000 1.12
@@ -34,7 +34,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -51,7 +51,7 @@
if (name.length() > 0) {
_isLiteral = Util.isLiteral(name);
if (_isLiteral) {
- if (!XMLChar.isValidNCName(name)) {
+ if (!XML11Char.isXML11ValidNCName(name)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_NCNAME_ERR,
name, this);
parser.reportError(Constants.ERROR, err);
}
1.20 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Key.java
Index: Key.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Key.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- Key.java 15 Dec 2004 17:35:38 -0000 1.19
+++ Key.java 23 Mar 2005 17:54:04 -0000 1.20
@@ -42,7 +42,7 @@
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
import org.apache.xml.dtm.Axis;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Morten Jorgensen
@@ -78,7 +78,7 @@
// Get the required attributes and parser XPath expressions
final String name = getAttribute("name");
- if (!XMLChar.isValidQName(name)){
+ if (!XML11Char.isXML11ValidQName(name)){
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR, name,
this);
parser.reportError(Constants.ERROR, err);
}
1.18 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/WithParam.java
Index: WithParam.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/WithParam.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- WithParam.java 24 Feb 2004 02:57:28 -0000 1.17
+++ WithParam.java 23 Mar 2005 17:54:04 -0000 1.18
@@ -30,7 +30,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -112,7 +112,7 @@
public void parseContents(Parser parser) {
final String name = getAttribute("name");
if (name.length() > 0) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR, name,
this);
parser.reportError(Constants.ERROR, err);
1.18 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/AttributeSet.java
Index: AttributeSet.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/AttributeSet.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- AttributeSet.java 24 Feb 2004 03:55:47 -0000 1.17
+++ AttributeSet.java 23 Mar 2005 17:54:04 -0000 1.18
@@ -32,7 +32,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -85,7 +85,7 @@
// Get this attribute set's name
final String name = getAttribute("name");
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR, name,
this);
parser.reportError(Constants.ERROR, err);
}
1.26 +4 -4
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Template.java
Index: Template.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Template.java,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -r1.25 -r1.26
--- Template.java 24 Feb 2004 03:55:48 -0000 1.25
+++ Template.java 23 Mar 2005 17:54:04 -0000 1.26
@@ -32,7 +32,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
@@ -194,7 +194,7 @@
_stylesheet = super.getStylesheet();
if (name.length() > 0) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
name, this);
parser.reportError(Constants.ERROR, err);
}
@@ -202,7 +202,7 @@
}
if (mode.length() > 0) {
- if (!XMLChar.isValidQName(mode)) {
+ if (!XML11Char.isXML11ValidQName(mode)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
mode, this);
parser.reportError(Constants.ERROR, err);
}
1.20 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/CallTemplate.java
Index: CallTemplate.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/CallTemplate.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- CallTemplate.java 16 Nov 2004 19:57:17 -0000 1.19
+++ CallTemplate.java 23 Mar 2005 17:54:04 -0000 1.20
@@ -31,7 +31,7 @@
import org.apache.xalan.xsltc.compiler.util.Type;
import org.apache.xalan.xsltc.compiler.util.TypeCheckError;
import org.apache.xalan.xsltc.compiler.util.Util;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
import java.util.Vector;
@@ -73,7 +73,7 @@
public void parseContents(Parser parser) {
final String name = getAttribute("name");
if (name.length() > 0) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
ErrorMsg err = new ErrorMsg(ErrorMsg.INVALID_QNAME_ERR,
name, this);
parser.reportError(Constants.ERROR, err);
}
1.34 +8 -8
xml-xalan/java/src/org/apache/xalan/processor/XSLTAttributeDef.java
Index: XSLTAttributeDef.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/processor/XSLTAttributeDef.java,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -r1.33 -r1.34
--- XSLTAttributeDef.java 15 Dec 2004 17:35:30 -0000 1.33
+++ XSLTAttributeDef.java 23 Mar 2005 17:54:04 -0000 1.34
@@ -33,7 +33,7 @@
import org.apache.xml.utils.QName;
import org.apache.xml.utils.StringToIntTable;
import org.apache.xml.utils.StringVector;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
import org.apache.xpath.XPath;
@@ -794,7 +794,7 @@
AVT avt = new AVT(handler, uri, name, rawName, value, owner);
// If an AVT wasn't used, validate the value
- if ((avt.isSimple()) && (!XMLChar.isValidNmtoken(value))) {
+ if ((avt.isSimple()) &&
(!XML11Char.isXML11ValidNmtoken(value))) {
handleError(handler,XSLTErrorResources.INVALID_NMTOKEN, new
Object[] {name,value},null);
return null;
}
@@ -805,7 +805,7 @@
throw new org.xml.sax.SAXException(te);
}
} else {
- if (!XMLChar.isValidNmtoken(value)) {
+ if (!XML11Char.isXML11ValidNmtoken(value)) {
handleError(handler,XSLTErrorResources.INVALID_NMTOKEN, new
Object[] {name,value},null);
return null;
}
@@ -983,7 +983,7 @@
if (indexOfNSSep >= 0)
{
String prefix = value.substring(0, indexOfNSSep);
- if (!XMLChar.isValidNCName(prefix))
+ if (!XML11Char.isXML11ValidNCName(prefix))
{
handleError(handler,XSLTErrorResources.INVALID_QNAME,new Object[]{name,value
},null);
return null;
@@ -994,7 +994,7 @@
? value : value.substring(indexOfNSSep + 1);
if ((localName == null) || (localName.length() == 0) ||
- (!XMLChar.isValidNCName(localName)))
+ (!XML11Char.isXML11ValidNCName(localName)))
{
handleError(handler,XSLTErrorResources.INVALID_QNAME,new Object[]{name,value
},null );
return null;
@@ -1039,7 +1039,7 @@
avt = new AVT(handler, uri, name, rawName, value, owner);
// If an AVT wasn't used, validate the value
- if ((avt.isSimple()) && (!XMLChar.isValidNCName(value)))
+ if ((avt.isSimple()) && (!XML11Char.isXML11ValidNCName(value)))
{
handleError(handler,XSLTErrorResources.INVALID_NCNAME,new
Object[] {name,value},null);
return null;
@@ -1053,7 +1053,7 @@
}
} else {
- if (!XMLChar.isValidNCName(value))
+ if (!XML11Char.isXML11ValidNCName(value))
{
handleError(handler,XSLTErrorResources.INVALID_NCNAME,new
Object[] {name,value},null);
return null;
1.16 +3 -3
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/util/Util.java
Index: Util.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/util/Util.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- Util.java 24 Feb 2004 03:55:48 -0000 1.15
+++ Util.java 23 Mar 2005 17:54:05 -0000 1.16
@@ -23,7 +23,7 @@
import org.apache.bcel.generic.Type;
import org.apache.xalan.xsltc.compiler.Constants;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* @author Jacek Ambroziak
@@ -188,7 +188,7 @@
if ((str != null) && (!str.equals(Constants.EMPTYSTRING))) {
final StringTokenizer tokens = new StringTokenizer(str);
while (tokens.hasMoreTokens()) {
- if (!XMLChar.isValidQName(tokens.nextToken())) {
+ if (!XML11Char.isXML11ValidQName(tokens.nextToken())) {
return false;
}
}
1.18 +8 -8 xml-xalan/java/src/org/apache/xml/utils/QName.java
Index: QName.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xml/utils/QName.java,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- QName.java 24 Jan 2005 04:04:40 -0000 1.17
+++ QName.java 23 Mar 2005 17:54:05 -0000 1.18
@@ -111,7 +111,7 @@
if (validate)
{
- if (!XMLChar.isValidNCName(localName))
+ if (!XML11Char.isXML11ValidNCName(localName))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_LOCALNAME_INVALID,null )); //"Argument
'localName' not a valid NCName");
@@ -158,13 +158,13 @@
if (validate)
{
- if (!XMLChar.isValidNCName(localName))
+ if (!XML11Char.isXML11ValidNCName(localName))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_LOCALNAME_INVALID,null )); //"Argument
'localName' not a valid NCName");
}
- if ((null != prefix) && (!XMLChar.isValidNCName(prefix)))
+ if ((null != prefix) && (!XML11Char.isXML11ValidNCName(prefix)))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_PREFIX_INVALID,null )); //"Argument
'prefix' not a valid NCName");
@@ -208,7 +208,7 @@
if (validate)
{
- if (!XMLChar.isValidNCName(localName))
+ if (!XML11Char.isXML11ValidNCName(localName))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_LOCALNAME_INVALID,null )); //"Argument
'localName' not a valid NCName");
@@ -299,7 +299,7 @@
if (validate)
{
- if ((_localName == null) || (!XMLChar.isValidNCName(_localName)))
+ if ((_localName == null) ||
(!XML11Char.isXML11ValidNCName(_localName)))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_LOCALNAME_INVALID,null )); //"Argument
'localName' not a valid NCName");
@@ -388,7 +388,7 @@
if (validate)
{
- if ((_localName == null) || (!XMLChar.isValidNCName(_localName)))
+ if ((_localName == null) ||
(!XML11Char.isXML11ValidNCName(_localName)))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_LOCALNAME_INVALID,null )); //"Argument
'localName' not a valid NCName");
@@ -466,7 +466,7 @@
if (validate)
{
- if ((_localName == null) || (!XMLChar.isValidNCName(_localName)))
+ if ((_localName == null) ||
(!XML11Char.isXML11ValidNCName(_localName)))
{
throw new IllegalArgumentException(XMLMessages.createXMLMessage(
XMLErrorResources.ER_ARG_LOCALNAME_INVALID,null )); //"Argument
'localName' not a valid NCName");
1.1 xml-xalan/java/src/org/apache/xml/utils/XML11Char.java
Index: XML11Char.java
===================================================================
/*
* Copyright 1999-2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xml.utils;
import java.util.Arrays;
/**
* THIS IS A COPY OF THE XERCES-2J CLASS org.apache.xerces.util.XMLChar
*
* This class defines the basic properties of characters in XML 1.1. The data
* in this class can be used to verify that a character is a valid
* XML 1.1 character or if the character is a space, name start, or name
* character.
* <p>
* A series of convenience methods are supplied to ease the burden
* of the developer. Using the character as an index into the
<code>XML11CHARS</code>
* array and applying the appropriate mask flag (e.g.
* <code>MASK_VALID</code>), yields the same results as calling the
* convenience methods. There is one exception: check the comments
* for the <code>isValid</code> method for details.
*
* @version $Id: XML11Char.java,v 1.1 2005/03/23 17:54:05 ytalwar Exp $
*/
public class XML11Char {
//
// Constants
//
/**
 * Character flags for XML 1.1.
 * <p>
 * Holds one flag byte for each of the 1 &lt;&lt; 16 (65536) values in the
 * range 0x0000-0xFFFF. The lookup methods of this class test an entry
 * against the <code>MASK_XML11_*</code> constants below, so each entry is
 * a bitwise OR of those flags. Because <code>MASK_XML11_NCNAME</code> is
 * 0x80, entries carrying that flag are stored as negative bytes.
 */
private static final byte XML11CHARS [] = new byte [1 << 16];
/** XML 1.1 Valid character mask. */
public static final int MASK_XML11_VALID = 0x01;
/** XML 1.1 Space character mask. */
public static final int MASK_XML11_SPACE = 0x02;
/** XML 1.1 Name start character mask. */
public static final int MASK_XML11_NAME_START = 0x04;
/** XML 1.1 Name character mask. */
public static final int MASK_XML11_NAME = 0x08;
/** XML 1.1 control character mask */
public static final int MASK_XML11_CONTROL = 0x10;
/** XML 1.1 content for external entities (valid - "special" chars -
control chars) */
public static final int MASK_XML11_CONTENT = 0x20;
/** XML namespaces 1.1 NCNameStart */
public static final int MASK_XML11_NCNAME_START = 0x40;
/** XML namespaces 1.1 NCName */
public static final int MASK_XML11_NCNAME = 0x80;
/**
 * XML 1.1 content for internal entities (valid - "special" chars).
 * This is a combined mask: a character matches when it carries either the
 * control flag or the external-entity content flag.
 */
public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL
| MASK_XML11_CONTENT;
//
// Static initialization
//
static {
    // Initializing the Character Flag Array
    // Code generated by: XML11CharGenerator.
    //
    // Each statement assigns a flag byte (a bitwise OR of the MASK_XML11_*
    // constants) to one index or to a half-open index range
    // [from, to) of XML11CHARS. Indexes that are never assigned
    // (0, 55296-57343, 65534 and 65535) keep the default value 0,
    // i.e. no flag set.
    Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17
    XML11CHARS[9] = 35;
    XML11CHARS[10] = 3;
    Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17
    XML11CHARS[13] = 3;
    Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17
    XML11CHARS[32] = 35;
    Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33
    XML11CHARS[38] = 1;
    Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33
    Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87
    XML11CHARS[47] = 33;
    Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87
    XML11CHARS[58] = 45;
    XML11CHARS[59] = 33;
    XML11CHARS[60] = 1;
    Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33
    Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19
    Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33
    XML11CHARS[93] = 1;
    XML11CHARS[94] = 33;
    XML11CHARS[95] = -19;
    XML11CHARS[96] = 33;
    Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19
    Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33
    Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17
    XML11CHARS[133] = 35;
    Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17
    Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33
    XML11CHARS[183] = -87;
    Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33
    Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19
    XML11CHARS[215] = 33;
    Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19
    XML11CHARS[247] = 33;
    Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19
    Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87
    Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19
    XML11CHARS[894] = 33;
    Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19
    Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33
    Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19
    Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33
    XML11CHARS[8232] = 35;
    Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33
    Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87
    Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33
    Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19
    Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33
    Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19
    Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33
    Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19
    Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33
    Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19
    Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33
    Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19
} // <clinit>()
//
// Public static methods
//
/**
* Returns true if the specified character is a space character
* as amended in the XML 1.1 specification.
*
* @param c The character to check.
*/
public static boolean isXML11Space(int c) {
    // Values at or above 0x10000 have no entry in the flag table and are
    // never reported as spaces.
    if (c >= 0x10000) {
        return false;
    }
    final int flags = XML11CHARS[c];
    return (flags & MASK_XML11_SPACE) != 0;
} // isXML11Space(int):boolean
/**
* Returns true if the specified character is valid. This method
* also checks the surrogate character range from 0x10000 to 0x10FFFF.
* <p>
* If the program chooses to apply the mask directly to the
* <code>XML11CHARS</code> array, then they are responsible for checking
* the surrogate character range.
*
* @param c The character to check.
*/
public static boolean isXML11Valid(int c) {
    // Values from 0x10000 upward are not in the flag table; they are
    // valid up to and including 0x10FFFF.
    if (c >= 0x10000) {
        return c <= 0x10FFFF;
    }
    final int flags = XML11CHARS[c];
    return (flags & MASK_XML11_VALID) != 0;
} // isXML11Valid(int):boolean
/**
* Returns true if the specified character is invalid.
*
* @param c The character to check.
*/
public static boolean isXML11Invalid(int c) {
    // Exact logical complement of isXML11Valid.
    final boolean valid = isXML11Valid(c);
    return !valid;
} // isXML11Invalid(int):boolean
/**
* Returns true if the specified character is valid and permitted outside
* of a character reference.
* That is, this method will return false for the same set as
* isXML11Valid, except it also reports false for "control characters".
*
* @param c The character to check.
*/
public static boolean isXML11ValidLiteral(int c) {
    // From 0x10000 upwards no table lookup is done: the range check alone
    // decides.
    if (c >= 0x10000) {
        return c <= 0x10FFFF;
    }
    // Below 0x10000 the character must carry the VALID flag and must not
    // carry the CONTROL flag.
    if ((XML11CHARS[c] & MASK_XML11_VALID) == 0) {
        return false;
    }
    return (XML11CHARS[c] & MASK_XML11_CONTROL) == 0;
} // isXML11ValidLiteral(int):boolean
/**
* Returns true if the specified character can be considered
* content in an external parsed entity.
*
* @param c The character to check.
*/
public static boolean isXML11Content(int c) {
    // Below 0x10000 the CONTENT flag in the lookup table decides.
    if (c < 0x10000) {
        return (XML11CHARS[c] & MASK_XML11_CONTENT) != 0;
    }
    // Everything from 0x10000 through 0x10FFFF counts as content.
    return c <= 0x10FFFF;
} // isXML11Content(int):boolean
/**
* Returns true if the specified character can be considered
* content in an internal parsed entity.
*
* @param c The character to check.
*/
public static boolean isXML11InternalEntityContent(int c) {
    // Below 0x10000 the CONTENT_INTERNAL flag in the lookup table decides.
    if (c < 0x10000) {
        return (XML11CHARS[c] & MASK_XML11_CONTENT_INTERNAL) != 0;
    }
    // Everything from 0x10000 through 0x10FFFF counts as content.
    return c <= 0x10FFFF;
} // isXML11InternalEntityContent(int):boolean
/**
* Returns true if the specified character is a valid name start
* character as defined by production [4] in the XML 1.1
* specification.
*
* @param c The character to check.
*/
public static boolean isXML11NameStart(int c) {
    // Below 0x10000 the NAME_START flag in the lookup table decides.
    if (c < 0x10000) {
        return (XML11CHARS[c] & MASK_XML11_NAME_START) != 0;
    }
    // From 0x10000 upwards only values below 0xF0000 are accepted.
    return c < 0xF0000;
} // isXML11NameStart(int):boolean
/**
* Returns true if the specified character is a valid name
* character as defined by production [4a] in the XML 1.1
* specification.
*
* @param c The character to check.
*/
public static boolean isXML11Name(int c) {
    // Below 0x10000 the NAME flag in the lookup table decides.
    if (c < 0x10000) {
        return (XML11CHARS[c] & MASK_XML11_NAME) != 0;
    }
    // From 0x10000 upwards only values below 0xF0000 are accepted.
    return c < 0xF0000;
} // isXML11Name(int):boolean
/**
* Returns true if the specified character is a valid NCName start
* character as defined by production [4] in Namespaces in XML
* 1.1 recommendation.
*
* @param c The character to check.
*/
public static boolean isXML11NCNameStart(int c) {
    // Below 0x10000 the NCNAME_START flag in the lookup table decides.
    if (c < 0x10000) {
        return (XML11CHARS[c] & MASK_XML11_NCNAME_START) != 0;
    }
    // From 0x10000 upwards only values below 0xF0000 are accepted.
    return c < 0xF0000;
} // isXML11NCNameStart(int):boolean
/**
* Returns true if the specified character is a valid NCName
* character as defined by production [5] in Namespaces in XML
* 1.1 recommendation.
*
* @param c The character to check.
*/
public static boolean isXML11NCName(int c) {
    // Below 0x10000 the NCNAME flag in the lookup table decides.
    if (c < 0x10000) {
        return (XML11CHARS[c] & MASK_XML11_NCNAME) != 0;
    }
    // From 0x10000 upwards only values below 0xF0000 are accepted.
    return c < 0xF0000;
} // isXML11NCName(int):boolean
/**
* Returns whether the given character is a valid
* high surrogate for a name character. This includes
* all high surrogates for characters [0x10000-0xEFFFF].
* In other words everything excluding planes 15 and 16.
*
* @param c The character to check.
*/
public static boolean isXML11NameHighSurrogate(int c) {
    // Anything below the first high surrogate can be rejected immediately.
    if (c < 0xD800) {
        return false;
    }
    // 0xDB7F is the upper bound: the high surrogate of 0xEFFFF.
    return c <= 0xDB7F;
}
/*
* [5] Name ::= NameStartChar NameChar*
*/
/**
* Check to see if a string is a valid Name according to [5]
* in the XML 1.1 Recommendation
*
* @param name string to check
* @return true if name is a valid Name
*/
public static boolean isXML11ValidName(String name) {
    final int len = name.length();
    if (len == 0) {
        return false;
    }

    // Leading character: either a single name-start char, or a
    // high/low surrogate pair whose combined code point is a name start.
    int pos;
    final char first = name.charAt(0);
    if (isXML11NameStart(first)) {
        pos = 1;
    } else if (len > 1 && isXML11NameHighSurrogate(first)) {
        final char second = name.charAt(1);
        if (!XMLChar.isLowSurrogate(second)
                || !isXML11NameStart(XMLChar.supplemental(first, second))) {
            return false;
        }
        pos = 2;
    } else {
        return false;
    }

    // Remaining characters: each must be a name char, either on its own
    // or as a surrogate pair.
    for (; pos < len; ++pos) {
        final char current = name.charAt(pos);
        if (isXML11Name(current)) {
            continue;
        }
        // Not a name char by itself: it must be an accepted high surrogate
        // that is followed by another char (pos is advanced onto it).
        if (++pos >= len || !isXML11NameHighSurrogate(current)) {
            return false;
        }
        final char low = name.charAt(pos);
        if (!XMLChar.isLowSurrogate(low)
                || !isXML11Name(XMLChar.supplemental(current, low))) {
            return false;
        }
    }
    return true;
} // isXML11ValidName(String):boolean
/*
* from the namespace 1.1 rec
* [4] NCName ::= NCNameStartChar NCNameChar*
*/
/**
* Check to see if a string is a valid NCName according to [4]
* from the XML Namespaces 1.1 Recommendation
*
* @param ncName string to check
* @return true if name is a valid NCName
*/
public static boolean isXML11ValidNCName(String ncName) {
    final int len = ncName.length();
    if (len == 0) {
        return false;
    }

    // Leading character: either a single NCName-start char, or a
    // high/low surrogate pair whose combined code point is an NCName start.
    int pos;
    final char first = ncName.charAt(0);
    if (isXML11NCNameStart(first)) {
        pos = 1;
    } else if (len > 1 && isXML11NameHighSurrogate(first)) {
        final char second = ncName.charAt(1);
        if (!XMLChar.isLowSurrogate(second)
                || !isXML11NCNameStart(XMLChar.supplemental(first, second))) {
            return false;
        }
        pos = 2;
    } else {
        return false;
    }

    // Remaining characters: each must be an NCName char, either on its
    // own or as a surrogate pair.
    for (; pos < len; ++pos) {
        final char current = ncName.charAt(pos);
        if (isXML11NCName(current)) {
            continue;
        }
        // Not an NCName char by itself: it must be an accepted high
        // surrogate that is followed by another char (pos is advanced
        // onto it).
        if (++pos >= len || !isXML11NameHighSurrogate(current)) {
            return false;
        }
        final char low = ncName.charAt(pos);
        if (!XMLChar.isLowSurrogate(low)
                || !isXML11NCName(XMLChar.supplemental(current, low))) {
            return false;
        }
    }
    return true;
} // isXML11ValidNCName(String):boolean
/*
* [7] Nmtoken ::= (NameChar)+
*/
/**
* Check to see if a string is a valid Nmtoken according to [7]
* in the XML 1.1 Recommendation
*
* @param nmtoken string to check
* @return true if nmtoken is a valid Nmtoken
*/
public static boolean isXML11ValidNmtoken(String nmtoken) {
    final int len = nmtoken.length();
    if (len == 0) {
        return false;
    }

    // Every character must be a name char, either on its own or as a
    // high/low surrogate pair; there is no special rule for the first one.
    int pos = 0;
    while (pos < len) {
        final char current = nmtoken.charAt(pos);
        if (!isXML11Name(current)) {
            // Not a name char by itself: it must be an accepted high
            // surrogate that is followed by another char (pos is advanced
            // onto it).
            if (++pos >= len || !isXML11NameHighSurrogate(current)) {
                return false;
            }
            final char low = nmtoken.charAt(pos);
            if (!XMLChar.isLowSurrogate(low)
                    || !isXML11Name(XMLChar.supplemental(current, low))) {
                return false;
            }
        }
        ++pos;
    }
    return true;
} // isXML11ValidNmtoken(String):boolean
/**
* Simple check to determine if qname is legal. If it returns false
* then <code>str</code> is illegal; if it returns true then
* <code>str</code> is legal.
*/
public static boolean isXML11ValidQName(String str) {
    final int sep = str.indexOf(':');
    final int lastIndex = str.length() - 1;

    // A colon in first or last position leaves an empty prefix or local
    // part. For the empty string both values are -1, so it is rejected
    // here as well.
    if (sep == 0 || sep == lastIndex) {
        return false;
    }

    // No colon at all: the whole string must be an NCName.
    if (sep < 0) {
        return isXML11ValidNCName(str);
    }

    // Split at the first colon; both halves must be NCNames.
    return isXML11ValidNCName(str.substring(0, sep))
            && isXML11ValidNCName(str.substring(sep + 1));
}
} // class XML11Char
1.9 +120 -60
xml-xalan/java/src/org/apache/xml/serializer/WriterToUTF8Buffered.java
Index: WriterToUTF8Buffered.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/serializer/WriterToUTF8Buffered.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- WriterToUTF8Buffered.java 1 Sep 2004 17:36:30 -0000 1.8
+++ WriterToUTF8Buffered.java 23 Mar 2005 17:54:05 -0000 1.9
@@ -1,5 +1,5 @@
/*
- * Copyright 1999-2004 The Apache Software Foundation.
+ * Copyright 1999-2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -41,7 +41,6 @@
* This is a fixed constant is used rather than m_outputBytes.lenght for
performance.
*/
private static final int BYTES_MAX=16*1024;
-
/** number of characters that the character buffer can hold.
* This is 1/3 of the number of bytes because UTF-8 encoding
* can expand one unicode character by up to 3 bytes.
@@ -87,7 +86,7 @@
// Big enough to hold the input chars that will be transformed
// into output bytes in m_ouputBytes.
- m_inputChars = new char[CHARS_MAX + 1];
+ m_inputChars = new char[CHARS_MAX + 2];
count = 0;
// the old body of this constructor, before the buffersize was changed
to a constant
@@ -147,12 +146,20 @@
m_outputBytes[count++] = (byte) (0xc0 + (c >> 6));
m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
}
- else
+ else if (c < 0x10000)
{
m_outputBytes[count++] = (byte) (0xe0 + (c >> 12));
m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
}
+ else
+ {
+ m_outputBytes[count++] = (byte) (0xf0 + (c >> 18));
+ m_outputBytes[count++] = (byte) (0x80 + ((c >> 12) & 0x3f));
+ m_outputBytes[count++] = (byte) (0x80 + ((c >> 6) & 0x3f));
+ m_outputBytes[count++] = (byte) (0x80 + (c & 0x3f));
+ }
+
}
@@ -182,7 +189,7 @@
// The requested length is greater than the unused part of the buffer
flushBuffer();
- if (lengthx3 >= BYTES_MAX)
+ if (lengthx3 > BYTES_MAX)
{
/*
* The requested length exceeds the size of the buffer.
@@ -191,12 +198,47 @@
* and make multiple recursive calls.
* Be careful about integer overflows in multiplication.
*/
- final int chunks = 1 + length/CHARS_MAX;
+ int split = length/CHARS_MAX;
+ final int chunks;
+ if (split > 1)
+ chunks = split;
+ else
+ chunks = 2;
int end_chunk = start;
for (int chunk = 1; chunk <= chunks; chunk++)
{
int start_chunk = end_chunk;
end_chunk = start + (int) ((((long) length) * chunk) / chunks);
+
+ // Adjust the end of the chunk if it ends on a high char
+ // of a Unicode surrogate pair and low char of the pair
+ // is not going to be in the same chunk
+ final char c = chars[end_chunk - 1];
+ int ic = chars[end_chunk - 1];
+ if (c >= 0xD800 && c <= 0xDBFF) {
+ // The last Java char that we were going
+ // to process is the first of a
+ // Java surrogate char pair that
+ // represent a Unicode character.
+
+ if (end_chunk < start + length) {
+ // Avoid spanning by including the low
+ // char in the current chunk of chars.
+ end_chunk++;
+ } else {
+ /* This is the last char of the last chunk,
+ * and it is the high char of a high/low pair with
+ * no low char provided.
+ * TODO: error message needed.
+ * The char array incorrectly ends in a high char
+ * of a high/low surrogate pair, but there is
+ * no corresponding low as the high is the last char
+ */
+ end_chunk--;
+ }
+ }
+
+
int len_chunk = (end_chunk - start_chunk);
this.write(chars,start_chunk, len_chunk);
}
@@ -232,6 +274,25 @@
buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
}
+ /**
+ * The following else if condition is added to support XML 1.1
Characters for
+ * UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
+ * Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
+ * [1101 11yy] [yyxx xxxx] (low surrogate)
+ * * uuuuu = wwww + 1
+ */
+ else if (c >= 0xD800 && c <= 0xDBFF)
+ {
+ char high, low;
+ high = c;
+ i++;
+ low = chars[i];
+
+ buf_loc[count_loc++] = (byte) (0xF0 | (((high + 0x40) >> 8) &
0xf0));
+ buf_loc[count_loc++] = (byte) (0x80 | (((high + 0x40) >> 2) &
0x3f));
+ buf_loc[count_loc++] = (byte) (0x80 | ((low >> 6) & 0x0f) + ((high
<< 4) & 0x30));
+ buf_loc[count_loc++] = (byte) (0x80 | (low & 0x3f));
+ }
else
{
buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
@@ -243,53 +304,6 @@
count = count_loc;
}
-
- /**
- * Writes out the character array
- * @param chars a character array with only ASCII characters, so
- * the UTF-8 encoding is optimized.
- * @param start the first character in the input array
- * @param length the number of characters in the input array
- */
- private void directWrite(final char chars[], final int start, final int
length)
- throws java.io.IOException
- {
-
-
-
- if (length >= BYTES_MAX - count)
- {
- // The requested length is greater than the unused part of the buffer
- flushBuffer();
-
- if (length >= BYTES_MAX)
- {
- /*
- * The requested length exceeds the size of the buffer.
- * Cut the buffer up into chunks, each of which will
- * not cause an overflow to the output buffer m_outputBytes,
- * and make multiple recursive calls.
- */
- int chunks = 1 + length/CHARS_MAX;
- for (int chunk =0 ; chunk < chunks; chunk++)
- {
- int start_chunk = start + ((length*chunk)/chunks);
- int end_chunk = start + ((length*(chunk+1))/chunks);
- int len_chunk = (end_chunk - start_chunk);
- this.directWrite(chars,start_chunk, len_chunk);
- }
- return;
- }
- }
-
- final int n = length+start;
- final byte[] buf_loc = m_outputBytes; // local reference for faster
access
- int count_loc = count; // local integer for faster access
- for(int i=start; i < n ; i++ )
- buf_loc[count_loc++] = (byte) buf_loc[i];
- // Store the local integer back into the instance variable
- count = count_loc;
- }
/**
* Write a string.
@@ -312,20 +326,47 @@
// The requested length is greater than the unused part of the buffer
flushBuffer();
- if (lengthx3 >= BYTES_MAX)
+ if (lengthx3 > BYTES_MAX)
{
/*
* The requested length exceeds the size of the buffer,
* so break it up in chunks that don't exceed the buffer size.
*/
final int start = 0;
- int chunks = 1 + length/CHARS_MAX;
- for (int chunk =0 ; chunk < chunks; chunk++)
+ int split = length/CHARS_MAX;
+ final int chunks;
+ if (split > 1)
+ chunks = split;
+ else
+ chunks = 2;
+ int end_chunk = 0;
+ for (int chunk = 1; chunk <= chunks; chunk++)
{
- int start_chunk = start + ((length*chunk)/chunks);
- int end_chunk = start + ((length*(chunk+1))/chunks);
- int len_chunk = (end_chunk - start_chunk);
+ int start_chunk = end_chunk;
+ end_chunk = start + (int) ((((long) length) * chunk) / chunks);
s.getChars(start_chunk,end_chunk, m_inputChars,0);
+ int len_chunk = (end_chunk - start_chunk);
+
+ // Adjust the end of the chunk if it ends on a high char
+ // of a Unicode surrogate pair and low char of the pair
+ // is not going to be in the same chunk
+ final char c = m_inputChars[len_chunk - 1];
+ if (c >= 0xD800 && c <= 0xDBFF) {
+ // Exclude char in this chunk,
+ // to avoid spanning a Unicode character
+ // that is in two Java chars as a high/low surrogate
+ end_chunk--;
+ len_chunk--;
+ if (chunk == chunks) {
+ /* TODO: error message needed.
+ * The String incorrectly ends in a high char
+ * of a high/low surrogate pair, but there is
+ * no corresponding low as the high is the last char
+ * Recover by ignoring this last char.
+ */
+ }
+ }
+
this.write(m_inputChars,0, len_chunk);
}
return;
@@ -361,6 +402,25 @@
buf_loc[count_loc++] = (byte) (0xc0 + (c >> 6));
buf_loc[count_loc++] = (byte) (0x80 + (c & 0x3f));
}
+ /**
+ * The following else if condition is added to support XML 1.1
Characters for
+ * UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
+ * Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
+ * [1101 11yy] [yyxx xxxx] (low surrogate)
+ * * uuuuu = wwww + 1
+ */
+ else if (c >= 0xD800 && c <= 0xDBFF)
+ {
+ char high, low;
+ high = c;
+ i++;
+ low = chars[i];
+
+ buf_loc[count_loc++] = (byte) (0xF0 | (((high + 0x40) >> 8) & 0xf0));
+ buf_loc[count_loc++] = (byte) (0x80 | (((high + 0x40) >> 2) & 0x3f));
+ buf_loc[count_loc++] = (byte) (0x80 | ((low >> 6) & 0x0f) + ((high
<< 4) & 0x30));
+ buf_loc[count_loc++] = (byte) (0x80 | (low & 0x3f));
+ }
else
{
buf_loc[count_loc++] = (byte) (0xe0 + (c >> 12));
1.81 +7 -7
xml-xalan/java/src/org/apache/xalan/xsltc/runtime/BasisLibrary.java
Index: BasisLibrary.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/runtime/BasisLibrary.java,v
retrieving revision 1.80
retrieving revision 1.81
diff -u -r1.80 -r1.81
--- BasisLibrary.java 15 Dec 2004 17:35:45 -0000 1.80
+++ BasisLibrary.java 23 Mar 2005 17:54:05 -0000 1.81
@@ -46,7 +46,7 @@
import org.xml.sax.SAXException;
import org.apache.xml.serializer.NamespaceMappings;
import org.apache.xml.serializer.SerializationHandler;
-import org.apache.xml.utils.XMLChar;
+import org.apache.xml.utils.XML11Char;
/**
* Standard XSLT functions. All standard functions expect the current node
@@ -1291,20 +1291,20 @@
if (firstOccur != lastOccur) {
final String oriPrefix = name.substring(firstOccur+1,
lastOccur);
- if (!XMLChar.isValidNCName(oriPrefix)) {
+ if (!XML11Char.isXML11ValidNCName(oriPrefix)) {
// even though the orignal prefix is ignored, it should
still get checked for valid NCName
runTimeError(INVALID_QNAME_ERR,oriPrefix+":"+localName);
}
}
// prefix must be a valid NCName
- if (!XMLChar.isValidNCName(newPrefix)) {
+ if (!XML11Char.isXML11ValidNCName(newPrefix)) {
runTimeError(INVALID_QNAME_ERR,newPrefix+":"+localName);
}
}
// local name must be a valid NCName and must not be XMLNS
- if
((!XMLChar.isValidNCName(localName))||(localName.equals(Constants.XMLNS_PREFIX)))
{
+ if
((!XML11Char.isXML11ValidNCName(localName))||(localName.equals(Constants.XMLNS_PREFIX)))
{
runTimeError(INVALID_QNAME_ERR,localName);
}
}
@@ -1314,7 +1314,7 @@
* This method should only be invoked if the attribute value is an AVT
*/
public static void checkNCName(String name) {
- if (!XMLChar.isValidNCName(name)) {
+ if (!XML11Char.isXML11ValidNCName(name)) {
runTimeError(INVALID_NCNAME_ERR,name);
}
}
@@ -1324,7 +1324,7 @@
* This method should only be invoked if the attribute value is an AVT
*/
public static void checkQName(String name) {
- if (!XMLChar.isValidQName(name)) {
+ if (!XML11Char.isXML11ValidQName(name)) {
runTimeError(INVALID_QNAME_ERR,name);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]