mrglavas 2004/08/12 12:13:32
Modified: java/src/org/apache/xerces/impl XMLScanner.java
XML11DocumentScannerImpl.java
XML11DTDScannerImpl.java
Log:
Performance: If the scanner stopped on the closing quote,
we have scanned the entire attribute value. If this value
does not contain any TABs, CRs or LFs, it would be
unchanged by normalization. We now check for this common
condition to determine whether the normalized and
non-normalized values are equal.
Revision Changes Path
1.50 +56 -2 xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
Index: XMLScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -r1.49 -r1.50
--- XMLScanner.java 12 Aug 2004 18:24:56 -0000 1.49
+++ XMLScanner.java 12 Aug 2004 19:13:32 -0000 1.50
@@ -772,9 +772,19 @@
+ value.toString() + "\"");
}
+ int fromIndex = 0;
+ if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
+ /** Both the non-normalized and normalized attribute values are equal. **/
+ nonNormalizedValue.setValues(value);
+ int cquote = fEntityScanner.scanChar();
+ if (cquote != quote) {
+ reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
+ }
+ return true;
+ }
fStringBuffer2.clear();
fStringBuffer2.append(value);
- normalizeWhitespace(value);
+ normalizeWhitespace(value, fromIndex);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** normalizeWhitespace -> \""
+ value.toString() + "\"");
@@ -1115,6 +1125,50 @@
value.ch[i] = ' ';
}
}
+ }
+
+ /**
+ * Normalize whitespace in an XMLString converting all whitespace
+ * characters to space characters.
+ */
+ protected void normalizeWhitespace(XMLString value, int fromIndex) {
+ int end = value.offset + value.length;
+ for (int i = value.offset + fromIndex; i < end; ++i) {
+ int c = value.ch[i];
+ // Performance: For XML 1.0 documents take advantage of
+ // the fact that the only legal characters below 0x20
+ // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
+ // already determined the well-formedness of these
+ // characters it is sufficient (and safe) to check
+ // against 0x20. -- mrglavas
+ if (c < 0x20) {
+ value.ch[i] = ' ';
+ }
+ }
+ }
+
+ /**
+ * Checks whether this string would be unchanged by normalization.
+ *
+ * @return -1 if the value would be unchanged by normalization,
+ * otherwise the index of the first whitespace character which
+ * would be transformed.
+ */
+ protected int isUnchangedByNormalization(XMLString value) {
+ int end = value.offset + value.length;
+ for (int i = value.offset; i < end; ++i) {
+ int c = value.ch[i];
+ // Performance: For XML 1.0 documents take advantage of
+ // the fact that the only legal characters below 0x20
+ // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
+ // already determined the well-formedness of these
+ // characters it is sufficient (and safe) to check
+ // against 0x20. -- mrglavas
+ if (c < 0x20) {
+ return i - value.offset;
+ }
+ }
+ return -1;
}
//
1.21 +46 -3 xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
Index: XML11DocumentScannerImpl.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- XML11DocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.20
+++ XML11DocumentScannerImpl.java 12 Aug 2004 19:13:32 -0000 1.21
@@ -172,9 +172,20 @@
System.out.println("** scanLiteral -> \""
+ value.toString() + "\"");
}
+
+ int fromIndex = 0;
+ if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
+ /** Both the non-normalized and normalized attribute values are equal. **/
+ nonNormalizedValue.setValues(value);
+ int cquote = fEntityScanner.scanChar();
+ if (cquote != quote) {
+ reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
+ }
+ return true;
+ }
fStringBuffer2.clear();
fStringBuffer2.append(value);
- normalizeWhitespace(value);
+ normalizeWhitespace(value, fromIndex);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** normalizeWhitespace -> \""
+ value.toString() + "\"");
@@ -441,12 +452,44 @@
*/
protected void normalizeWhitespace(XMLString value) {
int end = value.offset + value.length;
- for (int i = value.offset; i < end; i++) {
+ for (int i = value.offset; i < end; ++i) {
int c = value.ch[i];
if (XMLChar.isSpace(c)) {
value.ch[i] = ' ';
}
}
+ }
+
+ /**
+ * Normalize whitespace in an XMLString converting all whitespace
+ * characters to space characters.
+ */
+ protected void normalizeWhitespace(XMLString value, int fromIndex) {
+ int end = value.offset + value.length;
+ for (int i = value.offset + fromIndex; i < end; ++i) {
+ int c = value.ch[i];
+ if (XMLChar.isSpace(c)) {
+ value.ch[i] = ' ';
+ }
+ }
+ }
+
+ /**
+ * Checks whether this string would be unchanged by normalization.
+ *
+ * @return -1 if the value would be unchanged by normalization,
+ * otherwise the index of the first whitespace character which
+ * would be transformed.
+ */
+ protected int isUnchangedByNormalization(XMLString value) {
+ int end = value.offset + value.length;
+ for (int i = value.offset; i < end; ++i) {
+ int c = value.ch[i];
+ if (XMLChar.isSpace(c)) {
+ return i - value.offset;
+ }
+ }
+ return -1;
}
// returns true if the given character is not
1.12 +34 -2 xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
Index: XML11DTDScannerImpl.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- XML11DTDScannerImpl.java 24 Feb 2004 23:03:46 -0000 1.11
+++ XML11DTDScannerImpl.java 12 Aug 2004 19:13:32 -0000 1.12
@@ -161,12 +161,44 @@
*/
protected void normalizeWhitespace(XMLString value) {
int end = value.offset + value.length;
- for (int i = value.offset; i < end; i++) {
+ for (int i = value.offset; i < end; ++i) {
int c = value.ch[i];
if (XMLChar.isSpace(c)) {
value.ch[i] = ' ';
}
}
+ }
+
+ /**
+ * Normalize whitespace in an XMLString converting all whitespace
+ * characters to space characters.
+ */
+ protected void normalizeWhitespace(XMLString value, int fromIndex) {
+ int end = value.offset + value.length;
+ for (int i = value.offset + fromIndex; i < end; ++i) {
+ int c = value.ch[i];
+ if (XMLChar.isSpace(c)) {
+ value.ch[i] = ' ';
+ }
+ }
+ }
+
+ /**
+ * Checks whether this string would be unchanged by normalization.
+ *
+ * @return -1 if the value would be unchanged by normalization,
+ * otherwise the index of the first whitespace character which
+ * would be transformed.
+ */
+ protected int isUnchangedByNormalization(XMLString value) {
+ int end = value.offset + value.length;
+ for (int i = value.offset; i < end; ++i) {
+ int c = value.ch[i];
+ if (XMLChar.isSpace(c)) {
+ return i - value.offset;
+ }
+ }
+ return -1;
}
// returns true if the given character is not
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]