mrglavas    2004/08/12 12:13:32

  Modified:    java/src/org/apache/xerces/impl XMLScanner.java
                        XML11DocumentScannerImpl.java
                        XML11DTDScannerImpl.java
  Log:
  Performance: If the scanner stopped on the closing quote
that means we've scanned the entire attribute value. If this value does not contain any TABs, CRs or LFs then it would be unchanged by normalization. Now checking this common condition to test whether both the normalized and non-normalized values are equal. Revision Changes Path 1.50 +56 -2 xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java Index: XMLScanner.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v retrieving revision 1.49 retrieving revision 1.50 diff -u -r1.49 -r1.50 --- XMLScanner.java 12 Aug 2004 18:24:56 -0000 1.49 +++ XMLScanner.java 12 Aug 2004 19:13:32 -0000 1.50 @@ -772,9 +772,19 @@ + value.toString() + "\""); } + int fromIndex = 0; + if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) { + /** Both the non-normalized and normalized attribute values are equal. **/ + nonNormalizedValue.setValues(value); + int cquote = fEntityScanner.scanChar(); + if (cquote != quote) { + reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); + } + return true; + } fStringBuffer2.clear(); fStringBuffer2.append(value); - normalizeWhitespace(value); + normalizeWhitespace(value, fromIndex); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** normalizeWhitespace -> \"" + value.toString() + "\""); @@ -1115,6 +1125,50 @@ value.ch[i] = ' '; } } + } + + /** + * Normalize whitespace in an XMLString converting all whitespace + * characters to space characters. + */ + protected void normalizeWhitespace(XMLString value, int fromIndex) { + int end = value.offset + value.length; + for (int i = value.offset + fromIndex; i < end; ++i) { + int c = value.ch[i]; + // Performance: For XML 1.0 documents take advantage of + // the fact that the only legal characters below 0x20 + // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). 
Since we've + // already determined the well-formedness of these + // characters it is sufficient (and safe) to check + // against 0x20. -- mrglavas + if (c < 0x20) { + value.ch[i] = ' '; + } + } + } + + /** + * Checks whether this string would be unchanged by normalization. + * + * @return -1 if the value would be unchanged by normalization, + * otherwise the index of the first whitespace character which + * would be transformed. + */ + protected int isUnchangedByNormalization(XMLString value) { + int end = value.offset + value.length; + for (int i = value.offset; i < end; ++i) { + int c = value.ch[i]; + // Performance: For XML 1.0 documents take advantage of + // the fact that the only legal characters below 0x20 + // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've + // already determined the well-formedness of these + // characters it is sufficient (and safe) to check + // against 0x20. -- mrglavas + if (c < 0x20) { + return i - value.offset; + } + } + return -1; } // 1.21 +46 -3 xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java Index: XML11DocumentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- XML11DocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.20 +++ XML11DocumentScannerImpl.java 12 Aug 2004 19:13:32 -0000 1.21 @@ -172,9 +172,20 @@ System.out.println("** scanLiteral -> \"" + value.toString() + "\""); } + + int fromIndex = 0; + if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) { + /** Both the non-normalized and normalized attribute values are equal. 
**/ + nonNormalizedValue.setValues(value); + int cquote = fEntityScanner.scanChar(); + if (cquote != quote) { + reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); + } + return true; + } fStringBuffer2.clear(); fStringBuffer2.append(value); - normalizeWhitespace(value); + normalizeWhitespace(value, fromIndex); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** normalizeWhitespace -> \"" + value.toString() + "\""); @@ -441,12 +452,44 @@ */ protected void normalizeWhitespace(XMLString value) { int end = value.offset + value.length; - for (int i = value.offset; i < end; i++) { + for (int i = value.offset; i < end; ++i) { int c = value.ch[i]; if (XMLChar.isSpace(c)) { value.ch[i] = ' '; } } + } + + /** + * Normalize whitespace in an XMLString converting all whitespace + * characters to space characters. + */ + protected void normalizeWhitespace(XMLString value, int fromIndex) { + int end = value.offset + value.length; + for (int i = value.offset + fromIndex; i < end; ++i) { + int c = value.ch[i]; + if (XMLChar.isSpace(c)) { + value.ch[i] = ' '; + } + } + } + + /** + * Checks whether this string would be unchanged by normalization. + * + * @return -1 if the value would be unchanged by normalization, + * otherwise the index of the first whitespace character which + * would be transformed. 
+ */ + protected int isUnchangedByNormalization(XMLString value) { + int end = value.offset + value.length; + for (int i = value.offset; i < end; ++i) { + int c = value.ch[i]; + if (XMLChar.isSpace(c)) { + return i - value.offset; + } + } + return -1; } // returns true if the given character is not 1.12 +34 -2 xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java Index: XML11DTDScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- XML11DTDScannerImpl.java 24 Feb 2004 23:03:46 -0000 1.11 +++ XML11DTDScannerImpl.java 12 Aug 2004 19:13:32 -0000 1.12 @@ -161,12 +161,44 @@ */ protected void normalizeWhitespace(XMLString value) { int end = value.offset + value.length; - for (int i = value.offset; i < end; i++) { + for (int i = value.offset; i < end; ++i) { int c = value.ch[i]; if (XMLChar.isSpace(c)) { value.ch[i] = ' '; } } + } + + /** + * Normalize whitespace in an XMLString converting all whitespace + * characters to space characters. + */ + protected void normalizeWhitespace(XMLString value, int fromIndex) { + int end = value.offset + value.length; + for (int i = value.offset + fromIndex; i < end; ++i) { + int c = value.ch[i]; + if (XMLChar.isSpace(c)) { + value.ch[i] = ' '; + } + } + } + + /** + * Checks whether this string would be unchanged by normalization. + * + * @return -1 if the value would be unchanged by normalization, + * otherwise the index of the first whitespace character which + * would be transformed. 
+ */ + protected int isUnchangedByNormalization(XMLString value) { + int end = value.offset + value.length; + for (int i = value.offset; i < end; ++i) { + int c = value.ch[i]; + if (XMLChar.isSpace(c)) { + return i - value.offset; + } + } + return -1; } // returns true if the given character is not --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]