mrglavas 2004/08/12 11:24:56 Modified: java/src/org/apache/xerces/impl XMLScanner.java XML11DocumentScannerImpl.java XML11NSDocumentScannerImpl.java XMLNSDocumentScannerImpl.java XMLDocumentFragmentScannerImpl.java Log: JIRA Issue #1001:
http://nagoya.apache.org/jira/browse/XERCESJ-1001 When processing attributes in the scanner we store both the non-normalized and normalized values of an attribute. Attribute value normalization as specified by the XML 1.0 specification replaces tabs and line breaks with spaces and performs entity expansion, if any entity references exist in the attribute values. In the vast majority of real-world documents the non-normalized and normalized values are the same; however, we were creating a new string for each value. Thanks to the patch by John Kim, we now create two strings only if the non-normalized and normalized values are different. Revision Changes Path 1.49 +8 -3 xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java Index: XMLScanner.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v retrieving revision 1.48 retrieving revision 1.49 diff -u -r1.48 -r1.49 --- XMLScanner.java 25 Apr 2004 05:05:50 -0000 1.48 +++ XMLScanner.java 12 Aug 2004 18:24:56 -0000 1.49 @@ -746,10 +746,12 @@ * false if undeclared entities should be reported as WFC violation. * @param eleName The name of element to which this attribute belongs. * + * @return true if the non-normalized and normalized value are the same + * * <strong>Note:</strong> This method uses fStringBuffer2, anything in it * at the time of calling is lost. 
**/ - protected void scanAttributeValue(XMLString value, + protected boolean scanAttributeValue(XMLString value, XMLString nonNormalizedValue, String atName, boolean checkEntities,String eleName) @@ -769,6 +771,7 @@ System.out.println("** scanLiteral -> \"" + value.toString() + "\""); } + fStringBuffer2.clear(); fStringBuffer2.append(value); normalizeWhitespace(value); @@ -953,6 +956,8 @@ if (cquote != quote) { reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); } + return nonNormalizedValue.equals(value.ch, value.offset, value.length); + } // scanAttributeValue() @@ -1098,7 +1103,7 @@ */ protected void normalizeWhitespace(XMLString value) { int end = value.offset + value.length; - for (int i = value.offset; i < end; i++) { + for (int i = value.offset; i < end; ++i) { int c = value.ch[i]; // Performance: For XML 1.0 documents take advantage of // the fact that the only legal characters below 0x20 1.20 +5 -2 xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java Index: XML11DocumentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- XML11DocumentScannerImpl.java 25 Apr 2004 05:05:50 -0000 1.19 +++ XML11DocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.20 @@ -147,10 +147,12 @@ * false if undeclared entities should be reported as WFC violation. * @param eleName The name of element to which this attribute belongs. * + * @return true if the non-normalized and normalized value are the same + * * <strong>Note:</strong> This method uses fStringBuffer2, anything in it * at the time of calling is lost. 
**/ - protected void scanAttributeValue(XMLString value, + protected boolean scanAttributeValue(XMLString value, XMLString nonNormalizedValue, String atName, boolean checkEntities,String eleName) @@ -357,6 +359,7 @@ if (cquote != quote) { reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); } + return nonNormalizedValue.equals(value.ch, value.offset, value.length); } // scanAttributeValue() // 1.12 +9 -9 xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java Index: XML11NSDocumentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- XML11NSDocumentScannerImpl.java 30 Apr 2004 15:36:38 -0000 1.11 +++ XML11NSDocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.12 @@ -591,16 +591,16 @@ //REVISIT: one more case needs to be included: external PE and standalone is no boolean isVC = fHasExternalDTD && !fStandalone; - // REVISIT: it seems that this function should not take attributes, and length - scanAttributeValue( - this.fTempString, - fTempString2, - fAttributeQName.rawname, - isVC, - fCurrentElement.rawname); + // Scan attribute value and return true if the non-normalized and normalized value are the same + boolean isSameNormalizedAttr = scanAttributeValue(this.fTempString, fTempString2, + fAttributeQName.rawname,isVC,fCurrentElement.rawname); + String value = fTempString.toString(); attributes.setValue(attrIndex, value); - attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); + // If the non-normalized and normalized value are the same, avoid creating a new string. + if (!isSameNormalizedAttr) { + attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); + } attributes.setSpecified(attrIndex, true); // record namespace declarations if any. 
1.24 +9 -5 xml-xerces/java/src/org/apache/xerces/impl/XMLNSDocumentScannerImpl.java Index: XMLNSDocumentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLNSDocumentScannerImpl.java,v retrieving revision 1.23 retrieving revision 1.24 diff -u -r1.23 -r1.24 --- XMLNSDocumentScannerImpl.java 30 Apr 2004 15:36:38 -0000 1.23 +++ XMLNSDocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.24 @@ -528,12 +528,16 @@ //REVISIT: one more case needs to be included: external PE and standalone is no boolean isVC = fHasExternalDTD && !fStandalone; - scanAttributeValue(this.fTempString, fTempString2, - fAttributeQName.rawname, isVC, - fCurrentElement.rawname); + // Scan attribute value and return true if the non-normalized and normalized value are the same + boolean isSameNormalizedAttr = scanAttributeValue(this.fTempString, fTempString2, + fAttributeQName.rawname, isVC, fCurrentElement.rawname); + String value = fTempString.toString(); attributes.setValue(attrIndex, value); - attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); + // If the non-normalized and normalized value are the same, avoid creating a new string. + if (!isSameNormalizedAttr) { + attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); + } attributes.setSpecified(attrIndex, true); // record namespace declarations if any. 
1.55 +10 -5 xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java Index: XMLDocumentFragmentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v retrieving revision 1.54 retrieving revision 1.55 diff -u -r1.54 -r1.55 --- XMLDocumentFragmentScannerImpl.java 8 Aug 2004 20:30:05 -0000 1.54 +++ XMLDocumentFragmentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.55 @@ -972,11 +972,16 @@ } //REVISIT: one more case needs to be included: external PE and standalone is no boolean isVC = fHasExternalDTD && !fStandalone; - scanAttributeValue(fTempString, fTempString2, - fAttributeQName.rawname, isVC, - fCurrentElement.rawname); + + // Scan attribute value and return true if the un-normalized and normalized value are the same + boolean isSameNormalizedAttr = scanAttributeValue(fTempString, fTempString2, + fAttributeQName.rawname, isVC, fCurrentElement.rawname); + attributes.setValue(attrIndex, fTempString.toString()); - attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); + // If the non-normalized and normalized value are the same, avoid creating a new string. + if (!isSameNormalizedAttr) { + attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); + } attributes.setSpecified(attrIndex, true); if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()"); --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]