mrglavas 2004/08/12 11:24:56
Modified: java/src/org/apache/xerces/impl XMLScanner.java
XML11DocumentScannerImpl.java
XML11NSDocumentScannerImpl.java
XMLNSDocumentScannerImpl.java
XMLDocumentFragmentScannerImpl.java
Log:
JIRA Issue #1001:
http://nagoya.apache.org/jira/browse/XERCESJ-1001
When processing attributes in the scanner we store both the
non-normalized and normalized values of an attribute. Attribute
value normalization as specified by the XML 1.0 specification
replaces tabs and line breaks with spaces and performs entity
expansion, if any entity references exist in the attribute values.
In the vast majority of real-world documents the
non-normalized and normalized values are the same; however, we
were creating a new string for each value. Thanks to the
patch by John Kim, we now create two strings only if the non-normalized
and normalized values are different.
Revision Changes Path
1.49 +8 -3 xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
Index: XMLScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -r1.48 -r1.49
--- XMLScanner.java 25 Apr 2004 05:05:50 -0000 1.48
+++ XMLScanner.java 12 Aug 2004 18:24:56 -0000 1.49
@@ -746,10 +746,12 @@
* false if undeclared entities should be reported as WFC
violation.
* @param eleName The name of element to which this attribute belongs.
*
+ * @return true if the non-normalized and normalized value are the same
+ *
* <strong>Note:</strong> This method uses fStringBuffer2, anything in it
* at the time of calling is lost.
**/
- protected void scanAttributeValue(XMLString value,
+ protected boolean scanAttributeValue(XMLString value,
XMLString nonNormalizedValue,
String atName,
boolean checkEntities,String eleName)
@@ -769,6 +771,7 @@
System.out.println("** scanLiteral -> \""
+ value.toString() + "\"");
}
+
fStringBuffer2.clear();
fStringBuffer2.append(value);
normalizeWhitespace(value);
@@ -953,6 +956,8 @@
if (cquote != quote) {
reportFatalError("CloseQuoteExpected", new
Object[]{eleName,atName});
}
+ return nonNormalizedValue.equals(value.ch, value.offset, value.length);
+
} // scanAttributeValue()
@@ -1098,7 +1103,7 @@
*/
protected void normalizeWhitespace(XMLString value) {
int end = value.offset + value.length;
- for (int i = value.offset; i < end; i++) {
+ for (int i = value.offset; i < end; ++i) {
int c = value.ch[i];
// Performance: For XML 1.0 documents take advantage of
// the fact that the only legal characters below 0x20
1.20 +5 -2
xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
Index: XML11DocumentScannerImpl.java
===================================================================
RCS file:
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- XML11DocumentScannerImpl.java 25 Apr 2004 05:05:50 -0000 1.19
+++ XML11DocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.20
@@ -147,10 +147,12 @@
* false if undeclared entities should be reported as WFC
violation.
* @param eleName The name of element to which this attribute belongs.
*
+ * @return true if the non-normalized and normalized value are the same
+ *
* <strong>Note:</strong> This method uses fStringBuffer2, anything in it
* at the time of calling is lost.
**/
- protected void scanAttributeValue(XMLString value,
+ protected boolean scanAttributeValue(XMLString value,
XMLString nonNormalizedValue,
String atName,
boolean checkEntities,String eleName)
@@ -357,6 +359,7 @@
if (cquote != quote) {
reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
}
+ return nonNormalizedValue.equals(value.ch, value.offset, value.length);
} // scanAttributeValue()
//
1.12 +9 -9
xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java
Index: XML11NSDocumentScannerImpl.java
===================================================================
RCS file:
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- XML11NSDocumentScannerImpl.java 30 Apr 2004 15:36:38 -0000 1.11
+++ XML11NSDocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.12
@@ -591,16 +591,16 @@
//REVISIT: one more case needs to be included: external PE and standalone
is no
boolean isVC = fHasExternalDTD && !fStandalone;
- // REVISIT: it seems that this function should not take attributes, and
length
- scanAttributeValue(
- this.fTempString,
- fTempString2,
- fAttributeQName.rawname,
- isVC,
- fCurrentElement.rawname);
+ // Scan attribute value and return true if the non-normalized and
normalized value are the same
+ boolean isSameNormalizedAttr = scanAttributeValue(this.fTempString,
fTempString2,
+ fAttributeQName.rawname,isVC,fCurrentElement.rawname);
+
String value = fTempString.toString();
attributes.setValue(attrIndex, value);
- attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
+ // If the non-normalized and normalized value are the same, avoid creating
a new string.
+ if (!isSameNormalizedAttr) {
+ attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
+ }
attributes.setSpecified(attrIndex, true);
// record namespace declarations if any.
1.24 +9 -5
xml-xerces/java/src/org/apache/xerces/impl/XMLNSDocumentScannerImpl.java
Index: XMLNSDocumentScannerImpl.java
===================================================================
RCS file:
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLNSDocumentScannerImpl.java,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -r1.23 -r1.24
--- XMLNSDocumentScannerImpl.java 30 Apr 2004 15:36:38 -0000 1.23
+++ XMLNSDocumentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.24
@@ -528,12 +528,16 @@
//REVISIT: one more case needs to be included: external PE and standalone
is no
boolean isVC = fHasExternalDTD && !fStandalone;
- scanAttributeValue(this.fTempString, fTempString2,
- fAttributeQName.rawname, isVC,
- fCurrentElement.rawname);
+ // Scan attribute value and return true if the non-normalized and
normalized value are the same
+ boolean isSameNormalizedAttr = scanAttributeValue(this.fTempString,
fTempString2,
+ fAttributeQName.rawname, isVC, fCurrentElement.rawname);
+
String value = fTempString.toString();
attributes.setValue(attrIndex, value);
- attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
+ // If the non-normalized and normalized value are the same, avoid creating
a new string.
+ if (!isSameNormalizedAttr) {
+ attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
+ }
attributes.setSpecified(attrIndex, true);
// record namespace declarations if any.
1.55 +10 -5
xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java
Index: XMLDocumentFragmentScannerImpl.java
===================================================================
RCS file:
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -r1.54 -r1.55
--- XMLDocumentFragmentScannerImpl.java 8 Aug 2004 20:30:05 -0000 1.54
+++ XMLDocumentFragmentScannerImpl.java 12 Aug 2004 18:24:56 -0000 1.55
@@ -972,11 +972,16 @@
}
//REVISIT: one more case needs to be included: external PE and standalone
is no
boolean isVC = fHasExternalDTD && !fStandalone;
- scanAttributeValue(fTempString, fTempString2,
- fAttributeQName.rawname, isVC,
- fCurrentElement.rawname);
+
+ // Scan attribute value and return true if the un-normalized and normalized
value are the same
+ boolean isSameNormalizedAttr = scanAttributeValue(fTempString,
fTempString2,
+ fAttributeQName.rawname, isVC, fCurrentElement.rawname);
+
attributes.setValue(attrIndex, fTempString.toString());
- attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
+ // If the non-normalized and normalized value are the same, avoid creating
a new string.
+ if (!isSameNormalizedAttr) {
+ attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
+ }
attributes.setSpecified(attrIndex, true);
if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]