mrglavas    2004/08/12 11:24:56

  Modified:    java/src/org/apache/xerces/impl XMLScanner.java
                        XML11DocumentScannerImpl.java
                        XML11NSDocumentScannerImpl.java
                        XMLNSDocumentScannerImpl.java
                        XMLDocumentFragmentScannerImpl.java
  Log:
  JIRA Issue #1001:

  http://nagoya.apache.org/jira/browse/XERCESJ-1001

  

  When processing attributes in the scanner, we store both the
  non-normalized and normalized values of an attribute. Attribute
  value normalization, as specified by the XML 1.0 specification,
  replaces tabs and line breaks with spaces and performs entity
  expansion if any entity references exist in the attribute value.

  

  In the vast majority of real-world documents the non-normalized
  and normalized values are the same; however, we were creating a
  new string for each value. Thanks to the patch by John Kim, we
  now create two strings only if the non-normalized and normalized
  values are different.
  
  Revision  Changes    Path
  1.49      +8 -3      xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
  retrieving revision 1.48
  retrieving revision 1.49
  diff -u -r1.48 -r1.49
  --- XMLScanner.java   25 Apr 2004 05:05:50 -0000      1.48
  +++ XMLScanner.java   12 Aug 2004 18:24:56 -0000      1.49
  @@ -746,10 +746,12 @@
        *                      false if undeclared entities should be reported as WFC 
violation.
        * @param eleName The name of element to which this attribute belongs.
        *
  +     * @return true if the non-normalized and normalized value are the same
  +     * 
        * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
        * at the time of calling is lost.
        **/
  -    protected void scanAttributeValue(XMLString value, 
  +    protected boolean scanAttributeValue(XMLString value, 
                                         XMLString nonNormalizedValue,
                                         String atName,
                                         boolean checkEntities,String eleName)
  @@ -769,6 +771,7 @@
               System.out.println("** scanLiteral -> \""
                                  + value.toString() + "\"");
           }
  +        
           fStringBuffer2.clear();
           fStringBuffer2.append(value);
           normalizeWhitespace(value);
  @@ -953,6 +956,8 @@
           if (cquote != quote) {
                        reportFatalError("CloseQuoteExpected", new 
Object[]{eleName,atName});
           }
  +        return nonNormalizedValue.equals(value.ch, value.offset, value.length);
  +        
       } // scanAttributeValue()
   
   
  @@ -1098,7 +1103,7 @@
        */
       protected void normalizeWhitespace(XMLString value) {
           int end = value.offset + value.length;
  -        for (int i = value.offset; i < end; i++) {
  +        for (int i = value.offset; i < end; ++i) {
               int c = value.ch[i];
               // Performance: For XML 1.0 documents take advantage of 
               // the fact that the only legal characters below 0x20 
  
  
  
  1.20      +5 -2      
xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -r1.19 -r1.20
  --- XML11DocumentScannerImpl.java     25 Apr 2004 05:05:50 -0000      1.19
  +++ XML11DocumentScannerImpl.java     12 Aug 2004 18:24:56 -0000      1.20
  @@ -147,10 +147,12 @@
        *                      false if undeclared entities should be reported as WFC 
violation.
        * @param eleName The name of element to which this attribute belongs.
        *
  +     * @return true if the non-normalized and normalized value are the same
  +     * 
        * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
        * at the time of calling is lost.
        **/
  -    protected void scanAttributeValue(XMLString value, 
  +    protected boolean scanAttributeValue(XMLString value, 
                                         XMLString nonNormalizedValue,
                                         String atName,
                                         boolean checkEntities,String eleName)
  @@ -357,6 +359,7 @@
           if (cquote != quote) {
               reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
           }
  +        return nonNormalizedValue.equals(value.ch, value.offset, value.length);
       } // scanAttributeValue()
   
       //
  
  
  
  1.12      +9 -9      
xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java
  
  Index: XML11NSDocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- XML11NSDocumentScannerImpl.java   30 Apr 2004 15:36:38 -0000      1.11
  +++ XML11NSDocumentScannerImpl.java   12 Aug 2004 18:24:56 -0000      1.12
  @@ -591,16 +591,16 @@
           //REVISIT: one more case needs to be included: external PE and standalone 
is no
           boolean isVC = fHasExternalDTD && !fStandalone;
   
  -        // REVISIT: it seems that this function should not take attributes, and 
length
  -        scanAttributeValue(
  -            this.fTempString,
  -            fTempString2,
  -            fAttributeQName.rawname,
  -            isVC,
  -            fCurrentElement.rawname);
  +        // Scan attribute value and return true if the non-normalized and 
normalized value are the same
  +        boolean isSameNormalizedAttr = scanAttributeValue(this.fTempString, 
fTempString2, 
  +                fAttributeQName.rawname,isVC,fCurrentElement.rawname);
  +        
           String value = fTempString.toString();
           attributes.setValue(attrIndex, value);
  -        attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  +        // If the non-normalized and normalized value are the same, avoid creating 
a new string.
  +        if (!isSameNormalizedAttr) {
  +            attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  +        }
           attributes.setSpecified(attrIndex, true);
   
           // record namespace declarations if any.
  
  
  
  1.24      +9 -5      
xml-xerces/java/src/org/apache/xerces/impl/XMLNSDocumentScannerImpl.java
  
  Index: XMLNSDocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLNSDocumentScannerImpl.java,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -r1.23 -r1.24
  --- XMLNSDocumentScannerImpl.java     30 Apr 2004 15:36:38 -0000      1.23
  +++ XMLNSDocumentScannerImpl.java     12 Aug 2004 18:24:56 -0000      1.24
  @@ -528,12 +528,16 @@
           //REVISIT: one more case needs to be included: external PE and standalone 
is no
           boolean isVC =  fHasExternalDTD && !fStandalone;
   
  -        scanAttributeValue(this.fTempString, fTempString2,
  -                           fAttributeQName.rawname, isVC, 
  -                           fCurrentElement.rawname);
  +        // Scan attribute value and return true if the non-normalized and 
normalized value are the same
  +        boolean isSameNormalizedAttr = scanAttributeValue(this.fTempString, 
fTempString2,
  +                fAttributeQName.rawname, isVC, fCurrentElement.rawname);
  +        
           String value = fTempString.toString();
           attributes.setValue(attrIndex, value);
  -        attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  +        // If the non-normalized and normalized value are the same, avoid creating 
a new string.
  +        if (!isSameNormalizedAttr) {
  +            attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  +        }
           attributes.setSpecified(attrIndex, true);
   
           // record namespace declarations if any.
  
  
  
  1.55      +10 -5     
xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java
  
  Index: XMLDocumentFragmentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v
  retrieving revision 1.54
  retrieving revision 1.55
  diff -u -r1.54 -r1.55
  --- XMLDocumentFragmentScannerImpl.java       8 Aug 2004 20:30:05 -0000       1.54
  +++ XMLDocumentFragmentScannerImpl.java       12 Aug 2004 18:24:56 -0000      1.55
  @@ -972,11 +972,16 @@
           }
           //REVISIT: one more case needs to be included: external PE and standalone 
is no
           boolean isVC =  fHasExternalDTD && !fStandalone;        
  -        scanAttributeValue(fTempString, fTempString2,
  -                           fAttributeQName.rawname, isVC, 
  -                           fCurrentElement.rawname);
  +        
  +        // Scan attribute value and return true if the un-normalized and normalized 
value are the same
  +        boolean isSameNormalizedAttr =  scanAttributeValue(fTempString, 
fTempString2,
  +                fAttributeQName.rawname, isVC, fCurrentElement.rawname);
  +        
           attributes.setValue(attrIndex, fTempString.toString());
  -        attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  +        // If the non-normalized and normalized value are the same, avoid creating 
a new string.
  +        if (!isSameNormalizedAttr) {
  +            attributes.setNonNormalizedValue(attrIndex, fTempString2.toString());
  +        }
           attributes.setSpecified(attrIndex, true);
   
           if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to