impl XMLScanner.java XML11DocumentScannerImpl.java XML11DTDScannerImpl.java

mrglavas Thu, 12 Aug 2004 12:13:34 -0700

mrglavas    2004/08/12 12:13:32

  Modified:    java/src/org/apache/xerces/impl XMLScanner.java
                        XML11DocumentScannerImpl.java
                        XML11DTDScannerImpl.java
  Log:
  Performance: If the scanner stopped on the closing quote
  that means we've scanned the entire attribute value. If
  this value does not contain any TABs, CRs or LFs then it
  would be unchanged by normalization. Now checking for
  this common condition to test whether both the normalized
  and non-normalized values are equal.
  
  Revision  Changes    Path
  1.50      +56 -2     xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
  retrieving revision 1.49
  retrieving revision 1.50
  diff -u -r1.49 -r1.50
  --- XMLScanner.java   12 Aug 2004 18:24:56 -0000      1.49
  +++ XMLScanner.java   12 Aug 2004 19:13:32 -0000      1.50
  @@ -772,9 +772,19 @@
                                  + value.toString() + "\"");
           }
           
  +        int fromIndex = 0;
  +        if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
  +            /** Both the non-normalized and normalized attribute values are equal. **/
  +            nonNormalizedValue.setValues(value);
  +            int cquote = fEntityScanner.scanChar();
  +            if (cquote != quote) {
  +                reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
  +            }
  +            return true;
  +        }
           fStringBuffer2.clear();
           fStringBuffer2.append(value);
  -        normalizeWhitespace(value);
  +        normalizeWhitespace(value, fromIndex);
           if (DEBUG_ATTR_NORMALIZATION) {
               System.out.println("** normalizeWhitespace -> \""
                                  + value.toString() + "\"");
  @@ -1115,6 +1125,50 @@
                   value.ch[i] = ' ';
               }
           }
  +    }
  +    
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value, int fromIndex) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset + fromIndex; i < end; ++i) {
  +            int c = value.ch[i];
  +            // Performance: For XML 1.0 documents take advantage of 
  +            // the fact that the only legal characters below 0x20 
  +            // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
  +            // already determined the well-formedness of these
  +            // characters it is sufficient (and safe) to check
  +            // against 0x20. -- mrglavas
  +            if (c < 0x20) {
  +                value.ch[i] = ' ';
  +            }
  +        }
  +    }
  +    
  +    /**
  +     * Checks whether this string would be unchanged by normalization.
  +     * 
  +     * @return -1 if the value would be unchanged by normalization,
  +     * otherwise the index of the first whitespace character which
  +     * would be transformed.
  +     */
  +    protected int isUnchangedByNormalization(XMLString value) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset; i < end; ++i) {
  +            int c = value.ch[i];
  +            // Performance: For XML 1.0 documents take advantage of 
  +            // the fact that the only legal characters below 0x20 
  +            // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
  +            // already determined the well-formedness of these
  +            // characters it is sufficient (and safe) to check
  +            // against 0x20. -- mrglavas
  +            if (c < 0x20) {
  +                return i - value.offset;
  +            }
  +        }
  +        return -1;
       }
   
       //
  
  
  
  1.21      +46 -3     xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- XML11DocumentScannerImpl.java     12 Aug 2004 18:24:56 -0000      1.20
  +++ XML11DocumentScannerImpl.java     12 Aug 2004 19:13:32 -0000      1.21
  @@ -172,9 +172,20 @@
               System.out.println("** scanLiteral -> \""
                                  + value.toString() + "\"");
           }
  +        
  +        int fromIndex = 0;
  +        if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
  +            /** Both the non-normalized and normalized attribute values are equal. **/
  +            nonNormalizedValue.setValues(value);
  +            int cquote = fEntityScanner.scanChar();
  +            if (cquote != quote) {
  +                reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
  +            }
  +            return true;
  +        }
           fStringBuffer2.clear();
           fStringBuffer2.append(value);
  -        normalizeWhitespace(value);
  +        normalizeWhitespace(value, fromIndex);
           if (DEBUG_ATTR_NORMALIZATION) {
               System.out.println("** normalizeWhitespace -> \""
                                  + value.toString() + "\"");
  @@ -441,12 +452,44 @@
        */
       protected void normalizeWhitespace(XMLString value) {
           int end = value.offset + value.length;
  -         for (int i = value.offset; i < end; i++) {
  +         for (int i = value.offset; i < end; ++i) {
              int c = value.ch[i];
              if (XMLChar.isSpace(c)) {
                  value.ch[i] = ' ';
              }
          }
  +    }
  +    
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value, int fromIndex) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset + fromIndex; i < end; ++i) {
  +            int c = value.ch[i];
  +            if (XMLChar.isSpace(c)) {
  +                value.ch[i] = ' ';
  +            }
  +        }
  +    }
  +    
  +    /**
  +     * Checks whether this string would be unchanged by normalization.
  +     * 
  +     * @return -1 if the value would be unchanged by normalization,
  +     * otherwise the index of the first whitespace character which
  +     * would be transformed.
  +     */
  +    protected int isUnchangedByNormalization(XMLString value) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset; i < end; ++i) {
  +            int c = value.ch[i];
  +            if (XMLChar.isSpace(c)) {
  +                return i - value.offset;
  +            }
  +        }
  +        return -1;
       }
   
       // returns true if the given character is not
  
  
  
  1.12      +34 -2     xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
  
  Index: XML11DTDScannerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- XML11DTDScannerImpl.java  24 Feb 2004 23:03:46 -0000      1.11
  +++ XML11DTDScannerImpl.java  12 Aug 2004 19:13:32 -0000      1.12
  @@ -161,12 +161,44 @@
        */
       protected void normalizeWhitespace(XMLString value) {
           int end = value.offset + value.length;
  -        for (int i = value.offset; i < end; i++) {
  +        for (int i = value.offset; i < end; ++i) {
               int c = value.ch[i];
               if (XMLChar.isSpace(c)) {
                   value.ch[i] = ' ';
               }
           }
  +    }
  +    
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value, int fromIndex) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset + fromIndex; i < end; ++i) {
  +            int c = value.ch[i];
  +            if (XMLChar.isSpace(c)) {
  +                value.ch[i] = ' ';
  +            }
  +        }
  +    }
  +    
  +    /**
  +     * Checks whether this string would be unchanged by normalization.
  +     * 
  +     * @return -1 if the value would be unchanged by normalization,
  +     * otherwise the index of the first whitespace character which
  +     * would be transformed.
  +     */
  +    protected int isUnchangedByNormalization(XMLString value) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset; i < end; ++i) {
  +            int c = value.ch[i];
  +            if (XMLChar.isSpace(c)) {
  +                return i - value.offset;
  +            }
  +        }
  +        return -1;
       }
   
       // returns true if the given character is not
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-xerces/java/src/org/apache/xerces/impl XMLScanner.java XML11DocumentScannerImpl.java XML11DTDScannerImpl.java

Reply via email to