mrglavas    2004/01/26 11:37:35

  Modified:    java/src/org/apache/xerces/impl XML11DTDScannerImpl.java
                        XMLScanner.java XML11DocumentScannerImpl.java
  Log:
  Improve performance of attribute value normalization for XML 1.0
  documents.
  
  Check if each character is < 0x20 instead of calling XMLChar.isSpace.
  This simple check against 0x20 is safe because at the point we 
  normalize the attribute value we've determined that it is well formed, 
  and therefore if we're looking at anything less than 0x20 it must be 
  TAB, LF or CR. This would avoid a method call and table lookup and 
  allow us to skip over 0x20 which doesn't require further normalization.
  
  This shortcut doesn't work for XML 1.1 documents, because control
  characters are allowed in them, so add the old method to the 1.1 scanners.
  
  Revision  Changes    Path
  1.10      +15 -1     
xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
  
  Index: XML11DTDScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XML11DTDScannerImpl.java  5 Jan 2004 21:54:05 -0000       1.9
  +++ XML11DTDScannerImpl.java  26 Jan 2004 19:37:35 -0000      1.10
  @@ -195,6 +195,20 @@
           }
           return dataok;
      }
  +   
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value) {
  +        int end = value.offset + value.length;
  +        for (int i = value.offset; i < end; i++) {
  +            int c = value.ch[i];
  +            if (XMLChar.isSpace(c)) {
  +                value.ch[i] = ' ';
  +            }
  +        }
  +    }
   
       // returns true if the given character is not
       // valid with respect to the version of
  
  
  
  1.44      +8 -2      xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
  retrieving revision 1.43
  retrieving revision 1.44
  diff -u -r1.43 -r1.44
  --- XMLScanner.java   26 Jan 2004 17:28:12 -0000      1.43
  +++ XMLScanner.java   26 Jan 2004 19:37:35 -0000      1.44
  @@ -1143,7 +1143,13 @@
           int end = value.offset + value.length;
           for (int i = value.offset; i < end; i++) {
               int c = value.ch[i];
  -            if (XMLChar.isSpace(c)) {
  +            // Performance: For XML 1.0 documents take advantage of 
  +            // the fact that the only legal characters below 0x20 
  +            // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
  +            // already determined the well-formedness of these
  +            // characters it is sufficient (and safe) to check
  +            // against 0x20. -- mrglavas
  +            if (c < 0x20) {
                   value.ch[i] = ' ';
               }
           }
  
  
  
  1.16      +15 -1     
xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- XML11DocumentScannerImpl.java     5 Jan 2004 21:54:05 -0000       1.15
  +++ XML11DocumentScannerImpl.java     26 Jan 2004 19:37:35 -0000      1.16
  @@ -475,6 +475,20 @@
           }
           return dataok;
      }
  +   
  +    /**
  +     * Normalize whitespace in an XMLString converting all whitespace
  +     * characters to space characters.
  +     */
  +    protected void normalizeWhitespace(XMLString value) {
  +        int end = value.offset + value.length;
  +         for (int i = value.offset; i < end; i++) {
  +           int c = value.ch[i];
  +           if (XMLChar.isSpace(c)) {
  +               value.ch[i] = ' ';
  +           }
  +       }
  +    }
   
       // returns true if the given character is not
       // valid with respect to the version of
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to