andyc       2003/01/31 13:35:23

  Modified:    java/docs releases.xml
               java/src/org/apache/xerces/impl
                        XMLDocumentFragmentScannerImpl.java
                        XMLEntityScanner.java
  Log:
  Updated the scanner implementation to NOT buffer when scanning
  CDATA sections. This enables Xerces to use less memory and be
  able to parse very large CDATA sections which would previously
  cause out-of-memory exceptions.
  
  Reported by:  Jim Layer <[EMAIL PROTECTED]>
  
  Revision  Changes    Path
  1.148     +14 -2     xml-xerces/java/docs/releases.xml
  
  Index: releases.xml
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/docs/releases.xml,v
  retrieving revision 1.147
  retrieving revision 1.148
  diff -u -r1.147 -r1.148
  --- releases.xml      27 Jan 2003 17:06:19 -0000      1.147
  +++ releases.xml      31 Jan 2003 21:35:22 -0000      1.148
  @@ -1,7 +1,19 @@
  -<?xml version='1.0' encoding='UTF-8'?>
  +<?xml version='1.0' encoding='UTF-8'?>
   <!-- $Id$ -->
   <!DOCTYPE releases SYSTEM 'dtd/releases.dtd'>
   <releases>
  +  <release version='&ParserName; TBD'>
  +   <desc>
  +    To be determined...
  +   </desc>
  +   <fix>
  +    <note>
  +     Fixed scanner implementation to be able to handle large CDATA sections
  +     without buffering.
  +    </note>
  +    <submitter name='Andy Clark'/>
  +   </fix>
  +  </release>
     <release version="&ParserName; 2.3.0">
       <desc>
           With this release, the Xerces-J developers are declaring the Xerces
  
  
  
  1.27      +2 -2      xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java
  
  Index: XMLDocumentFragmentScannerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v
  retrieving revision 1.26
  retrieving revision 1.27
  diff -u -r1.26 -r1.27
  --- XMLDocumentFragmentScannerImpl.java       7 Dec 2002 00:15:58 -0000       1.26
  +++ XMLDocumentFragmentScannerImpl.java       31 Jan 2003 21:35:22 -0000      1.27
  @@ -963,7 +963,7 @@
   
           while (true) {
               fStringBuffer.clear();
  -            if (!fEntityScanner.scanData("]]", fStringBuffer)) {
  +            if (fEntityScanner.scanData("]]", fStringBuffer)) {
                   if (fDocumentHandler != null && fStringBuffer.length > 0) {
                       fDocumentHandler.characters(fStringBuffer, null);
                   }
  
  
  
  1.10      +140 -131  xml-xerces/java/src/org/apache/xerces/impl/XMLEntityScanner.java
  
  Index: XMLEntityScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityScanner.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XMLEntityScanner.java     16 Dec 2002 01:26:19 -0000      1.9
  +++ XMLEntityScanner.java     31 Jan 2003 21:35:22 -0000      1.10
  @@ -890,167 +890,176 @@
       public boolean scanData(String delimiter, XMLStringBuffer buffer)
           throws IOException {
   
  -        boolean done = false;
  +        // REVISIT: This method does not need to use a string buffer.
  +        //          The change would avoid the array copies and increase
  +        //          performance. -Ac
  +        //
  +        //          Currently, this method is only called for scanning 
  +        //          CDATA sections and processing instruction data. So 
  +        //          if this code is updated to NOT buffer, the scanning
  +        //          code for processing instructions will need to be
  +        //          updated to do its own buffering. The code for CDATA
  +        //          sections is safe as-is. -Ac
  +
  +        boolean found = false;
           int delimLen = delimiter.length();
           char charAt0 = delimiter.charAt(0);
           boolean external = fCurrentEntity.isExternal();
  -        do {
  +        if (DEBUG_BUFFER) {
  +            System.out.print("(scanData: ");
  +            XMLEntityManager.print(fCurrentEntity);
  +            System.out.println();
  +        }
  +
  +        // load more characters, if needed
  +
  +        if (fCurrentEntity.position == fCurrentEntity.count) {
  +            load(0, true);
  +        }
  +
  +        boolean bNextEntity = false;
  +
  +        while ((fCurrentEntity.position >= fCurrentEntity.count - delimLen)
  +            && (!bNextEntity))
  +        {
  +          System.arraycopy(fCurrentEntity.ch,
  +                           fCurrentEntity.position,
  +                           fCurrentEntity.ch,
  +                           0,
  +                           fCurrentEntity.count - fCurrentEntity.position);
  +
  +          bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
  +          fCurrentEntity.position = 0;
  +        }
  +
  +        if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  +            // something must be wrong with the input:  e.g., file ends  an unterminated comment
  +            int length = fCurrentEntity.count - fCurrentEntity.position;
  +            buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); 
  +            fCurrentEntity.columnNumber += fCurrentEntity.count;
  +            fCurrentEntity.position = fCurrentEntity.count;
  +            load(0,true);
  +            return false;
  +        }
  +
  +        // normalize newlines
  +        int offset = fCurrentEntity.position;
  +        int c = fCurrentEntity.ch[offset];
  +        int newlines = 0;
  +        if (c == '\n' || (c == '\r' && external)) {
               if (DEBUG_BUFFER) {
  -                System.out.print("(scanData: ");
  +                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                   XMLEntityManager.print(fCurrentEntity);
                   System.out.println();
               }
  -
  -            // load more characters, if needed
  -
  -            if (fCurrentEntity.position == fCurrentEntity.count) {
  -                load(0, true);
  -            }
  -
  -            boolean bNextEntity = false;
  -
  -            while ((fCurrentEntity.position >= fCurrentEntity.count - delimLen)
  -                && (!bNextEntity))
  -            {
  -              System.arraycopy(fCurrentEntity.ch,
  -                               fCurrentEntity.position,
  -                               fCurrentEntity.ch,
  -                               0,
  -                               fCurrentEntity.count - fCurrentEntity.position);
  -
  -              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false);
  -              fCurrentEntity.position = 0;
  -            }
  -
  -            if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
  -                // something must be wrong with the input:  e.g., file ends  an unterminated comment
  -                int length = fCurrentEntity.count - fCurrentEntity.position;
  -                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); 
  -                fCurrentEntity.columnNumber += fCurrentEntity.count;
  -                fCurrentEntity.position = fCurrentEntity.count;
  -                load(0,true);
  -                return false;
  -            }
  -
  -            // normalize newlines
  -            int offset = fCurrentEntity.position;
  -            int c = fCurrentEntity.ch[offset];
  -            int newlines = 0;
  -            if (c == '\n' || (c == '\r' && external)) {
  -                if (DEBUG_BUFFER) {
  -                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
  -                    XMLEntityManager.print(fCurrentEntity);
  -                    System.out.println();
  -                }
  -                do {
  -                    c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                    if (c == '\r' && external) {
  -                        newlines++;
  -                        fCurrentEntity.lineNumber++;
  -                        fCurrentEntity.columnNumber = 1;
  -                        if (fCurrentEntity.position == fCurrentEntity.count) {
  -                            offset = 0;
  -                            fCurrentEntity.position = newlines;
  -                            if (load(newlines, false)) {
  -                                break;
  -                            }
  -                        }
  -                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  -                            fCurrentEntity.position++;
  -                            offset++;
  -                        }
  -                        /*** NEWLINE NORMALIZATION ***/
  -                        else {
  -                            newlines++;
  +            do {
  +                c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                if (c == '\r' && external) {
  +                    newlines++;
  +                    fCurrentEntity.lineNumber++;
  +                    fCurrentEntity.columnNumber = 1;
  +                    if (fCurrentEntity.position == fCurrentEntity.count) {
  +                        offset = 0;
  +                        fCurrentEntity.position = newlines;
  +                        if (load(newlines, false)) {
  +                            break;
                           }
                       }
  -                    else if (c == '\n') {
  -                        newlines++;
  -                        fCurrentEntity.lineNumber++;
  -                        fCurrentEntity.columnNumber = 1;
  -                        if (fCurrentEntity.position == fCurrentEntity.count) {
  -                            offset = 0;
  -                            fCurrentEntity.position = newlines;
  -                            fCurrentEntity.count = newlines;
  -                            if (load(newlines, false)) {
  -                                break;
  -                            }
  -                        }
  +                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
  +                        fCurrentEntity.position++;
  +                        offset++;
                       }
  +                    /*** NEWLINE NORMALIZATION ***/
                       else {
  -                        fCurrentEntity.position--;
  -                        break;
  +                        newlines++;
                       }
  -                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  -                for (int i = offset; i < fCurrentEntity.position; i++) {
  -                    fCurrentEntity.ch[i] = '\n';
                   }
  -                int length = fCurrentEntity.position - offset;
  -                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  -                    buffer.append(fCurrentEntity.ch, offset, length);
  -                    if (DEBUG_BUFFER) {
  -                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  -                        XMLEntityManager.print(fCurrentEntity);
  -                        System.out.println();
  +                else if (c == '\n') {
  +                    newlines++;
  +                    fCurrentEntity.lineNumber++;
  +                    fCurrentEntity.columnNumber = 1;
  +                    if (fCurrentEntity.position == fCurrentEntity.count) {
  +                        offset = 0;
  +                        fCurrentEntity.position = newlines;
  +                        fCurrentEntity.count = newlines;
  +                        if (load(newlines, false)) {
  +                            break;
  +                        }
                       }
  -                    return true;
                   }
  +                else {
  +                    fCurrentEntity.position--;
  +                    break;
  +                }
  +            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
  +            for (int i = offset; i < fCurrentEntity.position; i++) {
  +                fCurrentEntity.ch[i] = '\n';
  +            }
  +            int length = fCurrentEntity.position - offset;
  +            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  +                buffer.append(fCurrentEntity.ch, offset, length);
                   if (DEBUG_BUFFER) {
                       System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                       XMLEntityManager.print(fCurrentEntity);
                       System.out.println();
                   }
  +                return true;
  +            }
  +            if (DEBUG_BUFFER) {
  +                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
  +                XMLEntityManager.print(fCurrentEntity);
  +                System.out.println();
               }
  +        }
   
  -            // iterate over buffer looking for delimiter
  -            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  -                c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                if (c == charAt0) {
  -                    // looks like we just hit the delimiter
  -                    int delimOffset = fCurrentEntity.position - 1;
  -                    for (int i = 1; i < delimLen; i++) {
  -                        if (fCurrentEntity.position == fCurrentEntity.count) {
  -                            fCurrentEntity.position -= i;
  -                            break OUTER;
  -                        }
  -                        c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                        if (delimiter.charAt(i) != c) {
  -                            fCurrentEntity.position--;
  -                            break;
  -                        }
  +        // iterate over buffer looking for delimiter
  +        OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  +            c = fCurrentEntity.ch[fCurrentEntity.position++];
  +            if (c == charAt0) {
  +                // looks like we just hit the delimiter
  +                int delimOffset = fCurrentEntity.position - 1;
  +                for (int i = 1; i < delimLen; i++) {
  +                    if (fCurrentEntity.position == fCurrentEntity.count) {
  +                        fCurrentEntity.position -= i;
  +                        break OUTER;
                       }
  -                    if (fCurrentEntity.position == delimOffset + delimLen) {
  -                        done = true;
  +                    c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                    if (delimiter.charAt(i) != c) {
  +                        fCurrentEntity.position--;
                           break;
                       }
                   }
  -                else if (c == '\n' || (external && c == '\r')) {
  -                    fCurrentEntity.position--;
  +                if (fCurrentEntity.position == delimOffset + delimLen) {
  +                    found = true;
                       break;
                   }
  -                else if (XMLChar.isInvalid(c)) {
  -                    fCurrentEntity.position--;
  -                    int length = fCurrentEntity.position - offset;
  -                    fCurrentEntity.columnNumber += length - newlines;
  -                    buffer.append(fCurrentEntity.ch, offset, length); 
  -                    return true;
  -                }
               }
  -            int length = fCurrentEntity.position - offset;
  -            fCurrentEntity.columnNumber += length - newlines;
  -            if (done) {
  -                length -= delimLen;
  -            }
  -            buffer.append (fCurrentEntity.ch, offset, length);
  -
  -            // return true if string was skipped
  -            if (DEBUG_BUFFER) {
  -                System.out.print(")scanData: ");
  -                XMLEntityManager.print(fCurrentEntity);
  -                System.out.println(" -> " + done);
  +            else if (c == '\n' || (external && c == '\r')) {
  +                fCurrentEntity.position--;
  +                break;
               }
  -        } while (!done);
  -        return !done;
  +            else if (XMLChar.isInvalid(c)) {
  +                fCurrentEntity.position--;
  +                int length = fCurrentEntity.position - offset;
  +                fCurrentEntity.columnNumber += length - newlines;
  +                buffer.append(fCurrentEntity.ch, offset, length); 
  +                return true;
  +            }
  +        }
  +        int length = fCurrentEntity.position - offset;
  +        fCurrentEntity.columnNumber += length - newlines;
  +        if (found) {
  +            length -= delimLen;
  +        }
  +        buffer.append (fCurrentEntity.ch, offset, length);
  +
  +        // return true if string was skipped
  +        if (DEBUG_BUFFER) {
  +            System.out.print(")scanData: ");
  +            XMLEntityManager.print(fCurrentEntity);
  +            System.out.println(" -> " + found);
  +        }
  +        return found;
   
       } // scanData(String,XMLString)
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to