impl XML11EntityScanner.java XMLScanner.java XMLEntityScanner.java XMLVersionDetector.java XML11DocumentScannerImpl.java

mrglavas Thu, 13 Nov 2003 11:50:07 -0800

mrglavas    2003/11/13 10:46:00

  Modified:    java/src/org/apache/xerces/util XML11Char.java
               java/src/org/apache/xerces/impl XML11EntityScanner.java
                        XMLScanner.java XMLEntityScanner.java
                        XMLVersionDetector.java
                        XML11DocumentScannerImpl.java
  Log:
  Made the changes necessary to align our XML 1.1 implementation with
  the W3C Proposed Recommendation (PR) for XML 1.1:
  http://www.w3.org/TR/2003/PR-xml11-20031105/
  
  The PR for XML 1.1 introduced the following changes: removal of some
  characters from NameChar, allowing control characters to appear as
  literals in internal entities, and disallowing NEL and LSEP from appearing
  in XMLDecls or TextDecls.
  
  Updated productions in XML11Char for NameChar.
  
  Added a new method called isXML11InternalEntityContent to XML11Char which
  returns whether or not a character can be interpreted as content in an
  internal entity.
  
  Modified XML11EntityScanner so that it will accept control characters as
  literals in internal entities.
  
  Added a new method to XMLEntityScanner called skipDeclSpaces which
  skips only those characters that would match non-terminal S before
  end-of-line normalization is performed.
  
  Modified XMLVersionDetector so that it always uses the XML 1.0 scanner.
  It now skips spaces using the new skipDeclSpaces method.
  
  Modified XMLScanner so that it skips spaces in an XMLDecl or 
  TextDecl using new skipDeclSpaces method. This prevents NEL and
  LSEP from being matched as S in XML 1.1 entities.
  
  Removed the scanXMLDeclOrTextDecl method from XML11DocumentScannerImpl,
  which had bugs and furthermore was never called by the parser.
  
  Revision  Changes    Path
  1.3       +39 -17    xml-xerces/java/src/org/apache/xerces/util/XML11Char.java
  
  Index: XML11Char.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/XML11Char.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- XML11Char.java    10 Apr 2003 19:54:12 -0000      1.2
  +++ XML11Char.java    13 Nov 2003 18:45:59 -0000      1.3
  @@ -101,7 +101,7 @@
       /** XML 1.1 control character mask */
       public static final int MASK_XML11_CONTROL = 0x10;
   
  -    /** XML 1.1 content (valid - "special" chars) */
  +    /** XML 1.1 content for external entities (valid - "special" chars - control 
chars) */
       public static final int MASK_XML11_CONTENT = 0x20;
   
       /** XML namespaces 1.1 NCNameStart */
  @@ -109,6 +109,9 @@
   
       /** XML namespaces 1.1 NCName */
       public static final int MASK_XML11_NCNAME = 0x80;
  +    
  +    /** XML 1.1 content for internal entities (valid - "special" chars) */
  +    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | 
MASK_XML11_CONTENT; 
   
       //
       // Static initialization
  @@ -120,11 +123,13 @@
            * XML 1.1 initialization.
            */
   
  -        // [2]: Char    ::=    #x9 | #xA | #xD | [#x20-#x7E] | #x85 | [#xA0-#xD7FF]
  -        //              | [#xE000-#xFFFD] | [#x10000-#x10FFFF] 
  +        // [2]: Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  +        // 
  +        // NOTE: This range is Char - (RestrictedChar | S | #x85 | #x2028).
           int xml11NonWhitespaceRange  [] = {
                   0x21, 0x7E, 0xA0, 0x2027, 0x2029, 0xD7FF, 0xE000, 0xFFFD, 
           };
  +
           // NOTE:  this does *NOT* correspond to the S production
           // from XML 1.0.  Rather, it corresponds to S+chars that are
           // involved in whitespace normalization.  It's handy
  @@ -133,23 +138,28 @@
           int xml11WhitespaceChars [] = {
               0x9, 0xA, 0xD, 0x20, 0x85, 0x2028,
           };
  +        
  +        // [2a]: RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | 
  +        //                          [#x7F-#x84] | [#x86-#x9F] 
           int xml11ControlCharRange [] = {
               0x1, 0x8, 0xB, 0xC, 0xE, 0x1F, 0x7f, 0x84, 0x86, 0x9f,
           };
  -
  -        // [4]: NameStartChar := ":" | [A-Z] | "_" | [a-z] |
  -        //         [#xC0-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |
  -        //         [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
  -        //          [#x3001-#xD7FF] 
  -        //           | ([#xF900-#xEFFFF])??? [#xF900-#xFFFD
  +        
  +        // [4]: NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | 
  +        //                        [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | 
  +        //                        [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] 
| 
  +        //                        [#x2070-#x218F] | [#x2C00-#x2FEF] | 
[#x3001-#xD7FF] | 
  +        //                        [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | 
[#x10000-#xEFFFF]
           int xml11NameStartCharRange [] = {
  -            ':', ':', 'A', 'Z', '_', '_',
  -            'a', 'z', 0xC0, 0x2FF, 0x370, 0x37D, 0x37F, 0x1FFF,
  -            0x200C, 0x200D, 0x2070, 0x218F, 0x2C00, 0x2FEF, 
  -            0x3001, 0xD7FF, 0xF900, 0xFFFD,
  +            ':', ':', 'A', 'Z', '_', '_', 'a', 'z', 
  +            0xC0, 0xD6, 0xD8, 0xF6, 0xF8, 0x2FF,
  +            0x370, 0x37D, 0x37F, 0x1FFF, 0x200C, 0x200D,
  +            0x2070, 0x218F, 0x2C00, 0x2FEF, 0x3001, 0xD7FF,
  +            0xF900, 0xFDCF, 0xFDF0, 0xFFFD,
           };
  -        // [4a]:  NameChar := NameStartChar | "-" | "." | [0-9] | #xB7 |
  -        //      [#x0300-#x036F] | [#x203F-#x2040] 
  +        
  +        // [4a]:  NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | 
  +        //                     [#x0300-#x036F] | [#x203F-#x2040] 
           int xml11NameCharRange [] = {
               '-', '-', '.', '.', '0', '9', 0xB7, 0xB7, 
               0x0300, 0x036F, 0x203F, 0x2040,
  @@ -249,7 +259,8 @@
       } // isXML11ValidLiteral(int):boolean
   
       /**
  -     * Returns true if the specified character can be considered content.
  +     * Returns true if the specified character can be considered 
  +     * content in an external parsed entity.
        *
        * @param c The character to check.
        */
  @@ -257,6 +268,17 @@
           return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT) != 0) ||
                  (0x10000 <= c && c <= 0x10FFFF);
       } // isXML11Content(int):boolean
  +    
  +    /**
  +     * Returns true if the specified character can be considered 
  +     * content in an internal parsed entity.
  +     *
  +     * @param c The character to check.
  +     */
  +    public static boolean isXML11InternalEntityContent(int c) {
  +        return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT_INTERNAL) != 0) 
||
  +               (0x10000 <= c && c <= 0x10FFFF);
  +    } // isXML11InternalEntityContent(int):boolean
   
       /**
        * Returns true if the specified character is a valid name start
  
  
  
  1.9       +104 -40   
xml-xerces/java/src/org/apache/xerces/impl/XML11EntityScanner.java
  
  Index: XML11EntityScanner.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11EntityScanner.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- XML11EntityScanner.java   7 Nov 2003 21:00:27 -0000       1.8
  +++ XML11EntityScanner.java   13 Nov 2003 18:45:59 -0000      1.9
  @@ -70,6 +70,7 @@
    * Implements the entity scanner methods in
    * the context of XML 1.1.
    *
  + * @author Michael Glavassevich, IBM
    * @author Neil Graham, IBM
    * @version $Id$
    */
  @@ -552,11 +553,23 @@
           }
   
           // inner loop, scanning for content
  -        while (fCurrentEntity.position < fCurrentEntity.count) {
  -            c = fCurrentEntity.ch[fCurrentEntity.position++];
  -            if (!XML11Char.isXML11Content(c) || ((c == 0x85 || c == 0x2028) && 
external)) {
  -                fCurrentEntity.position--;
  -                break;
  +        if (external) {
  +            while (fCurrentEntity.position < fCurrentEntity.count) {
  +                c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                if (!XML11Char.isXML11Content(c) || c == 0x85 || c == 0x2028) {
  +                    fCurrentEntity.position--;
  +                    break;
  +                }
  +            }
  +        }
  +        else {
  +            while (fCurrentEntity.position < fCurrentEntity.count) {
  +                c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                // In internal entities control characters are allowed to appear 
unescaped.
  +                if (!XML11Char.isXML11InternalEntityContent(c)) {
  +                    fCurrentEntity.position--;
  +                    break;
  +                }
               }
           }
           int length = fCurrentEntity.position - offset;
  @@ -677,14 +690,25 @@
           }
   
           // scan literal value
  -        while (fCurrentEntity.position < fCurrentEntity.count) {
  -            c = fCurrentEntity.ch[fCurrentEntity.position++];
  -            if ((c == quote &&
  -                 (!fCurrentEntity.literal || external))
  -                || c == '%' || !XML11Char.isXML11Content(c) 
  -                || ((c == 0x85 || c == 0x2028) && external)) {
  -                fCurrentEntity.position--;
  -                break;
  +        if (external) {
  +            while (fCurrentEntity.position < fCurrentEntity.count) {
  +                c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                if (c == quote || c == '%' || !XML11Char.isXML11Content(c) 
  +                    || c == 0x85 || c == 0x2028) {
  +                    fCurrentEntity.position--;
  +                    break;
  +                }
  +            }
  +        }
  +        else {
  +            while (fCurrentEntity.position < fCurrentEntity.count) {
  +                c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                // In internal entities control characters are allowed to appear 
unescaped.
  +                if ((c == quote && !fCurrentEntity.literal)
  +                    || c == '%' || !XML11Char.isXML11InternalEntityContent(c)) {
  +                    fCurrentEntity.position--;
  +                    break;
  +                }
               }
           }
           int length = fCurrentEntity.position - offset;
  @@ -833,38 +857,78 @@
               }
   
               // iterate over buffer looking for delimiter
  -            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  -                c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                if (c == charAt0) {
  -                    // looks like we just hit the delimiter
  -                    int delimOffset = fCurrentEntity.position - 1;
  -                    for (int i = 1; i < delimLen; i++) {
  -                        if (fCurrentEntity.position == fCurrentEntity.count) {
  -                            fCurrentEntity.position -= i;
  -                            break OUTER;
  +            if (external) {
  +                OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  +                    c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                    if (c == charAt0) {
  +                        // looks like we just hit the delimiter
  +                        int delimOffset = fCurrentEntity.position - 1;
  +                        for (int i = 1; i < delimLen; i++) {
  +                            if (fCurrentEntity.position == fCurrentEntity.count) {
  +                                fCurrentEntity.position -= i;
  +                                break OUTER;
  +                            }
  +                            c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                            if (delimiter.charAt(i) != c) {
  +                                fCurrentEntity.position--;
  +                                break;
  +                            }
  +                         }
  +                         if (fCurrentEntity.position == delimOffset + delimLen) {
  +                            done = true;
  +                            break;
  +                         }
  +                    }
  +                    else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
  +                        fCurrentEntity.position--;
  +                        break;
  +                    }
  +                    // In external entities control characters cannot appear 
  +                    // as literals so do not skip over them.
  +                    else if (!XML11Char.isXML11ValidLiteral(c)) {
  +                        fCurrentEntity.position--;
  +                        int length = fCurrentEntity.position - offset;
  +                        fCurrentEntity.columnNumber += length - newlines;
  +                        buffer.append(fCurrentEntity.ch, offset, length); 
  +                        return true;
  +                    }
  +                }
  +            }
  +            else {
  +                OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
  +                    c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                    if (c == charAt0) {
  +                        // looks like we just hit the delimiter
  +                        int delimOffset = fCurrentEntity.position - 1;
  +                        for (int i = 1; i < delimLen; i++) {
  +                            if (fCurrentEntity.position == fCurrentEntity.count) {
  +                                fCurrentEntity.position -= i;
  +                                break OUTER;
  +                            }
  +                            c = fCurrentEntity.ch[fCurrentEntity.position++];
  +                            if (delimiter.charAt(i) != c) {
  +                                fCurrentEntity.position--;
  +                                break;
  +                            }
                           }
  -                        c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                        if (delimiter.charAt(i) != c) {
  -                            fCurrentEntity.position--;
  +                        if (fCurrentEntity.position == delimOffset + delimLen) {
  +                            done = true;
                               break;
                           }
                       }
  -                    if (fCurrentEntity.position == delimOffset + delimLen) {
  -                        done = true;
  +                    else if (c == '\n') {
  +                        fCurrentEntity.position--;
                           break;
                       }
  -                }
  -                else if (c == '\n' || (external && (c == '\r' || c == 0x85 || c == 
0x2028))) {
  -                    fCurrentEntity.position--;
  -                    break;
  -                }
  -                // note that we should not skip over control characters!
  -                else if (!XML11Char.isXML11ValidLiteral(c)) {
  -                    fCurrentEntity.position--;
  -                    int length = fCurrentEntity.position - offset;
  -                    fCurrentEntity.columnNumber += length - newlines;
  -                    buffer.append(fCurrentEntity.ch, offset, length); 
  -                    return true;
  +                    // Control characters are allowed to appear as literals
  +                    // in internal entities.
  +                    else if (!XML11Char.isXML11Valid(c)) {
  +                        fCurrentEntity.position--;
  +                        int length = fCurrentEntity.position - offset;
  +                        fCurrentEntity.columnNumber += length - newlines;
  +                        buffer.append(fCurrentEntity.ch, offset, length); 
  +                        return true;
  +                    }
                   }
               }
               int length = fCurrentEntity.position - offset;
  
  
  
  1.38      +20 -5     xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
  retrieving revision 1.37
  retrieving revision 1.38
  diff -u -r1.37 -r1.38
  --- XMLScanner.java   7 Nov 2003 00:26:17 -0000       1.37
  +++ XMLScanner.java   13 Nov 2003 18:45:59 -0000      1.38
  @@ -404,7 +404,7 @@
           int state = STATE_VERSION;
   
           boolean dataFoundForTarget = false;
  -        boolean sawSpace = fEntityScanner.skipSpaces();
  +        boolean sawSpace = fEntityScanner.skipDeclSpaces();
           // since pseudoattributes are *not* attributes,
           // their quotes don't need to be preserved in external parameter entities.
           // the XMLEntityScanner#scanLiteral method will continue to
  @@ -507,7 +507,7 @@
                       reportFatalError("NoMorePseudoAttributes", null);
                   }
               }
  -            sawSpace = fEntityScanner.skipSpaces();
  +            sawSpace = fEntityScanner.skipDeclSpaces();
           }
           // restore original literal value
           if(currLiteral) 
  @@ -565,17 +565,29 @@
                                         XMLString value) 
           throws IOException, XNIException {
   
  +        // REVISIT: This method is used for generic scanning of 
  +        // pseudo attributes, but since there are only three such
  +        // attributes: version, encoding, and standalone there are
  +        // for performant ways of scanning them. Every decl must
  +        // have a version, and in TextDecls this version must
  +        // be followed by an encoding declaration. Also the
  +        // methods we invoke on the scanners allow non-ASCII
  +        // characters to be parsed in the decls, but since
  +        // we don't even know what the actual encoding of the
  +        // document is until we scan the encoding declaration
  +        // you cannot reliably read any characters outside
  +        // of the ASCII range here. -- mrglavas
           String name = fEntityScanner.scanName();
           XMLEntityManager.print(fEntityManager.getCurrentEntity());
           if (name == null) {
               reportFatalError("PseudoAttrNameExpected", null);
           }
  -        fEntityScanner.skipSpaces();
  +        fEntityScanner.skipDeclSpaces();
           if (!fEntityScanner.skipChar('=')) {
               reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
                                : "EqRequiredInXMLDecl", new Object[]{name});
           }
  -        fEntityScanner.skipSpaces();
  +        fEntityScanner.skipDeclSpaces();
           int quote = fEntityScanner.peekChar();
           if (quote != '\'' && quote != '"') {
               reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
  @@ -591,6 +603,9 @@
                       if (c == '&' || c == '%' || c == '<' || c == ']') {
                           fStringBuffer2.append((char)fEntityScanner.scanChar());
                       }
  +                    // REVISIT: Even if you could reliably read non-ASCII chars
  +                    // why bother scanning for surrogates here? Only ASCII chars
  +                    // match the productions in XMLDecls and TextDecls. -- mrglavas
                       else if (XMLChar.isHighSurrogate(c)) {
                           scanSurrogates(fStringBuffer2);
                       }
  
  
  
  1.19      +92 -1     xml-xerces/java/src/org/apache/xerces/impl/XMLEntityScanner.java
  
  Index: XMLEntityScanner.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityScanner.java,v
  retrieving revision 1.18
  retrieving revision 1.19
  diff -u -r1.18 -r1.19
  --- XMLEntityScanner.java     7 Nov 2003 21:00:27 -0000       1.18
  +++ XMLEntityScanner.java     13 Nov 2003 18:45:59 -0000      1.19
  @@ -1322,6 +1322,97 @@
       } // skipSpaces():boolean
   
       /**
  +     * Skips space characters appearing immediately on the input that would
  +     * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 
  +     * normalization is performed. This is useful when scanning structures 
  +     * such as the XMLDecl and TextDecl that can only contain US-ASCII 
  +     * characters.
  +     * <p>
  +     * <strong>Note:</strong> The characters are consumed only if they would
  +     * match non-terminal S before end of line normalization is performed.
  +     *
  +     * @return Returns true if at least one space character was skipped.
  +     *
  +     * @throws IOException  Thrown if i/o error occurs.
  +     * @throws EOFException Thrown on end of file.
  +     *
  +     * @see org.apache.xerces.util.XMLChar#isSpace
  +     */
  +    public boolean skipDeclSpaces() throws IOException {
  +        if (DEBUG_BUFFER) {
  +            System.out.print("(skipDeclSpaces: ");
  +            XMLEntityManager.print(fCurrentEntity);
  +            System.out.println();
  +        }
  +
  +        // load more characters, if needed
  +        if (fCurrentEntity.position == fCurrentEntity.count) {
  +            load(0, true);
  +        }
  +
  +        // skip spaces
  +        int c = fCurrentEntity.ch[fCurrentEntity.position];
  +        if (XMLChar.isSpace(c)) {
  +            boolean external = fCurrentEntity.isExternal();
  +            do {
  +                boolean entityChanged = false;
  +                // handle newlines
  +                if (c == '\n' || (external && c == '\r')) {
  +                    fCurrentEntity.lineNumber++;
  +                    fCurrentEntity.columnNumber = 1;
  +                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
  +                        fCurrentEntity.ch[0] = (char)c;
  +                        entityChanged = load(1, true);
  +                        if (!entityChanged)
  +                            // the load change the position to be 1,
  +                            // need to restore it when entity not changed
  +                            fCurrentEntity.position = 0;
  +                    }
  +                    if (c == '\r' && external) {
  +                        // REVISIT: Does this need to be updated to fix the
  +                        //          #x0D ^#x0A newline normalization problem? -Ac
  +                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
  +                            fCurrentEntity.position--;
  +                        }
  +                    }
  +                    /*** NEWLINE NORMALIZATION ***
  +                    else {
  +                        if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  +                            && external) {
  +                            fCurrentEntity.position++;
  +                        }
  +                    }
  +                    /***/
  +                }
  +                else {
  +                    fCurrentEntity.columnNumber++;
  +                }
  +                // load more characters, if needed
  +                if (!entityChanged)
  +                    fCurrentEntity.position++;
  +                if (fCurrentEntity.position == fCurrentEntity.count) {
  +                    load(0, true);
  +                }
  +            } while (XMLChar.isSpace(c = 
fCurrentEntity.ch[fCurrentEntity.position]));
  +            if (DEBUG_BUFFER) {
  +                System.out.print(")skipDeclSpaces: ");
  +                XMLEntityManager.print(fCurrentEntity);
  +                System.out.println(" -> true");
  +            }
  +            return true;
  +        }
  +
  +        // no spaces were found
  +        if (DEBUG_BUFFER) {
  +            System.out.print(")skipDeclSpaces: ");
  +            XMLEntityManager.print(fCurrentEntity);
  +            System.out.println(" -> false");
  +        }
  +        return false;
  +
  +    } // skipDeclSpaces():boolean
  +
  +    /**
        * Skips the specified string appearing immediately on the input.
        * <p>
        * <strong>Note:</strong> The characters are consumed only if they are
  
  
  
  1.10      +7 -7      
xml-xerces/java/src/org/apache/xerces/impl/XMLVersionDetector.java
  
  Index: XMLVersionDetector.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLVersionDetector.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XMLVersionDetector.java   2 Nov 2003 12:25:33 -0000       1.9
  +++ XMLVersionDetector.java   13 Nov 2003 18:45:59 -0000      1.10
  @@ -183,16 +183,16 @@
       public short determineDocVersion(XMLInputSource inputSource) throws IOException 
{
           fEncoding = fEntityManager.setupCurrentEntity(fXMLSymbol, inputSource, 
false, true);
   
  -        // must assume 1.1 at this stage so that whitespace
  -        // handling is correct in the XML decl...
  -        fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1);
  +        // Must use XML 1.0 scanner to handle whitespace correctly
  +        // in the XML declaration.
  +        fEntityManager.setScannerVersion(Constants.XML_VERSION_1_0);
           XMLEntityScanner scanner = fEntityManager.getEntityScanner();
           try {
               if (!scanner.skipString("<?xml")) {
                   // definitely not a well-formed 1.1 doc!
                   return Constants.XML_VERSION_1_0;
               }
  -            if (!scanner.skipSpaces()) {
  +            if (!scanner.skipDeclSpaces()) {
                   fixupCurrentEntity(fEntityManager, fExpectedVersionString, 5);
                   return Constants.XML_VERSION_1_0;
               }
  @@ -200,14 +200,14 @@
                   fixupCurrentEntity(fEntityManager, fExpectedVersionString, 6);
                   return Constants.XML_VERSION_1_0;
               }
  -            scanner.skipSpaces();
  +            scanner.skipDeclSpaces();
               // Check if the next character is '='. If it is then consume it.
               if (scanner.peekChar() != '=') {
                   fixupCurrentEntity(fEntityManager, fExpectedVersionString, 13);
                   return Constants.XML_VERSION_1_0;
               }
               scanner.scanChar();
  -            scanner.skipSpaces();
  +            scanner.skipDeclSpaces();
               int quoteChar = scanner.scanChar();
               fExpectedVersionString[14] = (char) quoteChar;
               for (int versionPos = 0; versionPos < XML11_VERSION.length; 
versionPos++) {
  
  
  
  1.13      +1 -172    
xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- XML11DocumentScannerImpl.java     25 Jul 2003 19:41:10 -0000      1.12
  +++ XML11DocumentScannerImpl.java     13 Nov 2003 18:45:59 -0000      1.13
  @@ -176,177 +176,6 @@
       } // scanContent():int
   
       /**
  -     * Scans an XML or text declaration.
  -     * <p>
  -     * <pre>
  -     * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  -     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
  -     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
  -     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  -     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
  -     *                 | ('"' ('yes' | 'no') '"'))
  -     *
  -     * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  -     * </pre>
  -     * <p> this was overridden to allow for version="1.1"</p>
  -     *
  -     * @param scanningTextDecl True if a text declaration is to
  -     *                         be scanned instead of an XML
  -     *                         declaration.
  -     * @param pseudoAttributeValues An array of size 3 to return the version,
  -     *                         encoding and standalone pseudo attribute values
  -     *                         (in that order).
  -     *
  -     * <strong>Note:</strong> This method uses fString, anything in it
  -     * at the time of calling is lost.
  -     */
  -    protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
  -                                         String[] pseudoAttributeValues) 
  -        throws IOException, XNIException {
  -
  -        // pseudo-attribute values
  -        String version = null;
  -        String encoding = null;
  -        String standalone = null;
  -
  -        // scan pseudo-attributes
  -        final int STATE_VERSION = 0;
  -        final int STATE_ENCODING = 1;
  -        final int STATE_STANDALONE = 2;
  -        final int STATE_DONE = 3;
  -        int state = STATE_VERSION;
  -
  -        boolean dataFoundForTarget = false;
  -        boolean sawSpace = fEntityScanner.skipSpaces();
  -        while (fEntityScanner.peekChar() != '?') {
  -            dataFoundForTarget = true;
  -            String name = scanPseudoAttribute(scanningTextDecl, fString);
  -            switch (state) {
  -                case STATE_VERSION: {
  -                    if (name == fVersionSymbol) {
  -                        if (!sawSpace) {
  -                            reportFatalError(scanningTextDecl
  -                                       ? "SpaceRequiredBeforeVersionInTextDecl"
  -                                       : "SpaceRequiredBeforeVersionInXMLDecl",
  -                                             null);
  -                        }
  -                        version = fString.toString();
  -                        state = STATE_ENCODING;
  -                        if (!version.equals("1.0") || !version.equals("1.1")) {
  -                            // REVISIT: XML REC says we should throw an error in 
such cases.
  -                            // some may object the throwing of fatalError.
  -                            reportFatalError("VersionNotSupported", 
  -                                             new Object[]{version});
  -                        }
  -                    }
  -                    else if (name == fEncodingSymbol) {
  -                        if (!scanningTextDecl) {
  -                            reportFatalError("VersionInfoRequired", null);
  -                        }
  -                        if (!sawSpace) {
  -                            reportFatalError(scanningTextDecl
  -                                      ? "SpaceRequiredBeforeEncodingInTextDecl"
  -                                      : "SpaceRequiredBeforeEncodingInXMLDecl",
  -                                             null);
  -                        }
  -                        encoding = fString.toString();
  -                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  -                    }
  -                    else {
  -                        if (scanningTextDecl) {
  -                            reportFatalError("EncodingDeclRequired", null);
  -                        }
  -                        else {
  -                            reportFatalError("VersionInfoRequired", null);
  -                        }
  -                    }
  -                    break;
  -                }
  -                case STATE_ENCODING: {
  -                    if (name == fEncodingSymbol) {
  -                        if (!sawSpace) {
  -                            reportFatalError(scanningTextDecl
  -                                      ? "SpaceRequiredBeforeEncodingInTextDecl"
  -                                      : "SpaceRequiredBeforeEncodingInXMLDecl",
  -                                             null);
  -                        }
  -                        encoding = fString.toString();
  -                        state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
  -                    }
  -                    else if (!scanningTextDecl && name == fStandaloneSymbol) {
  -                        if (!sawSpace) {
  -                            reportFatalError("SpaceRequiredBeforeStandalone",
  -                                             null);
  -                        }
  -                        standalone = fString.toString();
  -                        state = STATE_DONE;
  -                        if (!standalone.equals("yes") && !standalone.equals("no")) {
  -                            reportFatalError("SDDeclInvalid", new Object[] {standalone});
  -                        }
  -                    }
  -                    else {
  -                        reportFatalError("EncodingDeclRequired", new Object[] {standalone});
  -                    }
  -                    break;
  -                }
  -                case STATE_STANDALONE: {
  -                    if (name == fStandaloneSymbol) {
  -                        if (!sawSpace) {
  -                            reportFatalError("SpaceRequiredBeforeStandalone",
  -                                             null);
  -                        }
  -                        standalone = fString.toString();
  -                        state = STATE_DONE;
  -                        if (!standalone.equals("yes") && !standalone.equals("no")) {
  -                            reportFatalError("SDDeclInvalid", new Object[] {standalone});
  -                        }
  -                    }
  -                    else {
  -                        reportFatalError("EncodingDeclRequired", null);
  -                    }
  -                    break;
  -                }
  -                default: {
  -                    reportFatalError("NoMorePseudoAttributes", null);
  -                }
  -            }
  -            sawSpace = fEntityScanner.skipSpaces();
  -        }
  -        // REVISIT: should we remove this error reporting?
  -        if (scanningTextDecl && state != STATE_DONE) {
  -            reportFatalError("MorePseudoAttributes", null);
  -        }
  -        
  -        // If there is no data in the xml or text decl then we fail to report error 
  -        // for version or encoding info above.
  -        if (scanningTextDecl) {
  -            if (!dataFoundForTarget && encoding == null) {
  -                reportFatalError("EncodingDeclRequired", null);
  -            }
  -        }
  -        else {
  -            if (!dataFoundForTarget && version == null) {
  -                reportFatalError("VersionInfoRequired", null);
  -            }
  -        }
  -
  -        // end
  -        if (!fEntityScanner.skipChar('?')) {
  -            reportFatalError("XMLDeclUnterminated", null);
  -        }
  -        if (!fEntityScanner.skipChar('>')) {
  -            reportFatalError("XMLDeclUnterminated", null);
  -
  -        }
  -        
  -        // fill in return array
  -        pseudoAttributeValues[0] = version;
  -        pseudoAttributeValues[1] = encoding;
  -        pseudoAttributeValues[2] = standalone;
  -
  -    } // scanXMLDeclOrTextDecl(boolean)
  -
  -    /**
        * Scans an attribute value and normalizes whitespace converting all
        * whitespace characters to space characters.
        *


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-xerces/java/src/org/apache/xerces/impl XML11EntityScanner.java XMLScanner.java XMLEntityScanner.java XMLVersionDetector.java XML11DocumentScannerImpl.java

Reply via email to