mrglavas    2003/12/16 13:37:11

  Modified:    java/src/org/apache/xerces/impl
                        XML11NSDocumentScannerImpl.java
                        XML11DocumentScannerImpl.java
                        XML11DTDScannerImpl.java
                        XMLDocumentScannerImpl.java XML11EntityScanner.java
                        XMLDocumentFragmentScannerImpl.java XMLScanner.java
               java/src/org/apache/xerces/util XML11Char.java
  Log:
  Fixing Bugs 24886, 25571, 25572 and 25573.
  
  24886 - Names in XML 1.1 are now allowed to contain supplemental
  characters. Support for this was missing from the implementation.
  We need to check for surrogate character pairs in all names.
  Reorganized the order of checks in the dispatchers so that we do
  checks for regular name characters early and then as a last check,
  check for high surrogates of name chars. We should now be able
  to determine the well-formedness and validity of names containing
  supplemental characters. This should also fix some of the 1.1
  support in DOM Level 3.
  
  25571 - An internal buffer wasn't being cleared which caused
  attribute values containing supplemental characters to become
  corrupted.
  
  25572 - Supplemental characters were being rejected in comments
  in XML 1.0 and 1.1 documents.
  
  25573 - Root elements whose names start with any 1.1
  NameStartChar were being rejected. We didn't have the proper
  hook in the base class for the 1.1 document scanner.
  
  Revision  Changes    Path
  1.5       +7 -3      
xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java
  
  Index: XML11NSDocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11NSDocumentScannerImpl.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XML11NSDocumentScannerImpl.java   22 Oct 2003 18:33:43 -0000      1.4
  +++ XML11NSDocumentScannerImpl.java   16 Dec 2003 21:37:10 -0000      1.5
  @@ -208,9 +208,13 @@
                   empty = true;
                   break;
               } else if (!isValidNameStartChar(c) || !sawSpace) {
  -                reportFatalError(
  -                    "ElementUnterminated",
  -                    new Object[] { rawname });
  +                // Second chance. Check if this character is a high
  +                // surrogate of a valid name start character.
  +                if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
  +                    reportFatalError(
  +                        "ElementUnterminated",
  +                        new Object[] { rawname });
  +                }
               }
   
               // attributes
  
  
  
  1.14      +10 -1     
xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- XML11DocumentScannerImpl.java     13 Nov 2003 18:45:59 -0000      1.13
  +++ XML11DocumentScannerImpl.java     16 Dec 2003 21:37:10 -0000      1.14
  @@ -359,6 +359,7 @@
                       }
                   }
                   else if (c != -1 && XMLChar.isHighSurrogate(c)) {
  +                    fStringBuffer3.clear();
                       if (scanSurrogates(fStringBuffer3)) {
                           fStringBuffer.append(fStringBuffer3);
                           if (entityDepth == fEntityDepth) {
  @@ -502,6 +503,14 @@
       protected boolean isValidNameStartChar(int value) {
           return (XML11Char.isXML11NameStart(value)); 
       } // isValidNameStartChar(int):  boolean
  +    
  +    // returns true if the given character is 
  +    // a valid high surrogate for a nameStartChar 
  +    // with respect to the version of XML understood 
  +    // by this scanner.
  +    protected boolean isValidNameStartHighSurrogate(int value) {
  +        return XML11Char.isXML11NameHighSurrogate(value); 
  +    } // isValidNameStartHighSurrogate(int):  boolean
   
       protected boolean versionSupported(String version) {
           return (version.equals("1.1") || version.equals("1.0"));
  
  
  
  1.8       +9 -1      
xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
  
  Index: XML11DTDScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- XML11DTDScannerImpl.java  25 Jul 2003 19:41:10 -0000      1.7
  +++ XML11DTDScannerImpl.java  16 Dec 2003 21:37:10 -0000      1.8
  @@ -223,6 +223,14 @@
       protected boolean isValidNameStartChar(int value) {
           return (XML11Char.isXML11NameStart(value)); 
       } // isValidNameStartChar(int):  boolean
  +    
  +    // returns true if the given character is 
  +    // a valid high surrogate for a nameStartChar 
  +    // with respect to the version of XML understood 
  +    // by this scanner.
  +    protected boolean isValidNameStartHighSurrogate(int value) {
  +        return XML11Char.isXML11NameHighSurrogate(value); 
  +    } // isValidNameStartHighSurrogate(int):  boolean
   
       // note that, according to 4.3.4 of the XML 1.1 spec, XML 1.1
       // documents may invoke 1.0 entities; thus either version decl (or none!)
  
  
  
  1.35      +19 -8     
xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java
  
  Index: XMLDocumentScannerImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java,v
  retrieving revision 1.34
  retrieving revision 1.35
  diff -u -r1.34 -r1.35
  --- XMLDocumentScannerImpl.java       7 Nov 2003 00:26:17 -0000       1.34
  +++ XMLDocumentScannerImpl.java       16 Dec 2003 21:37:10 -0000      1.35
  @@ -757,11 +757,7 @@
                           }
                           case SCANNER_STATE_START_OF_MARKUP: {
                               fMarkupDepth++;
  -                            if (fEntityScanner.skipChar('?')) {
  -                                setScannerState(SCANNER_STATE_PI);
  -                                again = true;
  -                            }
  -                            else if (fEntityScanner.skipChar('!')) {
  +                            if (fEntityScanner.skipChar('!')) {
                                   if (fEntityScanner.skipChar('-')) {
                                       if (!fEntityScanner.skipChar('-')) {
                                           reportFatalError("InvalidCommentStart",
  @@ -779,7 +775,16 @@
                                                        null);
                                   }
                               }
  -                            else if 
(XMLChar.isNameStart(fEntityScanner.peekChar())) {
  +                            else if 
(isValidNameStartChar(fEntityScanner.peekChar())) {
  +                                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
  +                                setDispatcher(fContentDispatcher);
  +                                return true;
  +                            }
  +                            else if (fEntityScanner.skipChar('?')) {
  +                                setScannerState(SCANNER_STATE_PI);
  +                                again = true;
  +                            }
  +                            else if 
(isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                   setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                                   setDispatcher(fContentDispatcher);
                                   return true;
  @@ -1142,7 +1147,13 @@
                                                    null);
                                   again = true;
                               }
  -                            else if 
(XMLChar.isNameStart(fEntityScanner.peekChar())) {
  +                            else if 
(isValidNameStartChar(fEntityScanner.peekChar())) {
  +                                reportFatalError("MarkupNotRecognizedInMisc",
  +                                                 null);
  +                                scanStartElement();
  +                                setScannerState(SCANNER_STATE_CONTENT);
  +                            }
  +                            else if 
(isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                   reportFatalError("MarkupNotRecognizedInMisc",
                                                    null);
                                   scanStartElement();
  
  
  
  1.10      +374 -57   
xml-xerces/java/src/org/apache/xerces/impl/XML11EntityScanner.java
  
  Index: XML11EntityScanner.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XML11EntityScanner.java,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XML11EntityScanner.java   13 Nov 2003 18:45:59 -0000      1.9
  +++ XML11EntityScanner.java   16 Dec 2003 21:37:10 -0000      1.10
  @@ -183,26 +183,80 @@
   
           // scan nmtoken
           int offset = fCurrentEntity.position;
  -        while (XML11Char.isXML11Name(fCurrentEntity.ch[fCurrentEntity.position])) {
  -            if (++fCurrentEntity.position == fCurrentEntity.count) {
  -                int length = fCurrentEntity.position - offset;
  -                if (length == fCurrentEntity.ch.length) {
  -                    // bad luck we have to resize our buffer
  -                    char[] tmp = new char[fCurrentEntity.ch.length << 1];
  -                    System.arraycopy(fCurrentEntity.ch, offset,
  -                                     tmp, 0, length);
  -                    fCurrentEntity.ch = tmp;
  -                }
  -                else {
  -                    System.arraycopy(fCurrentEntity.ch, offset,
  -                                     fCurrentEntity.ch, 0, length);
  +        
  +        do {
  +            char ch = fCurrentEntity.ch[fCurrentEntity.position];
  +            if (XML11Char.isXML11Name(ch)) {
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
                   }
  -                offset = 0;
  -                if (load(length, false)) {
  +            }
  +            else if (XML11Char.isXML11NameHighSurrogate(ch)) {
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        --fCurrentEntity.position;
  +                        break;
  +                    }
  +                }
  +                char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +                if ( !XMLChar.isLowSurrogate(ch2) ||
  +                     !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  +                    --fCurrentEntity.position;
                       break;
                   }
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else {
  +                break;
               }
           }
  +        while (true);
  +        
           int length = fCurrentEntity.position - offset;
           fCurrentEntity.columnNumber += length;
   
  @@ -239,9 +293,11 @@
   
           // scan name
           int offset = fCurrentEntity.position;
  -        if (XML11Char.isXML11NameStart(fCurrentEntity.ch[offset])) {
  +        char ch = fCurrentEntity.ch[offset];
  +        
  +        if (XML11Char.isXML11NameStart(ch)) {
               if (++fCurrentEntity.position == fCurrentEntity.count) {
  -                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  +                fCurrentEntity.ch[0] = ch;
                   offset = 0;
                   if (load(1, false)) {
                       fCurrentEntity.columnNumber++;
  @@ -249,7 +305,60 @@
                       return symbol;
                   }
               }
  -            while 
(XML11Char.isXML11Name(fCurrentEntity.ch[fCurrentEntity.position])) {
  +        }
  +        else if (XML11Char.isXML11NameHighSurrogate(ch)) {
  +            if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                fCurrentEntity.ch[0] = ch;
  +                offset = 0;
  +                if (load(1, false)) {
  +                    --fCurrentEntity.position;
  +                    return null;
  +                }
  +            }
  +            char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +            if ( !XMLChar.isLowSurrogate(ch2) ||
  +                 !XML11Char.isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) {
  +                --fCurrentEntity.position;
  +                return null;
  +            }
  +            if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                fCurrentEntity.ch[0] = ch;
  +                fCurrentEntity.ch[1] = ch2;
  +                offset = 0;
  +                if (load(2, false)) {
  +                    fCurrentEntity.columnNumber += 2;
  +                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
  +                    return symbol;
  +                }
  +            }
  +        }
  +        else {
  +            return null;
  +        }
  +        
  +        do {
  +            ch = fCurrentEntity.ch[fCurrentEntity.position];
  +            if (XML11Char.isXML11Name(ch)) {
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else if (XML11Char.isXML11NameHighSurrogate(ch)) {
                   if (++fCurrentEntity.position == fCurrentEntity.count) {
                       int length = fCurrentEntity.position - offset;
                       if (length == fCurrentEntity.ch.length) {
  @@ -265,11 +374,41 @@
                       }
                       offset = 0;
                       if (load(length, false)) {
  +                        --fCurrentEntity.position;
                           break;
                       }
                   }
  +                char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +                if ( !XMLChar.isLowSurrogate(ch2) ||
  +                     !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  +                    --fCurrentEntity.position;
  +                    break;
  +                }
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else {
  +                break;
               }
           }
  +        while (true);
  +
           int length = fCurrentEntity.position - offset;
           fCurrentEntity.columnNumber += length;
   
  @@ -307,9 +446,11 @@
   
           // scan name
           int offset = fCurrentEntity.position;
  -        if (XML11Char.isXML11NCNameStart(fCurrentEntity.ch[offset])) {
  +        char ch = fCurrentEntity.ch[offset];
  +        
  +        if (XML11Char.isXML11NCNameStart(ch)) {
               if (++fCurrentEntity.position == fCurrentEntity.count) {
  -                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  +                fCurrentEntity.ch[0] = ch;
                   offset = 0;
                   if (load(1, false)) {
                       fCurrentEntity.columnNumber++;
  @@ -317,7 +458,60 @@
                       return symbol;
                   }
               }
  -            while 
(XML11Char.isXML11NCName(fCurrentEntity.ch[fCurrentEntity.position])) {
  +        }
  +        else if (XML11Char.isXML11NameHighSurrogate(ch)) {
  +            if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                fCurrentEntity.ch[0] = ch;
  +                offset = 0;
  +                if (load(1, false)) {
  +                    --fCurrentEntity.position;
  +                    return null;
  +                }
  +            }
  +            char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +            if ( !XMLChar.isLowSurrogate(ch2) ||
  +                 !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
  +                --fCurrentEntity.position;
  +                return null;
  +            }
  +            if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                fCurrentEntity.ch[0] = ch;
  +                fCurrentEntity.ch[1] = ch2;
  +                offset = 0;
  +                if (load(2, false)) {
  +                    fCurrentEntity.columnNumber += 2;
  +                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
  +                    return symbol;
  +                }
  +            }
  +        }
  +        else {
  +            return null;
  +        }
  +        
  +        do {
  +            ch = fCurrentEntity.ch[fCurrentEntity.position];
  +            if (XML11Char.isXML11NCName(ch)) {
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else if (XML11Char.isXML11NameHighSurrogate(ch)) {
                   if (++fCurrentEntity.position == fCurrentEntity.count) {
                       int length = fCurrentEntity.position - offset;
                       if (length == fCurrentEntity.ch.length) {
  @@ -333,11 +527,41 @@
                       }
                       offset = 0;
                       if (load(length, false)) {
  +                        --fCurrentEntity.position;
                           break;
                       }
                   }
  +                char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +                if ( !XMLChar.isLowSurrogate(ch2) ||
  +                     !XML11Char.isXML11NCName(XMLChar.supplemental(ch, ch2)) ) {
  +                    --fCurrentEntity.position;
  +                    break;
  +                }
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else {
  +                break;
               }
           }
  +        while (true);
  +        
           int length = fCurrentEntity.position - offset;
           fCurrentEntity.columnNumber += length;
   
  @@ -381,22 +605,57 @@
   
           // scan qualified name
           int offset = fCurrentEntity.position;
  -        if (XML11Char.isXML11NCNameStart(fCurrentEntity.ch[offset])) {
  +        char ch = fCurrentEntity.ch[offset];
  +        
  +        if (XML11Char.isXML11NCNameStart(ch)) {
               if (++fCurrentEntity.position == fCurrentEntity.count) {
  -                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
  +                fCurrentEntity.ch[0] = ch;
                   offset = 0;
                   if (load(1, false)) {
                       fCurrentEntity.columnNumber++;
  -                    String name =
  -                        fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
  +                    String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
                       qname.setValues(null, name, name, null);
                       return true;
                   }
               }
  -            int index = -1;
  -            while 
(XML11Char.isXML11Name(fCurrentEntity.ch[fCurrentEntity.position])) {
  -                char c = fCurrentEntity.ch[fCurrentEntity.position];
  -                if (c == ':') {
  +        }
  +        else if (XML11Char.isXML11NameHighSurrogate(ch)) {
  +            if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                fCurrentEntity.ch[0] = ch;
  +                offset = 0;
  +                if (load(1, false)) {
  +                    --fCurrentEntity.position;
  +                    return false;
  +                }
  +            }
  +            char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +            if ( !XMLChar.isLowSurrogate(ch2) ||
  +                 !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
  +                --fCurrentEntity.position;
  +                return false;
  +            }
  +            if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                fCurrentEntity.ch[0] = ch;
  +                fCurrentEntity.ch[1] = ch2;
  +                offset = 0;
  +                if (load(2, false)) {
  +                    fCurrentEntity.columnNumber += 2;
  +                    String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2);
  +                    qname.setValues(null, name, name, null);
  +                    return true;
  +                }
  +            }
  +        }
  +        else {
  +            return false;
  +        }
  +        
  +        int index = -1;
  +        boolean sawIncompleteSurrogatePair = false;
  +        do {
  +            ch = fCurrentEntity.ch[fCurrentEntity.position];
  +            if (XML11Char.isXML11Name(ch)) {
  +                if (ch == ':') {
                       if (index != -1) {
                           break;
                       }
  @@ -415,44 +674,102 @@
                           System.arraycopy(fCurrentEntity.ch, offset,
                                            fCurrentEntity.ch, 0, length);
                       }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else if (XML11Char.isXML11NameHighSurrogate(ch)) {
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
                       if (index != -1) {
  -                        index -= offset;
  +                        index = index - offset;
                       }
                       offset = 0;
                       if (load(length, false)) {
  +                        sawIncompleteSurrogatePair = true;
  +                        --fCurrentEntity.position;
                           break;
                       }
                   }
  +                char ch2 = fCurrentEntity.ch[fCurrentEntity.position];
  +                if ( !XMLChar.isLowSurrogate(ch2) ||
  +                     !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  +                    sawIncompleteSurrogatePair = true;
  +                    --fCurrentEntity.position;
  +                    break;
  +                }
  +                if (++fCurrentEntity.position == fCurrentEntity.count) {
  +                    int length = fCurrentEntity.position - offset;
  +                    if (length == fCurrentEntity.ch.length) {
  +                        // bad luck we have to resize our buffer
  +                        char[] tmp = new char[fCurrentEntity.ch.length << 1];
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         tmp, 0, length);
  +                        fCurrentEntity.ch = tmp;
  +                    }
  +                    else {
  +                        System.arraycopy(fCurrentEntity.ch, offset,
  +                                         fCurrentEntity.ch, 0, length);
  +                    }
  +                    if (index != -1) {
  +                        index = index - offset;
  +                    }
  +                    offset = 0;
  +                    if (load(length, false)) {
  +                        break;
  +                    }
  +                }
  +            }
  +            else {
  +                break;
               }
  -            int length = fCurrentEntity.position - offset;
  -            fCurrentEntity.columnNumber += length;
  -            if (length > 0) {
  -                String prefix = null;
  -                String localpart = null;
  -                String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
  -                                                        offset, length);
  -                if (index != -1) {
  -                    int prefixLength = index - offset;
  -                    prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
  +        }
  +        while (true);
  +        
  +        int length = fCurrentEntity.position - offset;
  +        fCurrentEntity.columnNumber += length;
  +        
  +        if (length > 0) {
  +            String prefix = null;
  +            String localpart = null;
  +            String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
  +                                                    offset, length);
  +            if (index != -1) {
  +                int prefixLength = index - offset;
  +                prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
                                                       offset, prefixLength);
  -                    int len = length - prefixLength - 1;
  -                    int startLocal = index +1;
  -                    if 
(!XML11Char.isXML11NCNameStart(fCurrentEntity.ch[startLocal])){
  -                        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  -                                                   "IllegalQName",
  -                                                   null,
  -                                                   XMLErrorReporter.SEVERITY_FATAL_ERROR);
  -                    }
  -                    localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
  -                                                       index + 1, len);
  -
  +                int len = length - prefixLength - 1;
  +                int startLocal = index +1;
  +                if (!XML11Char.isXML11NCNameStart(fCurrentEntity.ch[startLocal]) &&
  +                    (!XML11Char.isXML11NameHighSurrogate(fCurrentEntity.ch[startLocal]) ||
  +                    sawIncompleteSurrogatePair)){
  +                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  +                                               "IllegalQName",
  +                                               null,
  +                                               XMLErrorReporter.SEVERITY_FATAL_ERROR);
                   }
  -                else {
  -                    localpart = rawname;
  -                }
  -                qname.setValues(prefix, localpart, rawname, null);
  -                return true;
  +                localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
  +                                                   index + 1, len);
  +
  +            }
  +            else {
  +                localpart = rawname;
               }
  +            qname.setValues(prefix, localpart, rawname, null);
  +            return true;
           }
           return false;
   
  
  
  
  1.41      +22 -13    xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java
  
  Index: XMLDocumentFragmentScannerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v
  retrieving revision 1.40
  retrieving revision 1.41
  diff -u -r1.40 -r1.41
  --- XMLDocumentFragmentScannerImpl.java       7 Nov 2003 00:26:17 -0000       1.40
  +++ XMLDocumentFragmentScannerImpl.java       16 Dec 2003 21:37:10 -0000      1.41
  @@ -780,7 +780,12 @@
                   break;
               }
               else if (!isValidNameStartChar(c) || !sawSpace) {
  -                reportFatalError("ElementUnterminated", new Object[]{rawname});
  +                // Second chance. Check if this character is a high
  +                // surrogate of a valid name start character.
  +                if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
  +                    reportFatalError("ElementUnterminated",
  +                                     new Object[] { rawname });
  +                }
               }
   
               // attributes
  @@ -1515,9 +1520,17 @@
                           }
                           case SCANNER_STATE_START_OF_MARKUP: {
                               fMarkupDepth++;
  -                            if (fEntityScanner.skipChar('?')) {
  -                                setScannerState(SCANNER_STATE_PI);
  -                                again = true;
  +                            if (fEntityScanner.skipChar('/')) {
  +                                if (scanEndElement() == 0) {
  +                                    if (elementDepthIsZeroHook()) {
  +                                        return true;
  +                                    }
  +                                }
  +                                setScannerState(SCANNER_STATE_CONTENT);
  +                            }
  +                            else if (isValidNameStartChar(fEntityScanner.peekChar())) {
  +                                scanStartElement();
  +                                setScannerState(SCANNER_STATE_CONTENT);
                               }
                               else if (fEntityScanner.skipChar('!')) {
                                   if (fEntityScanner.skipChar('-')) {
  @@ -1537,15 +1550,11 @@
                                                        null);
                                   }
                               }
  -                            else if (fEntityScanner.skipChar('/')) {
  -                                if (scanEndElement() == 0) {
  -                                    if (elementDepthIsZeroHook()) {
  -                                        return true;
  -                                    }
  -                                }
  -                                setScannerState(SCANNER_STATE_CONTENT);
  +                            else if (fEntityScanner.skipChar('?')) {
  +                                setScannerState(SCANNER_STATE_PI);
  +                                again = true;
                               }
  -                            else if (isValidNameStartChar(fEntityScanner.peekChar())) {
  +                            else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                   scanStartElement();
                                   setScannerState(SCANNER_STATE_CONTENT);
                               }
  
  
  
  1.40      +11 -2     xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLScanner.java,v
  retrieving revision 1.39
  retrieving revision 1.40
  diff -u -r1.39 -r1.40
  --- XMLScanner.java   18 Nov 2003 18:17:32 -0000      1.39
  +++ XMLScanner.java   16 Dec 2003 21:37:10 -0000      1.40
  @@ -757,7 +757,7 @@
                   if (XMLChar.isHighSurrogate(c)) {
                       scanSurrogates(text);
                   }
  -                if (isInvalidLiteral(c)) {
  +                else if (isInvalidLiteral(c)) {
                       reportFatalError("InvalidCharInComment",
                                        new Object[] { Integer.toHexString(c) }); 
                       fEntityScanner.scanChar();
  @@ -951,6 +951,7 @@
                       }
                   }
                   else if (c != -1 && XMLChar.isHighSurrogate(c)) {
  +                    fStringBuffer3.clear();
                       if (scanSurrogates(fStringBuffer3)) {
                           fStringBuffer.append(fStringBuffer3);
                           if (entityDepth == fEntityDepth) {
  @@ -1353,6 +1354,14 @@
       protected boolean isValidNameStartChar(int value) {
           return (XMLChar.isNameStart(value)); 
       } // isValidNameStartChar(int):  boolean
  +    
  +    // returns true if the given character is 
  +    // a valid high surrogate for a nameStartChar 
  +    // with respect to the version of XML understood 
  +    // by this scanner.
  +    protected boolean isValidNameStartHighSurrogate(int value) {
  +        return false; 
  +    } // isValidNameStartHighSurrogate(int):  boolean
       
       protected boolean versionSupported(String version ) {
           return version.equals("1.0");
  
  
  
  1.5       +92 -25    xml-xerces/java/src/org/apache/xerces/util/XML11Char.java
  
  Index: XML11Char.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/XML11Char.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XML11Char.java    18 Nov 2003 15:39:22 -0000      1.4
  +++ XML11Char.java    16 Dec 2003 21:37:11 -0000      1.5
  @@ -74,6 +74,7 @@
    * @author Andy Clark, IBM
    * @author Arnaud  Le Hors, IBM
    * @author Neil Graham, IBM
  + * @author Michael Glavassevich, IBM
    *
    * @version $Id$
    */
  @@ -327,6 +328,18 @@
           return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME) != 0)
               || (0x10000 <= c && c < 0xF0000);
       } // isXML11NCName(int):boolean
  +    
  +    /**
  +     * Returns whether the given character is a valid 
  +     * high surrogate for a name character. This includes
  +     * all high surrogates for characters [0x10000-0xEFFFF].
  +     * In other words everything excluding planes 15 and 16.
  +     *
  +     * @param c The character to check.
  +     */
  +    public static boolean isXML11NameHighSurrogate(int c) {
  +        return (0xD800 <= c && c <= 0xDB7F);
  +    }
   
       /*
        * [5] Name ::= NameStartChar NameChar*
  @@ -339,16 +352,39 @@
        * @return true if name is a valid Name
        */
       public static boolean isXML11ValidName(String name) {
  -        if (name.length() == 0)
  +        int length = name.length();
  +        if (length == 0)
               return false;
  +        int i = 1;
           char ch = name.charAt(0);
  -        if( !isXML11NameStart(ch) )
  -           return false;
  -        for (int i = 1; i < name.length(); i++ ) {
  -           ch = name.charAt(i);
  -           if( ! isXML11Name( ch ) ){
  -              return false;
  -           }
  +        if( !isXML11NameStart(ch) ) {
  +            if ( length > 1 && isXML11NameHighSurrogate(ch) ) {
  +                char ch2 = name.charAt(1);
  +                if ( !XMLChar.isLowSurrogate(ch2) || 
  +                     !isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) {
  +                    return false;
  +                }
  +                i = 2;
  +            }
  +            else {
  +                return false;
  +            }
  +        }
  +        while (i < length) {
  +            ch = name.charAt(i);
  +            if ( !isXML11Name(ch) ) {
  +                if ( ++i < length && isXML11NameHighSurrogate(ch) ) {
  +                    char ch2 = name.charAt(i);
  +                    if ( !XMLChar.isLowSurrogate(ch2) || 
  +                         !isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  +                        return false;
  +                    }
  +                }
  +                else {
  +                    return false;
  +                }
  +            }
  +            ++i;
           }
           return true;
       } // isXML11ValidName(String):boolean
  @@ -366,16 +402,39 @@
        * @return true if name is a valid NCName
        */
       public static boolean isXML11ValidNCName(String ncName) {
  -        if (ncName.length() == 0)
  +        int length = ncName.length();
  +        if (length == 0)
               return false;
  +        int i = 1;
           char ch = ncName.charAt(0);
  -        if( !isXML11NCNameStart(ch) )
  -           return false;
  -        for (int i = 1; i < ncName.length(); i++ ) {
  -           ch = ncName.charAt(i);
  -           if( !isXML11NCName( ch ) ){
  -              return false;
  -           }
  +        if( !isXML11NCNameStart(ch) ) {
  +            if ( length > 1 && isXML11NameHighSurrogate(ch) ) {
  +                char ch2 = ncName.charAt(1);
  +                if ( !XMLChar.isLowSurrogate(ch2) || 
  +                     !isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
  +                    return false;
  +                }
  +                i = 2;
  +            }
  +            else {
  +                return false;
  +            }
  +        }
  +        while (i < length) {
  +            ch = ncName.charAt(i);
  +            if ( !isXML11NCName(ch) ) {
  +                if ( ++i < length && isXML11NameHighSurrogate(ch) ) {
  +                    char ch2 = ncName.charAt(i);
  +                    if ( !XMLChar.isLowSurrogate(ch2) || 
  +                         !isXML11NCName(XMLChar.supplemental(ch, ch2)) ) {
  +                        return false;
  +                    }
  +                }
  +                else {
  +                    return false;
  +                }
  +            }
  +            ++i;
           }
           return true;
       } // isXML11ValidNCName(String):boolean
  @@ -391,18 +450,26 @@
        * @return true if nmtoken is a valid Nmtoken 
        */
       public static boolean isXML11ValidNmtoken(String nmtoken) {
  -        if (nmtoken.length() == 0)
  +        int length = nmtoken.length();
  +        if (length == 0)
               return false;
  -        for (int i = 0; i < nmtoken.length(); i++ ) {
  -           char ch = nmtoken.charAt(i);
  -           if(  ! isXML11Name( ch ) ){
  -              return false;
  -           }
  +        for (int i = 0; i < length; ++i ) {
  +            char ch = nmtoken.charAt(i);
  +            if( !isXML11Name(ch) ) {
  +                if ( ++i < length && isXML11NameHighSurrogate(ch) ) {
  +                    char ch2 = nmtoken.charAt(i);
  +                    if ( !XMLChar.isLowSurrogate(ch2) || 
  +                         !isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  +                        return false;
  +                    }
  +                }
  +                else {
  +                    return false;
  +                }
  +            }
           }
           return true;
       } // isXML11ValidName(String):boolean
  -
  -
   
   } // class XML11Char
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to