mrglavas    2004/06/15 14:36:39

  Modified:    java/src/org/apache/xerces/dom DOMNormalizer.java
  Log:
  The DOM normalizer was rejecting supplemental characters in
  text, comments and CDATA sections. We should now be
  correctly handling surrogate character pairs.
  
  Revision  Changes    Path
  1.56      +156 -100  xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java
  
  Index: DOMNormalizer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java,v
  retrieving revision 1.55
  retrieving revision 1.56
  diff -u -r1.55 -r1.56
  --- DOMNormalizer.java        7 May 2004 21:35:35 -0000       1.55
  +++ DOMNormalizer.java        15 Jun 2004 21:36:39 -0000      1.56
  @@ -949,25 +949,34 @@
        * @param isXML11Version = true if XML 1.1
        */
       public static final void isCDataWF(DOMErrorHandler errorHandler, DOMErrorImpl 
error, DOMLocatorImpl locator, 
  -            String datavalue, boolean isXML11Version)
  +        String datavalue, boolean isXML11Version)
       {
  -        if(datavalue == null || (datavalue.length() == 0) ) return ;
  -                
  +        if (datavalue == null || (datavalue.length() == 0) ) {
  +            return;
  +        }
  +        
           char [] dataarray = datavalue.toCharArray(); 
  -        int datalength = dataarray.length ;
  -
  -        //version of the document is XML 1.1
  -        if(isXML11Version){                    
  -            //we need to check all chracters as per production rules of XML11
  -            int i = 0 ;
  +        int datalength = dataarray.length;
  +        
  +        // version of the document is XML 1.1
  +        if (isXML11Version) {                    
  +            // we need to check all chracters as per production rules of XML11
  +            int i = 0;
               while(i < datalength){     
                   char c = dataarray[i++];                                      
  -                if(XML11Char.isXML11Invalid(c)){
  -                    String msg =
  -                        DOMMessageFormatter.formatMessage(
  -                            DOMMessageFormatter.XML_DOMAIN,
  -                            "InvalidCharInCDSect",
  -                            new Object[] { Integer.toString(c, 16)});
  +                if ( XML11Char.isXML11Invalid(c) ) {
  +                    // check if this is a supplemental character
  +                    if (XMLChar.isHighSurrogate(c) && i < datalength) {
  +                        char c2 = dataarray[i++];
  +                        if (XMLChar.isLowSurrogate(c2) && 
  +                            XMLChar.isSupplemental(XMLChar.supplemental(c, c2))) {
  +                            continue;
  +                        }
  +                    }
  +                    String msg = DOMMessageFormatter.formatMessage(
  +                        DOMMessageFormatter.XML_DOMAIN,
  +                        "InvalidCharInCDSect",
  +                        new Object[] { Integer.toString(c, 16)});
                       reportDOMError(
                           errorHandler,
                           error,
  @@ -976,54 +985,63 @@
                           DOMError.SEVERITY_ERROR,
                           "wf-invalid-character");
                   }
  -                else if (c==']'){
  +                else if (c == ']') {
                       int count = i;
  -                    if (count<datalength && dataarray[count]==']'){
  -                        while (++count <datalength && dataarray[count]==']'){
  +                    if (count < datalength && dataarray[count] == ']') {
  +                        while (++count < datalength && dataarray[count] == ']') {
                               // do nothing
                           }
  -                        if (count <datalength && dataarray[count]=='>'){
  -                            //CDEndInContent
  -                                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  -                                                         "CDEndInContent", null);
  -                                                     reportDOMError(errorHandler, 
error, locator,msg, DOMError.SEVERITY_ERROR, "wf-invalid-character");
  +                        if (count < datalength && dataarray[count] == '>') {
  +                            // CDEndInContent
  +                            String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN, "CDEndInContent", 
null);
  +                            reportDOMError(errorHandler, error, locator,msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
                           }
                       }
                       
                   }
               }
  -        }//version of the document is XML 1.0
  -        else{                    
  -            //we need to check all chracters as per production rules of XML 1.0
  -            int i = 0 ;
  -            while(i < datalength){   
  +        } // version of the document is XML 1.0
  +        else {                    
  +            // we need to check all chracters as per production rules of XML 1.0
  +            int i = 0;
  +            while (i < datalength) {   
                   char c = dataarray[i++];                         
  -                if( XMLChar.isInvalid(c) ){
  -                     //Note:  The key InvalidCharInCDSect from 
XMLMessages.properties
  -                     //is being used to obtain the message and DOM error type
  -                     //"wf-invalid-character" is used.  Also per DOM it is error 
but 
  -                     //as per XML spec. it is fatal error
  -                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN, 
"InvalidCharInCDSect", new Object[]{Integer.toString(c, 16)});
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, 
  -                                         "wf-invalid-character");
  +                if( XMLChar.isInvalid(c) ) {
  +                    // check if this is a supplemental character
  +                    if (XMLChar.isHighSurrogate(c) && i < datalength) {
  +                        char c2 = dataarray[i++];
  +                        if (XMLChar.isLowSurrogate(c2) && 
  +                            XMLChar.isSupplemental(XMLChar.supplemental(c, c2))) {
  +                            continue;
  +                        }
  +                    }
  +                    // Note:  The key InvalidCharInCDSect from 
XMLMessages.properties
  +                    // is being used to obtain the message and DOM error type
  +                    // "wf-invalid-character" is used.  Also per DOM it is error 
but 
  +                    // as per XML spec. it is fatal error
  +                    String msg = DOMMessageFormatter.formatMessage(
  +                        DOMMessageFormatter.XML_DOMAIN, 
  +                        "InvalidCharInCDSect", 
  +                        new Object[]{Integer.toString(c, 16)});
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
                   }
  -                else if (c==']'){
  +                else if (c==']') {
                       int count = i;
  -                    if (count<datalength && dataarray[count]==']'){
  -                        while (++count <datalength && dataarray[count]==']'){
  +                    if ( count< datalength && dataarray[count]==']' ) {
  +                        while (++count < datalength && dataarray[count]==']' ) {
                               // do nothing
                           }
  -                        if (count <datalength && dataarray[count]=='>'){
  -                                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,"CDEndInContent", 
null);
  -                                                     reportDOMError(errorHandler, 
error, locator,  msg, DOMError.SEVERITY_ERROR, "wf-invalid-character");
  +                        if ( count < datalength && dataarray[count]=='>' ) {
  +                            String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN, "CDEndInContent", 
null);
  +                            reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
                           }
                       }
                       
                   }
               }            
  -        }//end-else fDocument.isXMLVersion()
  +        } // end-else fDocument.isXMLVersion()
           
  -    }//isCDataWF
  +    } // isCDataWF
        
       /**
        * NON-DOM: check for valid XML characters as per the XML version
  @@ -1031,41 +1049,62 @@
        * @param isXML11Version = true if XML 1.1
        */
       public static final void isXMLCharWF(DOMErrorHandler errorHandler, DOMErrorImpl 
error, DOMLocatorImpl locator, 
  -                String datavalue, boolean isXML11Version)
  +        String datavalue, boolean isXML11Version)
       {
  -        if(datavalue == null || (datavalue.length() == 0) ) return ;      
  +        if ( datavalue == null || (datavalue.length() == 0) ) {
  +            return;      
  +        }
  +        
           char [] dataarray = datavalue.toCharArray(); 
  -        int datalength = dataarray.length ;
  -
  -        //version of the document is XML 1.1
  +        int datalength = dataarray.length;
  +        
  +        // version of the document is XML 1.1
           if(isXML11Version){                    
               //we need to check all characters as per production rules of XML11
               int i = 0 ;
  -            while(i < datalength){                            
  +            while (i < datalength) {                            
                   if(XML11Char.isXML11Invalid(dataarray[i++])){
  -                                     String msg = DOMMessageFormatter.formatMessage(
  +                    // check if this is a supplemental character
  +                    char ch = dataarray[i-1];
  +                    if (XMLChar.isHighSurrogate(ch) && i < datalength) {
  +                        char ch2 = dataarray[i++];
  +                        if (XMLChar.isLowSurrogate(ch2) && 
  +                            XMLChar.isSupplemental(XMLChar.supplemental(ch, ch2))) {
  +                            continue;
  +                        }
  +                    }
  +                    String msg = DOMMessageFormatter.formatMessage(
                           DOMMessageFormatter.DOM_DOMAIN, "InvalidXMLCharInDOM", 
                           new Object[]{Integer.toString(dataarray[i-1], 16)});
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, 
  -                                         "wf-invalid-character");
  -                };
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, 
  +                    "wf-invalid-character");
  +                }
               }
  -        }//version of the document is XML 1.0
  +        } // version of the document is XML 1.0
           else{                    
  -            //we need to check all characters as per production rules of XML 1.0
  +            // we need to check all characters as per production rules of XML 1.0
               int i = 0 ;
  -            while(i < datalength){                            
  -                if( XMLChar.isInvalid(dataarray[i++]) ){
  -                                     String msg = DOMMessageFormatter.formatMessage(
  +            while (i < datalength) {                            
  +                if( XMLChar.isInvalid(dataarray[i++]) ) {
  +                    // check if this is a supplemental character
  +                    char ch = dataarray[i-1];
  +                    if (XMLChar.isHighSurrogate(ch) && i < datalength) {
  +                        char ch2 = dataarray[i++];
  +                        if (XMLChar.isLowSurrogate(ch2) && 
  +                            XMLChar.isSupplemental(XMLChar.supplemental(ch, ch2))) {
  +                            continue;
  +                        }
  +                    }
  +                    String msg = DOMMessageFormatter.formatMessage(
                           DOMMessageFormatter.DOM_DOMAIN, "InvalidXMLCharInDOM", 
                           new Object[]{Integer.toString(dataarray[i-1], 16)});
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, 
  -                                         "wf-invalid-character");
  -                };
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, 
  +                    "wf-invalid-character");
  +                }
               }            
  -        }//end-else fDocument.isXMLVersion()
  +        } // end-else fDocument.isXMLVersion()
           
  -    }//isXMLCharWF
  +    } // isXMLCharWF
       
       /**
        * NON-DOM: check if value of the comment is well-formed
  @@ -1073,55 +1112,72 @@
        * @param isXML11Version = true if XML 1.1
        */
       public static final void isCommentWF(DOMErrorHandler errorHandler, DOMErrorImpl 
error, DOMLocatorImpl locator, 
  -                                    String datavalue, boolean isXML11Version)
  +        String datavalue, boolean isXML11Version)
       {
  -        if(datavalue == null || (datavalue.length() == 0) ) return ;
  -                
  +        if ( datavalue == null || (datavalue.length() == 0) ) {
  +            return;
  +        }
  +        
           char [] dataarray = datavalue.toCharArray(); 
           int datalength = dataarray.length ;
  -
  -        //version of the document is XML 1.1
  -        if(isXML11Version){                    
  -            //we need to check all chracters as per production rules of XML11
  +        
  +        // version of the document is XML 1.1
  +        if (isXML11Version) {                    
  +            // we need to check all chracters as per production rules of XML11
               int i = 0 ;
  -            while(i < datalength){   
  +            while (i < datalength){   
                   char c = dataarray[i++];
  -                                         
  -                if(XML11Char.isXML11Invalid(c)){
  -                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN, 
  -                                         "InvalidCharInComment", 
  -                                         new Object [] 
{Integer.toString(dataarray[i-1], 16)});
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, "wf-invalid-character");
  -                }
  -                else if (c == '-' && i<datalength && dataarray[i]=='-'){
  -                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  -                                         "DashDashInComment", null);
  -                                     // invalid: '--' in comment                   
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, "wf-invalid-character");
  +                if ( XML11Char.isXML11Invalid(c) ) {
  +                    // check if this is a supplemental character
  +                    if (XMLChar.isHighSurrogate(c) && i < datalength) {
  +                        char c2 = dataarray[i++];
  +                        if (XMLChar.isLowSurrogate(c2) && 
  +                            XMLChar.isSupplemental(XMLChar.supplemental(c, c2))) {
  +                            continue;
  +                        }
  +                    }
  +                    String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN, 
  +                        "InvalidCharInComment", 
  +                        new Object [] {Integer.toString(dataarray[i-1], 16)});
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
  +                }
  +                else if (c == '-' && i < datalength && dataarray[i] == '-') {
  +                    String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  +                        "DashDashInComment", null);
  +                    // invalid: '--' in comment                   
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
                   }
               }
  -        }//version of the document is XML 1.0
  -        else{                    
  -            //we need to check all chracters as per production rules of XML 1.0
  -            int i = 0 ;
  -            while(i < datalength){ 
  +        } // version of the document is XML 1.0
  +        else {                    
  +            // we need to check all chracters as per production rules of XML 1.0
  +            int i = 0;
  +            while (i < datalength){ 
                   char c = dataarray[i++];                           
                   if( XMLChar.isInvalid(c) ){
  -                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  -                                         "InvalidCharInComment", new Object [] 
{Integer.toString(dataarray[i-1], 16)});
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, "wf-invalid-character");
  +                    // check if this is a supplemental character
  +                    if (XMLChar.isHighSurrogate(c) && i < datalength) {
  +                        char c2 = dataarray[i++];
  +                        if (XMLChar.isLowSurrogate(c2) && 
  +                            XMLChar.isSupplemental(XMLChar.supplemental(c, c2))) {
  +                            continue;
  +                        }
  +                    }
  +                    String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  +                        "InvalidCharInComment", new Object [] 
{Integer.toString(dataarray[i-1], 16)});
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
                   }  
                   else if (c == '-' && i<datalength && dataarray[i]=='-'){
  -                                     String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  -                                         "DashDashInComment", null);
  -                                     // invalid: '--' in comment                   
  -                                     reportDOMError(errorHandler, error, locator, 
msg, DOMError.SEVERITY_ERROR, "wf-invalid-character");
  +                    String msg = 
DOMMessageFormatter.formatMessage(DOMMessageFormatter.XML_DOMAIN,
  +                        "DashDashInComment", null);
  +                    // invalid: '--' in comment                   
  +                    reportDOMError(errorHandler, error, locator, msg, 
DOMError.SEVERITY_ERROR, "wf-invalid-character");
                   }                                      
               }
  -                        
  -        }//end-else fDocument.isXMLVersion()
  +            
  +        } // end-else fDocument.isXMLVersion()
           
  -    }//isCommentWF
  +    } // isCommentWF
       
       /** NON-DOM: check if attribute value is well-formed
        * @param attributes
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to