dom DOMNormalizer.java

neeraj Mon, 17 Nov 2003 07:59:13 -0800

neeraj      2003/11/17 06:55:28

  Modified:    java/src/org/apache/xerces/dom DOMNormalizer.java
  Log:
  Adding further changes for well-formed feature support. Adding valid
  xml character checks for CDATA and TEXT nodes. Added new function
  checkInvaliXMLdCharactes to check valid XML characters as per the version of the 
document.
  
  Revision  Changes    Path
  1.41      +106 -89   xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java
  
  Index: DOMNormalizer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java,v
  retrieving revision 1.40
  retrieving revision 1.41
  diff -u -r1.40 -r1.41
  --- DOMNormalizer.java        17 Nov 2003 10:53:07 -0000      1.40
  +++ DOMNormalizer.java        17 Nov 2003 14:55:28 -0000      1.41
  @@ -484,58 +484,16 @@
                       }
                   }//if comment node need not be removed
                   else {
  -                    //REVISIT: it is possible that bad XML characters
  -                    //enter into DOM when created in memory -- so we should
  -                    //still be doing these checks.
  -                    
  -                    //go ahead only if version didn't change.
  -                    if(!fDocument.isXMLVersionChanged()){
  -                        return null;
  -                    }
  -                    //check comments for invalid xml chracter as per the version
  -                    //of the document
  -                    String commentdata = ((Comment)node).getData();
  -                    char [] commentarray = null ;
  -                    if(commentdata != null && commentdata.length() > 0){
  -                        commentarray = commentdata.toCharArray();
  -                    }
  -                    else{
  -                        return null ;
  -                    }
  -                    if (DEBUG_ND) {
  -                    }
  -                    
  -                    //version of the document is XML 1.1
  -                    if(fDocument.isXML11Version()){
  -                        
  -                        // check  comment data
  -                        //we need to check all chracters as per production rules
  -                        //of XML11
  -                        int i = 0 ;
  -                        while(i < commentarray.length){                            
  -                            if(XML11Char.isXML11Invalid(commentarray[i++])){
  -                                String msg = "Invalid XML Character " + 
Integer.toString(commentarray[i-1], 16) ;
  -                                //REVISIT: As per DOM it is error but as per XML 
spec. it is fatal error
  -                                reportDOMError(msg,
  -                                    DOMError.SEVERITY_FATAL_ERROR, node,  
"wf-invalid-character");
  -                            };
  -                        }
  -                    }//version of the document is XML 1.0
  -                    else{
  +                    //REVISIT: As of right now we are doing checks only if the XML 
version changed at any moment
  +                    //but it is possible that bad XML characters enter into DOM 
when created in memory -- so we should
  +                    //still be doing these checks when document is loaded or 
modified in memory                    
  +                    if(fDocument.isXMLVersionChanged()){
  +                        String commentdata = ((Comment)node).getData();
  +                        //check comments for invalid xml chracter as per the version
  +                        //of the document                            
  +                        checkInValidXMLCharacters(commentdata, 
fDocument.isXML11Version());
                           
  -                        // check comment data
  -                        //we need to check all chracters as per production rules
  -                        //of XML 1.0
  -                        int i = 0 ;
  -                        while(i < commentarray.length){                            
  -                            if( XMLChar.isInvalid(commentarray[i++]) ){             
                   
  -                                String msg = "Invalid XML Character " + 
Integer.toString(commentarray[i-1], 16) ;
  -                                //REVISIT: As per DOM it is error but as per XML 
spec. it is fatal error                               
  -                                reportDOMError(msg, 
  -                                    DOMError.SEVERITY_FATAL_ERROR, node,  
"wf-invalid-character");
  -                            };
  -                        }            
  -                    }//end-else fDocument.isXMLVersion()
  +                    }
                   }//end-else if comment node is not to be removed.                   
                             
               }
           case Node.ENTITY_REFERENCE_NODE: { 
  @@ -575,6 +533,32 @@
                   if (DEBUG_ND) {
                       System.out.println("==>normalizeNode:{cdata}");
                   }
  +                
  +                //1. Wether we are converting CDATA nodes to text nodes or not...
  +                //we should be checking the node value in any case for valid XML 
character
  +                    
  +                //2. it is  possible to have the character sequence "]]>" in the 
content, 
  +                //which is illegal in a CDATA section per section 2.7 of [XML 1.0]. 
  +                //But DOM Says that the presence of this character sequence must 
generate a fatal error 
  +                //only during _serialization_ or the cdata section must be splitted 
before the 
  +                //serialization (see also the parameter "split-cdata-sections" in 
the DOMConfiguration interface). 
  +                
  +                //2a We dont need to generate fatal error when the DOM is in 
memory, this 
  +                //would be taken care in serializer code
  +                
  +                //2b CDATA section splition is taken care down depending on the 
feature value
  +                //or presence of ']]>' in CDATA shouldnot affect the following 
checks
  +                //we should be checking for presence of valid XML characters
  +                
  +                //REVISIT: As of right now we are doing checks only if the XML 
version changed at any moment
  +                //but it is possible that bad XML characters enter into DOM when 
created in memory -- so we should
  +                //still be doing these checks when document is loaded or modified 
in memory
  +                
  +                if(fDocument.isXMLVersionChanged()){
  +                    String cdatavalue = node.getNodeValue() ;                       
                 
  +                    checkInValidXMLCharacters(cdatavalue, 
fDocument.isXML11Version());                    
  +                }
  +                
                   if ((fConfiguration.features & DOMConfigurationImpl.CDATA) == 0) {
                       // convert CDATA to TEXT nodes
                       Text text = fDocument.createTextNode(node.getNodeValue());
  @@ -635,11 +619,33 @@
                   if ( next!=null && next.getNodeType() == Node.TEXT_NODE ) {
                       ((Text)node).appendData(next.getNodeValue());
                       node.getParentNode().removeChild( next );
  +                    
  +                    //check the text values for valid xml character as per document 
version...                                                
  +                    
  +                    //REVISIT: As of right now we are doing checks only if the XML 
version changed at any moment
  +                    //but it is possible that bad XML characters enter into DOM 
when created in memory -- so we should
  +                    //still be doing these checks when document is loaded or 
modified in memory                    
  +                    
  +                    if(fDocument.isXMLVersionChanged()){                            
                                                   
  +                        checkInValidXMLCharacters(node.getNodeValue(), 
fDocument.isXML11Version());
  +                    }
  +                    
                       return node; // Don't advance;
  +                    
                   } else if (node.getNodeValue().length()==0) {
                       // If kid is empty, remove it
                       node.getParentNode().removeChild( node );
  -                } else {
  +                } else {                    
  +                    //check the text values for valid xml character as per document 
version...                                                
  +                    
  +                    //REVISIT: As of right now we are doing checks only if the XML 
version changed at any moment
  +                    //but it is possible that bad XML characters enter into DOM 
when created in memory -- so we should
  +                    //still be doing these checks when document is loaded or 
modified in memory                    
  +                    
  +                    if(fDocument.isXMLVersionChanged()){                            
                                                   
  +                        checkInValidXMLCharacters(node.getNodeValue(), 
fDocument.isXML11Version());
  +                    }
  +                    
                       // validator.characters() call
                       // Don't send characters in the following cases:
                       // 1. entities is false, next child is entity reference: expand 
tree first
  @@ -676,11 +682,10 @@
                   break;
               }        
           case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: {
  -            //REVISIT: DOM created in memory may contain invalid            
  -            // xml characters which we should be checking -- so 
  -            //we should also be checking in the case when document
  -            //is created in memory and after that application calls
  -            //normalizeDocument()
  +            //REVISIT: As of right now we are doing checks only if the XML version 
changed at any moment
  +            //but it is possible that bad XML characters enter into DOM when 
created in memory -- so we should
  +            //still be doing these checks when document is loaded or modified in 
memory
  +            
               if(!fDocument.isXMLVersionChanged()){
                   break ;
               }
  @@ -689,49 +694,25 @@
               ProcessingInstruction pinode = (ProcessingInstruction)node ;
               
               String target = pinode.getTarget();
  -            String pidata = pinode.getData() ;
  -            char [] pidataarray = pidata.toCharArray() ;
  +            //1.check PI target name
               if(fDocument.isXML11Version()){
  -                //1. check pi targetname 
  +                
                   if(!XML11Char.isXML11ValidName(target)){
                           //REVISIT: As per DOM it is error but as per XML spec. it 
is fatal error
                           reportDOMError("Invalid Character in node name",
                               DOMError.SEVERITY_FATAL_ERROR, node,  
"wf-invalid-character-in-node-name");                
                   }
  -                //2. check pi data
  -                //we need to check all chracters as per production rules
  -                //of XML11
  -                int i = 0 ;
  -                while(i < pidataarray.length){
  -                    if(XML11Char.isXML11Invalid(pidataarray[i++])){
  -                        //REVISIT: As per DOM it is error but as per XML spec. it 
is fatal error
  -                        reportDOMError("Invalid Character",
  -                            DOMError.SEVERITY_FATAL_ERROR, node,  
"wf-invalid-character");
  -                    };
  -                }
               }
  -            else{
  -                //1. check pi targetname 
  +            else{                
                   if(!XMLChar.isValidName(target)){
                           //REVISIT: As per DOM it is error but as per XML spec. it 
is fatal error
                           reportDOMError("Invalid Character in node name",
                               DOMError.SEVERITY_FATAL_ERROR, node,  
"wf-invalid-character-in-node-name");                
  -                }
  -                //2. check pi data
  -                //we need to check all chracters as per production rules
  -                //of XML 1.0
  -                
  -                //we need to check all chracters as per production rules
  -                //of XML1.0
  -                int i = 0 ;
  -                while(i < pidataarray.length){
  -                    if( XMLChar.isValid(pidataarray[i++]) ){
  -                        //REVISIT: As per DOM it is error but as per XML spec. it 
is fatal error
  -                        reportDOMError("Invalid Character",
  -                            DOMError.SEVERITY_FATAL_ERROR, node,  
"wf-invalid-character");
  -                    };
  -                }            
  +                }                
               }
  +                        
  +           //2. check PI data
  +           checkInValidXMLCharacters(pinode.getData(), fDocument.isXML11Version());
               
           }//end case Node.PROCESSING_INSTRUCTION_NODE
           
  @@ -1040,7 +1021,42 @@
           } // end loop for attributes
       }
   
  +    //check for valid xml charactsr as per the XML version 
  +    public void checkInValidXMLCharacters(String datavalue, boolean isXML11Version)
  +    {
  +        if(datavalue == null || (datavalue.length() == 0) ) return ;
  +                
  +        char [] dataarray = datavalue.toCharArray(); 
  +        int datalength = dataarray.length ;
   
  +        //version of the document is XML 1.1
  +        if(isXML11Version){                    
  +            //we need to check all chracters as per production rules of XML11
  +            int i = 0 ;
  +            while(i < datalength){                            
  +                if(XML11Char.isXML11Invalid(dataarray[i++])){
  +                    String msg = "Invalid XML Character " + 
Integer.toString(dataarray[i-1], 16) ;
  +                    //REVISIT: As per DOM it is error but as per XML spec. it is 
fatal error
  +                    reportDOMError(msg,
  +                        DOMError.SEVERITY_FATAL_ERROR, null,  
"wf-invalid-character");
  +                                        
  +                };
  +            }
  +        }//version of the document is XML 1.0
  +        else{                    
  +            //we need to check all chracters as per production rules of XML 1.0
  +            int i = 0 ;
  +            while(i < datalength){                            
  +                if( XMLChar.isInvalid(dataarray[i++]) ){
  +                    String msg = "Invalid XML Character " + 
Integer.toString(dataarray[i-1], 16) ;                    
  +                    //REVISIT: As per DOM it is error but as per XML spec. it is 
fatal error
  +                    reportDOMError(msg,
  +                        DOMError.SEVERITY_FATAL_ERROR, null,  
"wf-invalid-character");                                        
  +                };
  +            }            
  +        }//end-else fDocument.isXMLVersion()
  +        
  +    }//checkforValidXMLCharacter
   
   
       /**
  @@ -1762,4 +1778,5 @@
           return null;
       }
   
  +    
   }  // DOMNormalizer class


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: xml-xerces/java/src/org/apache/xerces/dom DOMNormalizer.java

Reply via email to