neeraj 2003/11/17 06:55:28 Modified: java/src/org/apache/xerces/dom DOMNormalizer.java Log: Adding further changes for well-formed feature support. Adding valid xml character checks for CDATA and TEXT nodes. Added new function checkInValidXMLCharacters to check valid XML characters as per the version of the document. Revision Changes Path 1.41 +106 -89 xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java Index: DOMNormalizer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java,v retrieving revision 1.40 retrieving revision 1.41 diff -u -r1.40 -r1.41 --- DOMNormalizer.java 17 Nov 2003 10:53:07 -0000 1.40 +++ DOMNormalizer.java 17 Nov 2003 14:55:28 -0000 1.41 @@ -484,58 +484,16 @@ } }//if comment node need not be removed else { - //REVISIT: it is possible that bad XML characters - //enter into DOM when created in memory -- so we should - //still be doing these checks. - - //go ahead only if version didn't change. - if(!fDocument.isXMLVersionChanged()){ - return null; - } - //check comments for invalid xml chracter as per the version - //of the document - String commentdata = ((Comment)node).getData(); - char [] commentarray = null ; - if(commentdata != null && commentdata.length() > 0){ - commentarray = commentdata.toCharArray(); - } - else{ - return null ; - } - if (DEBUG_ND) { - } - - //version of the document is XML 1.1 - if(fDocument.isXML11Version()){ - - // check comment data - //we need to check all chracters as per production rules - //of XML11 - int i = 0 ; - while(i < commentarray.length){ - if(XML11Char.isXML11Invalid(commentarray[i++])){ - String msg = "Invalid XML Character " + Integer.toString(commentarray[i-1], 16) ; - //REVISIT: As per DOM it is error but as per XML spec. 
it is fatal error - reportDOMError(msg, - DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character"); - }; - } - }//version of the document is XML 1.0 - else{ + //REVISIT: As of right now we are doing checks only if the XML version changed at any moment + //but it is possible that bad XML characters enter into DOM when created in memory -- so we should + //still be doing these checks when document is loaded or modified in memory + if(fDocument.isXMLVersionChanged()){ + String commentdata = ((Comment)node).getData(); + //check comments for invalid xml chracter as per the version + //of the document + checkInValidXMLCharacters(commentdata, fDocument.isXML11Version()); - // check comment data - //we need to check all chracters as per production rules - //of XML 1.0 - int i = 0 ; - while(i < commentarray.length){ - if( XMLChar.isInvalid(commentarray[i++]) ){ - String msg = "Invalid XML Character " + Integer.toString(commentarray[i-1], 16) ; - //REVISIT: As per DOM it is error but as per XML spec. it is fatal error - reportDOMError(msg, - DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character"); - }; - } - }//end-else fDocument.isXMLVersion() + } }//end-else if comment node is not to be removed. } case Node.ENTITY_REFERENCE_NODE: { @@ -575,6 +533,32 @@ if (DEBUG_ND) { System.out.println("==>normalizeNode:{cdata}"); } + + //1. Wether we are converting CDATA nodes to text nodes or not... + //we should be checking the node value in any case for valid XML character + + //2. it is possible to have the character sequence "]]>" in the content, + //which is illegal in a CDATA section per section 2.7 of [XML 1.0]. + //But DOM Says that the presence of this character sequence must generate a fatal error + //only during _serialization_ or the cdata section must be splitted before the + //serialization (see also the parameter "split-cdata-sections" in the DOMConfiguration interface). 
+ + //2a We dont need to generate fatal error when the DOM is in memory, this + //would be taken care in serializer code + + //2b CDATA section splition is taken care down depending on the feature value + //or presence of ']]>' in CDATA shouldnot affect the following checks + //we should be checking for presence of valid XML characters + + //REVISIT: As of right now we are doing checks only if the XML version changed at any moment + //but it is possible that bad XML characters enter into DOM when created in memory -- so we should + //still be doing these checks when document is loaded or modified in memory + + if(fDocument.isXMLVersionChanged()){ + String cdatavalue = node.getNodeValue() ; + checkInValidXMLCharacters(cdatavalue, fDocument.isXML11Version()); + } + if ((fConfiguration.features & DOMConfigurationImpl.CDATA) == 0) { // convert CDATA to TEXT nodes Text text = fDocument.createTextNode(node.getNodeValue()); @@ -635,11 +619,33 @@ if ( next!=null && next.getNodeType() == Node.TEXT_NODE ) { ((Text)node).appendData(next.getNodeValue()); node.getParentNode().removeChild( next ); + + //check the text values for valid xml character as per document version... + + //REVISIT: As of right now we are doing checks only if the XML version changed at any moment + //but it is possible that bad XML characters enter into DOM when created in memory -- so we should + //still be doing these checks when document is loaded or modified in memory + + if(fDocument.isXMLVersionChanged()){ + checkInValidXMLCharacters(node.getNodeValue(), fDocument.isXML11Version()); + } + return node; // Don't advance; + } else if (node.getNodeValue().length()==0) { // If kid is empty, remove it node.getParentNode().removeChild( node ); - } else { + } else { + //check the text values for valid xml character as per document version... 
+ + //REVISIT: As of right now we are doing checks only if the XML version changed at any moment + //but it is possible that bad XML characters enter into DOM when created in memory -- so we should + //still be doing these checks when document is loaded or modified in memory + + if(fDocument.isXMLVersionChanged()){ + checkInValidXMLCharacters(node.getNodeValue(), fDocument.isXML11Version()); + } + // validator.characters() call // Don't send characters in the following cases: // 1. entities is false, next child is entity reference: expand tree first @@ -676,11 +682,10 @@ break; } case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: { - //REVISIT: DOM created in memory may contain invalid - // xml characters which we should be checking -- so - //we should also be checking in the case when document - //is created in memory and after that application calls - //normalizeDocument() + //REVISIT: As of right now we are doing checks only if the XML version changed at any moment + //but it is possible that bad XML characters enter into DOM when created in memory -- so we should + //still be doing these checks when document is loaded or modified in memory + if(!fDocument.isXMLVersionChanged()){ break ; } @@ -689,49 +694,25 @@ ProcessingInstruction pinode = (ProcessingInstruction)node ; String target = pinode.getTarget(); - String pidata = pinode.getData() ; - char [] pidataarray = pidata.toCharArray() ; + //1.check PI target name if(fDocument.isXML11Version()){ - //1. check pi targetname + if(!XML11Char.isXML11ValidName(target)){ //REVISIT: As per DOM it is error but as per XML spec. it is fatal error reportDOMError("Invalid Character in node name", DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character-in-node-name"); } - //2. check pi data - //we need to check all chracters as per production rules - //of XML11 - int i = 0 ; - while(i < pidataarray.length){ - if(XML11Char.isXML11Invalid(pidataarray[i++])){ - //REVISIT: As per DOM it is error but as per XML spec. 
it is fatal error - reportDOMError("Invalid Character", - DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character"); - }; - } } - else{ - //1. check pi targetname + else{ if(!XMLChar.isValidName(target)){ //REVISIT: As per DOM it is error but as per XML spec. it is fatal error reportDOMError("Invalid Character in node name", DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character-in-node-name"); - } - //2. check pi data - //we need to check all chracters as per production rules - //of XML 1.0 - - //we need to check all chracters as per production rules - //of XML1.0 - int i = 0 ; - while(i < pidataarray.length){ - if( XMLChar.isValid(pidataarray[i++]) ){ - //REVISIT: As per DOM it is error but as per XML spec. it is fatal error - reportDOMError("Invalid Character", - DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character"); - }; - } + } } + + //2. check PI data + checkInValidXMLCharacters(pinode.getData(), fDocument.isXML11Version()); }//end case Node.PROCESSING_INSTRUCTION_NODE @@ -1040,7 +1021,42 @@ } // end loop for attributes } + //check for valid xml charactsr as per the XML version + public void checkInValidXMLCharacters(String datavalue, boolean isXML11Version) + { + if(datavalue == null || (datavalue.length() == 0) ) return ; + + char [] dataarray = datavalue.toCharArray(); + int datalength = dataarray.length ; + //version of the document is XML 1.1 + if(isXML11Version){ + //we need to check all chracters as per production rules of XML11 + int i = 0 ; + while(i < datalength){ + if(XML11Char.isXML11Invalid(dataarray[i++])){ + String msg = "Invalid XML Character " + Integer.toString(dataarray[i-1], 16) ; + //REVISIT: As per DOM it is error but as per XML spec. 
it is fatal error + reportDOMError(msg, + DOMError.SEVERITY_FATAL_ERROR, null, "wf-invalid-character"); + + }; + } + }//version of the document is XML 1.0 + else{ + //we need to check all chracters as per production rules of XML 1.0 + int i = 0 ; + while(i < datalength){ + if( XMLChar.isInvalid(dataarray[i++]) ){ + String msg = "Invalid XML Character " + Integer.toString(dataarray[i-1], 16) ; + //REVISIT: As per DOM it is error but as per XML spec. it is fatal error + reportDOMError(msg, + DOMError.SEVERITY_FATAL_ERROR, null, "wf-invalid-character"); + }; + } + }//end-else fDocument.isXMLVersion() + + }//checkforValidXMLCharacter /** @@ -1762,4 +1778,5 @@ return null; } + } // DOMNormalizer class
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]