neeraj 2003/11/17 23:34:36 Modified: java/src/org/apache/xerces/dom DOMConfigurationImpl.java DOMNormalizer.java Log: Now applications can set the value of the well-formedness feature to either true or false when normalizing a document. Revision Changes Path 1.14 +16 -15 xml-xerces/java/src/org/apache/xerces/dom/DOMConfigurationImpl.java Index: DOMConfigurationImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/dom/DOMConfigurationImpl.java,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- DOMConfigurationImpl.java 17 Nov 2003 13:48:40 -0000 1.13 +++ DOMConfigurationImpl.java 18 Nov 2003 07:34:36 -0000 1.14 @@ -188,7 +188,7 @@ protected final static short COMMENTS = 0x1<<5; protected final static short VALIDATE = 0x1<<6; protected final static short PSVI = 0x1<<7; - + protected final static short WELLFORMED = 0x1<<8; // components /** Symbol table. */ @@ -290,6 +290,7 @@ features |= COMMENTS; features |= CDATA; features |= SPLITCDATA; + features |= WELLFORMED; if (symbolTable == null) { symbolTable = new SymbolTable(); @@ -569,23 +570,22 @@ } else if (name.equals(Constants.DOM_ENTITIES)) { features = (short) (state ? features | ENTITIES : features & ~ENTITIES); - } else if (name.equals(Constants.DOM_SPLIT_CDATA)) { features = (short) (state ? features | SPLITCDATA : features & ~SPLITCDATA); - } else if (name.equals(Constants.DOM_VALIDATE)) { features = (short) (state ? features | VALIDATE : features & ~VALIDATE); - } + else if (name.equals(Constants.DOM_WELLFORMED)) { + features = (short) (state ? 
features | WELLFORMED : features & ~WELLFORMED ); + } else if (name.equals(Constants.DOM_INFOSET) || name.equals(Constants.DOM_NORMALIZE_CHARACTERS) || name.equals(Constants.DOM_CANONICAL_FORM) || name.equals(Constants.DOM_VALIDATE_IF_SCHEMA) || name.equals(Constants.DOM_CHECK_CHAR_NORMALIZATION) - //REVISIT: we need to support true value - || name.equals(Constants.DOM_WELLFORMED)) { + ) { if (state) { // true is not supported String msg = DOMMessageFormatter.formatMessage( @@ -809,13 +809,14 @@ else if (name.equals(Constants.DOM_VALIDATE)) { return (features & VALIDATE) != 0 ? Boolean.TRUE : Boolean.FALSE; } - else if (name.equals(Constants.DOM_INFOSET) + else if (name.equals(Constants.DOM_WELLFORMED)) { + return (features & WELLFORMED) != 0 ? Boolean.TRUE : Boolean.FALSE; + } + else if ( name.equals(Constants.DOM_INFOSET) || name.equals(Constants.DOM_NORMALIZE_CHARACTERS) || name.equals(Constants.DOM_CANONICAL_FORM) || name.equals(Constants.DOM_VALIDATE_IF_SCHEMA) - || name.equals(Constants.DOM_CHECK_CHAR_NORMALIZATION) - //REVISIT: currently its set to false - || name.equals(Constants.DOM_WELLFORMED) + || name.equals(Constants.DOM_CHECK_CHAR_NORMALIZATION) ) { return Boolean.FALSE; } @@ -898,7 +899,9 @@ || name.equals(Constants.DOM_ENTITIES) || name.equals(Constants.DOM_SPLIT_CDATA) || name.equals(Constants.DOM_NAMESPACES) - || name.equals(Constants.DOM_VALIDATE)) { + || name.equals(Constants.DOM_VALIDATE) + || name.equals(Constants.DOM_WELLFORMED) + ) { return true ; }//features whose parameter value can not be set to 'true' else if ( @@ -906,9 +909,7 @@ || name.equals(Constants.DOM_NORMALIZE_CHARACTERS) || name.equals(Constants.DOM_CANONICAL_FORM) || name.equals(Constants.DOM_VALIDATE_IF_SCHEMA) - || name.equals(Constants.DOM_CHECK_CHAR_NORMALIZATION) - //REVISIT: we need to support true value - || name.equals(Constants.DOM_WELLFORMED) + || name.equals(Constants.DOM_CHECK_CHAR_NORMALIZATION) ) { return (value.equals(Boolean.TRUE)) ? 
false : true; }//features whose parameter value can not be set to 'false' 1.43 +64 -73 xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java Index: DOMNormalizer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/dom/DOMNormalizer.java,v retrieving revision 1.42 retrieving revision 1.43 diff -u -r1.42 -r1.43 --- DOMNormalizer.java 17 Nov 2003 15:26:01 -0000 1.42 +++ DOMNormalizer.java 18 Nov 2003 07:34:36 -0000 1.43 @@ -314,8 +314,11 @@ if (DEBUG_ND) { System.out.println("==>normalizeNode:{element} "+node.getNodeName()); } - //do the name check only when version of the document was changed. - if(fDocument.isXMLVersionChanged()){ + + //do the name check only when version of the document was changed & + //application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) && + fDocument.isXMLVersionChanged()){ //take care of namespaces if(fNamespaceValidation){ //checkQName does checking based on the version of the document @@ -402,10 +405,11 @@ Attr attr = (Attr)attributes.item(i); //removeDefault(attr, attributes); attr.normalize(); - //REVISIT: As of right now we are doing checks only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - if(fDocument.isXMLVersionChanged()){ + + //do the name check only when version of the document was changed & + //application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) && + fDocument.isXMLVersionChanged()){ fDocument.isXMLName(node.getNodeName() , fDocument.isXML11Version()); } // XML 1.0 attribute value normalization @@ -485,15 +489,13 @@ } }//if comment node need not be removed else { - //REVISIT: As of right now we are doing checks 
only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - if(fDocument.isXMLVersionChanged()){ + + //do the well-formed valid character check when application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0)){ String commentdata = ((Comment)node).getData(); //check comments for invalid xml chracter as per the version //of the document - checkInValidXMLCharacters(commentdata, fDocument.isXML11Version()); - + checkInValidXMLCharacters(commentdata, fDocument.isXML11Version()); } }//end-else if comment node is not to be removed. } @@ -501,8 +503,11 @@ if (DEBUG_ND) { System.out.println("==>normalizeNode:{entityRef} "+node.getNodeName()); } - //do the name check only when version of the document was changed. - if(fDocument.isXMLVersionChanged()){ + + //do the name check only when version of the document was changed & + //application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) && + fDocument.isXMLVersionChanged()){ //REVISIT: checkQName takes care of the version of the document //but isXMLName doesn't.... why its so ? 
fDocument.isXMLName(node.getNodeName() , fDocument.isXML11Version()); @@ -551,13 +556,10 @@ //or presence of ']]>' in CDATA shouldnot affect the following checks //we should be checking for presence of valid XML characters - //REVISIT: As of right now we are doing checks only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - - if(fDocument.isXMLVersionChanged()){ - String cdatavalue = node.getNodeValue() ; - checkInValidXMLCharacters(cdatavalue, fDocument.isXML11Version()); + + //do the well-formed valid character check when application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) ){ + checkInValidXMLCharacters(node.getNodeValue(), fDocument.isXML11Version()); } if ((fConfiguration.features & DOMConfigurationImpl.CDATA) == 0) { @@ -621,13 +623,9 @@ ((Text)node).appendData(next.getNodeValue()); node.getParentNode().removeChild( next ); - //check the text values for valid xml character as per document version... - - //REVISIT: As of right now we are doing checks only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - - if(fDocument.isXMLVersionChanged()){ + //check the text values for valid xml character as per document version + //when application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) ){ checkInValidXMLCharacters(node.getNodeValue(), fDocument.isXML11Version()); } @@ -637,13 +635,9 @@ // If kid is empty, remove it node.getParentNode().removeChild( node ); } else { - //check the text values for valid xml character as per document version... 
- - //REVISIT: As of right now we are doing checks only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - - if(fDocument.isXMLVersionChanged()){ + //check the text values for valid xml character as per document version... + //do the name check when application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) ){ checkInValidXMLCharacters(node.getNodeValue(), fDocument.isXML11Version()); } @@ -683,38 +677,35 @@ break; } case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE: { - //REVISIT: As of right now we are doing checks only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - if(!fDocument.isXMLVersionChanged()){ - break ; + //do the well-formed valid PI target name , data check when application has set the value of well-formed feature to true + if((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0 ){ + ProcessingInstruction pinode = (ProcessingInstruction)node ; + + String target = pinode.getTarget(); + //1.check PI target name + if(fDocument.isXML11Version()){ + + if(!XML11Char.isXML11ValidName(target)){ + //REVISIT: As per DOM it is error but as per XML spec. it is fatal error + reportDOMError("Invalid Character in node name", + DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character-in-node-name"); + } + } + else{ + if(!XMLChar.isValidName(target)){ + //REVISIT: As per DOM it is error but as per XML spec. it is fatal error + reportDOMError("Invalid Character in node name", + DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character-in-node-name"); + } + } + + //2. 
check PI data + //processing isntruction data may have certain characters + //which may not be valid XML character + + checkInValidXMLCharacters(pinode.getData(), fDocument.isXML11Version()); } - //processing isntruction data may have certain characters - //which may not be valid XML character - ProcessingInstruction pinode = (ProcessingInstruction)node ; - - String target = pinode.getTarget(); - //1.check PI target name - if(fDocument.isXML11Version()){ - - if(!XML11Char.isXML11ValidName(target)){ - //REVISIT: As per DOM it is error but as per XML spec. it is fatal error - reportDOMError("Invalid Character in node name", - DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character-in-node-name"); - } - } - else{ - if(!XMLChar.isValidName(target)){ - //REVISIT: As per DOM it is error but as per XML spec. it is fatal error - reportDOMError("Invalid Character in node name", - DOMError.SEVERITY_FATAL_ERROR, node, "wf-invalid-character-in-node-name"); - } - } - - //2. check PI data - checkInValidXMLCharacters(pinode.getData(), fDocument.isXML11Version()); - }//end case Node.PROCESSING_INSTRUCTION_NODE }//end of switch @@ -762,11 +753,11 @@ // Record all valid local declarations for (int k=0; k < attributes.getLength(); k++) { Attr attr = (Attr)attributes.getItem(k); - - //REVISIT: As of right now we are doing checks only if the XML version changed at any moment - //but it is possible that bad XML characters enter into DOM when created in memory -- so we should - //still be doing these checks when document is loaded or modified in memory - if(fDocument.isXMLVersionChanged()){ + + //do the name check only when version of the document was changed & + //application has set the value of well-formed features to true + if ( ((fConfiguration.features & DOMConfigurationImpl.WELLFORMED) != 0) && + fDocument.isXMLVersionChanged()){ //checkQName does checking based on the version of the document fDocument.checkQName(attr.getPrefix() , attr.getLocalName()) ; } @@ -1014,7 +1005,7 @@ 
int i = 0 ; while(i < datalength){ if(XML11Char.isXML11Invalid(dataarray[i++])){ - String msg = "Invalid XML Character " + Integer.toString(dataarray[i-1], 16) ; + String msg = "Invalid XML Character " + "'" + Integer.toString(dataarray[i-1], 16) + "'" + "in the DOM." ; //REVISIT: As per DOM it is error but as per XML spec. it is fatal error reportDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, null, "wf-invalid-character"); @@ -1027,7 +1018,7 @@ int i = 0 ; while(i < datalength){ if( XMLChar.isInvalid(dataarray[i++]) ){ - String msg = "Invalid XML Character " + Integer.toString(dataarray[i-1], 16) ; + String msg = "Invalid XML Character " + "'" + Integer.toString(dataarray[i-1], 16) + "'" + "in the DOM." ; //REVISIT: As per DOM it is error but as per XML spec. it is fatal error reportDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, null, "wf-invalid-character");
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]