peiyongz 2003/01/28 10:31:47

Modified: c/src/xercesc/dom/impl DOMWriterImpl.hpp DOMWriterImpl.cpp
Log:
Bug#13694: Allow Xerces to write the BOM to XML files

Revision Changes Path
1.12 +9 -1 xml-xerces/c/src/xercesc/dom/impl/DOMWriterImpl.hpp

Index: DOMWriterImpl.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/dom/impl/DOMWriterImpl.hpp,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- DOMWriterImpl.hpp 20 Jan 2003 16:50:13 -0000 1.11
+++ DOMWriterImpl.hpp 28 Jan 2003 18:31:47 -0000 1.12
@@ -57,6 +57,9 @@
 /*
  * $Id$
  * $Log$
+ * Revision 1.12 2003/01/28 18:31:47 peiyongz
+ * Bug#13694: Allow Xerces to write the BOM to XML files
+ *
  * Revision 1.11 2003/01/20 16:50:13 tng
  * DOMWriter fix:
  * 1. wrong wrong nested cdata message
@@ -415,6 +418,8 @@
     void printIndent(int level) const;
     //does the actual work for processNode while keeping track of the level
     void processNode(const DOMNode* const nodeToWrite, int level);
+
+    void processBOM();
 
     // -----------------------------------------------------------------------
     // Private data members

1.29 +74 -4 xml-xerces/c/src/xercesc/dom/impl/DOMWriterImpl.cpp

Index: DOMWriterImpl.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/dom/impl/DOMWriterImpl.cpp,v
retrieving revision 1.28
retrieving revision 1.29
diff -u -r1.28 -r1.29
--- DOMWriterImpl.cpp 24 Jan 2003 20:21:46 -0000 1.28
+++ DOMWriterImpl.cpp 28 Jan 2003 18:31:47 -0000 1.29
@@ -57,6 +57,9 @@
 /*
  * $Id$
  * $Log$
+ * Revision 1.29 2003/01/28 18:31:47 peiyongz
+ * Bug#13694: Allow Xerces to write the BOM to XML files
+ *
  * Revision 1.28 2003/01/24 20:21:46 tng
  * DOMWriter: Call XMLFormatTarget::flush when done.
  *
@@ -181,6 +184,7 @@
 static const int SPLIT_CDATA_SECTIONS_ID = 0x5;
 static const int VALIDATION_ID = 0x6;
 static const int WHITESPACE_IN_ELEMENT_CONTENT_ID = 0x7;
+static const int BYTE_ORDER_MARK_ID = 0x8;
 
 // feature true false
 // ================================================================================
@@ -207,7 +211,8 @@
     false, true, // normalize-characters
     true, true, // split-cdata-sections
     false, true, // validation
-    true, false // whitespace-in-element-content
+    true, false, // whitespace-in-element-content
+    true, true // byte-order-mark
 };
 
 // default end-of-line sequence
@@ -370,6 +375,11 @@
     chLatin_e, chSingleQuote, chLF, chNull
 };
 
+static const XMLByte BOM_utf16be[] = {(XMLByte)0xFE, (XMLByte)0xFF, (XMLByte) 0};
+static const XMLByte BOM_utf16le[] = {(XMLByte)0xFF, (XMLByte)0xFE, (XMLByte) 0};
+static const XMLByte BOM_ucs4be[] = {(XMLByte)0x00, (XMLByte)0x00, (XMLByte)0xFE, (XMLByte)0xFF, (XMLByte) 0};
+static const XMLByte BOM_ucs4le[] = {(XMLByte)0xFF, (XMLByte)0xFE, (XMLByte)0x00, (XMLByte)0x00, (XMLByte) 0};
+
 //
 // Notification of the error though error handler
 //
@@ -426,11 +436,12 @@
     setFeature(CANONICAL_FORM_ID, false);
     setFeature(DISCARD_DEFAULT_CONTENT_ID, true );
     setFeature(ENTITIES_ID, true );
-    setFeature(FORMAT_PRETTY_PRINT_ID, false);
+    setFeature(FORMAT_PRETTY_PRINT_ID, false);
     setFeature(NORMALIZE_CHARACTERS_ID, false);
     setFeature(SPLIT_CDATA_SECTIONS_ID, true );
     setFeature(VALIDATION_ID, false);
     setFeature(WHITESPACE_IN_ELEMENT_CONTENT_ID, true );
+    setFeature(BYTE_ORDER_MARK_ID, false);
 }
 
 bool DOMWriterImpl::canSetFeature(const XMLCh* const featName
@@ -778,9 +789,13 @@
 
     case DOMNode::DOCUMENT_NODE: // Not to be shown to Filter
         {
+
+            // output BOM if needed
+            processBOM();
+
             setURCharRef();
             const DOMDocument *docu = (const DOMDocument*)nodeToWrite;
-
+
             //[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
             //[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
             //[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName
@@ -1220,6 +1235,8 @@
         featureId = VALIDATION_ID;
     else if (XMLString::equals(featName, XMLUni::fgDOMWRTWhitespaceInElementContent))
         featureId = WHITESPACE_IN_ELEMENT_CONTENT_ID;
+    else if (XMLString::equals(featName, XMLUni::fgDOMWRTBOM))
+        featureId = BYTE_ORDER_MARK_ID;
 
     //feature name not resolvable
     if (featureId == INVALID_FEATURE_ID)
@@ -1478,6 +1495,59 @@
 {
     DOMWriterImpl* writer = (DOMWriterImpl*) this;
     delete writer;
+}
+
+void DOMWriterImpl::processBOM()
+{
+    // if the feature is not set, don't output bom
+    if (!getFeature(BYTE_ORDER_MARK_ID))
+        return;
+
+    if ((XMLString::compareIString(fEncoding, XMLUni::fgUTF16LEncodingString) == 0) ||
+        (XMLString::compareIString(fEncoding, XMLUni::fgUTF16LEncodingString2) == 0) )
+    {
+        fFormatter->writeBOM(BOM_utf16le, 2);
+    }
+    else if ((XMLString::compareIString(fEncoding, XMLUni::fgUTF16BEncodingString) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUTF16BEncodingString2) == 0) )
+    {
+        fFormatter->writeBOM(BOM_utf16be, 2);
+    }
+    else if ((XMLString::compareIString(fEncoding, XMLUni::fgUTF16EncodingString) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUTF16EncodingString2) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUTF16EncodingString3) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUTF16EncodingString4) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUTF16EncodingString5) == 0) )
+    {
+#if defined(ENDIANMODE_LITTLE)
+        fFormatter->writeBOM(BOM_utf16le, 2);
+#elif defined(ENDIANMODE_BIG)
+        fFormatter->writeBOM(BOM_utf16be, 2);
+#endif
+    }
+    else if ((XMLString::compareIString(fEncoding, XMLUni::fgUCS4LEncodingString) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUCS4LEncodingString2) == 0) )
+    {
+        fFormatter->writeBOM(BOM_ucs4le, 4);
+    }
+    else if ((XMLString::compareIString(fEncoding, XMLUni::fgUCS4BEncodingString) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUCS4BEncodingString2) == 0) )
+    {
+        fFormatter->writeBOM(BOM_ucs4be, 4);
+    }
+    else if ((XMLString::compareIString(fEncoding, XMLUni::fgUCS4EncodingString) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUCS4EncodingString2) == 0) ||
+             (XMLString::compareIString(fEncoding, XMLUni::fgUCS4EncodingString3) == 0) )
+    {
+#if defined(ENDIANMODE_LITTLE)
+        fFormatter->writeBOM(BOM_ucs4le, 4);
+#elif defined(ENDIANMODE_BIG)
+        fFormatter->writeBOM(BOM_ucs4be, 4);
+#endif
+    }
+
+    return;
+
 }
 
 XERCES_CPP_NAMESPACE_END
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]