Hello, I am trying, without success, to validate an XML file that refers to an XSD file, using the SAX parser provided by Xerces-C++.
The following code does not work ( parsing an XML file that is not well-formed reports an error as expected, but an XML file that is invalid with respect to the XSD reports no error ) : XMLPlatformUtils::Initialize (); SAXParser * pParser = new SAXParser (); if ( pParser ) { pParser -> setDoSchema ( true ); pParser -> setLoadSchema ( true ); pParser -> setExternalNoNamespaceSchemaLocation ( xsdFile.c_str () ); pParser -> setValidationScheme ( SAXParser::Val_Auto ); pParser -> setValidationSchemaFullChecking ( true ); pParser -> setValidationConstraintFatal ( true ); SaxParserHandler handler; // SaxParserHandler derives from HandlerBase pParser -> setDocumentHandler ( &handler ); pParser -> setErrorHandler ( &handler ); pParser -> parse ( xmlFile.c_str () ); delete pParser; pParser = NULL; } XMLPlatformUtils::Terminate (); I want the parse to stop if the XML is not compliant with the XSD. Bonus if I don't have to handle the XSD "by hand" ( setExternalNoNamespaceSchemaLocation ) : is there a way to tell the parser to automatically validate the XML against the XSD when the XML refers to one ? Can somebody help me with that ? FH
<?xml version="1.0" encoding="UTF-8"?> <!-- define header document --> <!-- a well-formed document must be such that : 1. each children tag has only one parent tag 2. each children tag must be fully included in his parent tag N.B. : a root tag must exist to contain the whole xml document --> <!-- a valid document must be such that the grammar (dtd or xsd) is respected --> <helloworld xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="Hello.xsd" > <!-- define body document and associate it to xsd file --> <hello when="morning"> <msg> Hello and have a good day ! </msg> </hello> <hello when="afternoon"> <msg> See you later and have a good afternoon ! </msg> </hello> <hello when="evening"> <msg> Bye and have a good evening ! </msg> </hello> </helloworld>
Hello.xsd
Description: XML document
<?xml version="1.0" encoding="UTF-8"?> <!-- define header document --> <!-- a well-formed document must be such that : 1. each children tag has only one parent tag 2. each children tag must be fully included in his parent tag N.B. : a root tag must exist to contain the whole xml document --> <!-- a valid document must be such that the grammar (dtd or xsd) is respected --> <helloworld xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="Hello.xsd" > <!-- define body document and associate it to xsd file --> <hello when="morning"> <msg> Hello and have a good day ! </msg> </hello> <hello> <msg> See you later and have a good afternoon ! </msg> </hello> <hello when="evening"> <msg> Bye and have a good evening ! </msg> </hello> </helloworld>
#include "SaxParserHandler.h"

#include <xercesc/sax/AttributeList.hpp> // xerces
#include <xercesc/util/XMLString.hpp>    // xerces

#include <iostream>                      // input / output

using namespace std;

// Nothing to set up beyond the HandlerBase defaults.
SaxParserHandler::SaxParserHandler ()
    : HandlerBase ()
{
}

SaxParserHandler::~SaxParserHandler ()
{
}

// Prints the element name followed by each of its attributes as
// " [ name=value ]", all on one line. Prints nothing at all when the
// element name cannot be transcoded to the local code page.
void SaxParserHandler::startElement ( const XMLCh * const ipName, AttributeList & ioLsAttr )
{
    char * elementName = XMLString::transcode ( ipName );
    if ( elementName == 0 )
        return;

    cout << "Parsing element : " << elementName;
    XMLString::release ( &elementName );

    XMLSize_t index = 0;
    while ( index < ioLsAttr.getLength () )
    {
        char * attrName  = XMLString::transcode ( ioLsAttr.getName ( index ) );
        char * attrValue = XMLString::transcode ( ioLsAttr.getValue ( index ) );

        // An attribute is printed only when both halves were transcoded.
        const bool printable = ( attrName != 0 ) && ( attrValue != 0 );
        if ( printable )
        {
            cout << " [ " << attrName << "=" << attrValue << " ]";
        }

        // Transcoded strings are owned by the caller and must be released.
        if ( attrName != 0 )
        {
            XMLString::release ( &attrName );
        }
        if ( attrValue != 0 )
        {
            XMLString::release ( &attrValue );
        }

        ++index;
    }

    cout << endl;
}

// Prints the line number and message carried by the exception to stdout.
// The report is skipped when the message cannot be transcoded.
void SaxParserHandler::error ( const SAXParseException & iExc )
{
    char * text = XMLString::transcode ( iExc.getMessage () );
    if ( text != 0 )
    {
        cout << "Fatal Error at line " << iExc.getLineNumber () << " : " << text << endl;
        XMLString::release ( &text );
    }
}

// Fatal errors are reported exactly like recoverable ones.
void SaxParserHandler::fatalError ( const SAXParseException & iExc )
{
    error ( iExc );
}
#ifndef SaxParserHandler_H #define SaxParserHandler_H #include <xercesc/sax/HandlerBase.hpp> // xerces : customize parser behavior with handler using namespace xercesc; /* The SAX parser presents each node of the XML document in sequence. So when you process one node, you must have already stored information about any relevant previous nodes, and you have no information at that time about subsequent nodes. The SAX parser uses less memory than the DOM parser and it is a suitable abstraction for documents that can be processed sequentially rather than as a whole. N.B. : The DOM parser parses the whole document at once and stores the whole document structure in memory. The DOM parser enables to access any part of the document at any time, but uses a relatively large amount of memory. */ class SaxParserHandler : public HandlerBase { public: SaxParserHandler (); virtual ~SaxParserHandler (); void startElement ( const XMLCh * const ipName, AttributeList & ioLsAttr ); // From DocumentHandler void error ( const SAXParseException & ); // From ErrorHandler void fatalError ( const SAXParseException & ); // From ErrorHandler }; #endif
#include <iostream> // input / output #include <stdexcept> // exception #include <string> // string #include <cstddef> // constants #include <xercesc/util/PlatformUtils.hpp> // xerces initialize / terminate #include <xercesc/parsers/SAXParser.hpp> // xerces sax parser #include "SaxParserHandler.h" using namespace std; using namespace xercesc; int main ( int argc, char **argv ) { int rc = 1; try { // Check arguments if ( argc < 3 ) { cerr << "XmlChecker ERROR - needs 2 argument : xml file, xsd file" << endl; return rc; } string xmlFile = argv[1]; string xsdFile = argv[2]; // Parse XML file XMLPlatformUtils::Initialize (); SAXParser * pParser = new SAXParser (); if ( pParser ) { pParser -> setDoSchema ( true ); pParser -> setLoadSchema ( true ); pParser -> setExternalNoNamespaceSchemaLocation ( xsdFile.c_str () ); pParser -> setValidationScheme ( SAXParser::Val_Auto ); pParser -> setValidationSchemaFullChecking ( true ); pParser -> setValidationConstraintFatal ( true ); SaxParserHandler handler; pParser -> setDocumentHandler ( &handler ); pParser -> setErrorHandler ( &handler ); pParser -> parse ( xmlFile.c_str () ); delete pParser; pParser = NULL; } XMLPlatformUtils::Terminate (); // End of the program rc = 0; } catch ( exception & e ) { cerr << endl << "XmlChecker ERROR - " << e.what() << endl; rc = 1; } return rc; }