Hello,

I am trying, without success, to validate an XML file that refers to an XSD file,
using the SAX parser provided by Xerces-C++.

The following code does not work (parsing an XML file that is not well-formed reports
an error as expected, but an XML file that is invalid with respect to the XSD does not
report any error):
// Minimal reproduction : configure a Xerces-C++ SAXParser for schema validation, then parse xmlFile.
// NOTE(review): setDoNamespaces ( true ) is never called here. The Xerces-C++ documentation of
// setDoSchema says namespace processing must also be turned on, which is presumably why invalid
// documents are not reported -- confirm against the Xerces version in use.
XMLPlatformUtils::Initialize ();
SAXParser * pParser = new SAXParser ();
if ( pParser )
{
  // Validation-related settings ; xsdFile is passed as the external no-namespace schema location
  pParser -> setDoSchema ( true );
  pParser -> setLoadSchema ( true );
  pParser -> setExternalNoNamespaceSchemaLocation ( xsdFile.c_str () );
  pParser -> setValidationScheme ( SAXParser::Val_Auto );
  pParser -> setValidationSchemaFullChecking ( true );
  pParser -> setValidationConstraintFatal ( true );
      
  // The same handler object receives both the document events and the error callbacks
  SaxParserHandler handler; // SaxParserHandler derives from HandlerBase
  pParser -> setDocumentHandler ( &handler );
  pParser -> setErrorHandler ( &handler );
  pParser -> parse ( xmlFile.c_str () );
      
  // The parser is deleted before handler goes out of scope
  delete pParser; pParser = NULL;
}
XMLPlatformUtils::Terminate ();

I want the parse to stop if the XML is not compliant with the XSD. As a bonus, I would
like to avoid handling the XSD "by hand" ( setExternalNoNamespaceSchemaLocation ):
is there a way to tell the parser to validate the XML automatically against the XSD
when the XML refers to an XSD?

Can somebody help me with that?

FH

                                          
<?xml version="1.0" encoding="UTF-8"?>                    <!-- define header document -->

<!-- a well-formed document must be such that :
     1. each child tag has only one parent tag
     2. each child tag must be fully included in its parent tag
        N.B. : a root tag must exist to contain the whole xml document -->

<!-- a valid document must be such that the grammar (dtd or xsd) is respected -->

<helloworld
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:noNamespaceSchemaLocation="Hello.xsd"             > <!-- define body   document and associate it to xsd file -->

  <hello when="morning">
    <msg> Hello and have a good day ! </msg>
  </hello>

  <hello when="afternoon">
    <msg> See you later and have a good afternoon ! </msg>
  </hello>

  <hello when="evening">
    <msg> Bye and have a good evening ! </msg>
  </hello>

</helloworld>

Attachment: Hello.xsd
Description: XML document

<?xml version="1.0" encoding="UTF-8"?>                    <!-- define header document -->

<!-- a well-formed document must be such that :
     1. each child tag has only one parent tag
     2. each child tag must be fully included in its parent tag
        N.B. : a root tag must exist to contain the whole xml document -->

<!-- a valid document must be such that the grammar (dtd or xsd) is respected -->

<helloworld
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:noNamespaceSchemaLocation="Hello.xsd"             > <!-- define body   document and associate it to xsd file -->

  <hello when="morning">
    <msg> Hello and have a good day ! </msg>
  </hello>

  <hello>
    <msg> See you later and have a good afternoon ! </msg>
  </hello>

  <hello when="evening">
    <msg> Bye and have a good evening ! </msg>
  </hello>
  
</helloworld>
#include "SaxParserHandler.h"

#include <xercesc/sax/AttributeList.hpp> // xerces
#include <xercesc/util/XMLString.hpp>    // xerces
#include <iostream>                      // input / output

using namespace std;

// Default constructor : the handler keeps no state of its own, only the base class is constructed.
SaxParserHandler::SaxParserHandler ()
  : HandlerBase ()
{
  // nothing to initialize
}

// Destructor : the handler owns no resource, so there is nothing to release.
SaxParserHandler::~SaxParserHandler ()
{
  // nothing to release
}

void SaxParserHandler::startElement ( const XMLCh * const ipName, AttributeList & ioLsAttr )
{
  char * pMsg = XMLString::transcode ( ipName );
  if ( !pMsg ) return;
  cout << "Parsing element : " << pMsg;
  XMLString::release ( &pMsg );
  for ( XMLSize_t i = 0; i < ioLsAttr.getLength () ; i++ )
  {
    char * pAttrName = XMLString::transcode ( ioLsAttr.getName  ( i ) );
    char * pAttrVal  = XMLString::transcode ( ioLsAttr.getValue ( i ) );
    if ( pAttrName && pAttrVal ) cout << " [ " << pAttrName << "=" << pAttrVal << " ]";
    if ( pAttrName ) XMLString::release ( &pAttrName );
    if ( pAttrVal  ) XMLString::release ( &pAttrVal );
  }
  cout << endl;
}

/*
 Prints one parser diagnostic on standard output as "<severity> at line <n> : <message>".
 - ipSeverity : label to print in front of the diagnostic ( "Error", "Fatal Error" )
 - iExc       : exception describing the problem, as provided by the parser
 The diagnostic is still printed ( with a placeholder text ) when the message cannot be transcoded,
 so that no problem reported by the parser is silently dropped.
 */
static void reportParseProblem ( const char * ipSeverity, const SAXParseException & iExc )
{
  char * pMsg = XMLString::transcode ( iExc.getMessage () );
  cout << ipSeverity << " at line " << iExc.getLineNumber () << " : "
       << ( pMsg ? pMsg : "(message unavailable)" ) << endl;
  if ( pMsg ) XMLString::release ( &pMsg ); // transcoded strings must be given back to Xerces
}

/*
 From ErrorHandler : called by the parser for a recoverable error.
 The diagnostic is labelled "Error" so that it can be told apart from a fatal one.
 */
void SaxParserHandler::error ( const SAXParseException & iExc )
{
  reportParseProblem ( "Error", iExc );
}

/*
 From ErrorHandler : called by the parser for a fatal error.
 The diagnostic is labelled "Fatal Error".
 */
void SaxParserHandler::fatalError ( const SAXParseException & iExc )
{
  reportParseProblem ( "Fatal Error", iExc );
}
#ifndef SaxParserHandler_H
#define SaxParserHandler_H

#include <xercesc/sax/HandlerBase.hpp> // xerces : customize parser behavior with handler

using namespace xercesc;

/*
 The SAX parser presents each node of the XML document in sequence. So when you process one node, you must have already
 stored the information you need about any relevant previous nodes, and you have no information yet about subsequent nodes.
 The SAX parser uses less memory than the DOM parser and is a suitable abstraction for documents that can be processed
 sequentially rather than as a whole.
 N.B. : The DOM parser parses the whole document at once and stores the whole document structure in memory. The DOM parser
        makes it possible to access any part of the document at any time, but uses a relatively large amount of memory.
 */

class SaxParserHandler : public HandlerBase
{
  public:

    SaxParserHandler ();
    virtual ~SaxParserHandler ();
    
    void startElement ( const XMLCh * const ipName, AttributeList & ioLsAttr ); // From DocumentHandler
    void error ( const SAXParseException & );                                   // From ErrorHandler
    void fatalError ( const SAXParseException & );                              // From ErrorHandler
};

#endif
#include <cstddef>   // constants
#include <iostream>  // input / output
#include <stdexcept> // exception
#include <string>    // string

#include <xercesc/parsers/SAXParser.hpp>  // xerces sax parser
#include <xercesc/sax/SAXException.hpp>   // xerces sax exception
#include <xercesc/util/PlatformUtils.hpp> // xerces initialize / terminate
#include <xercesc/util/XMLException.hpp>  // xerces xml exception
#include <xercesc/util/XMLString.hpp>     // xerces transcode / release

#include "SaxParserHandler.h"

using namespace std;
using namespace xercesc;

int main ( int argc, char **argv )
{
  int rc = 1;
  try
  {
    // Check arguments

    if ( argc < 3 ) { cerr << "XmlChecker ERROR - needs 2 argument : xml file, xsd file" << endl; return rc; }
    string xmlFile = argv[1];
    string xsdFile = argv[2];

    // Parse XML file

    XMLPlatformUtils::Initialize ();
    SAXParser * pParser = new SAXParser ();
    if ( pParser )
    {
      pParser -> setDoSchema ( true );
      pParser -> setLoadSchema ( true );
      pParser -> setExternalNoNamespaceSchemaLocation ( xsdFile.c_str () );
      pParser -> setValidationScheme ( SAXParser::Val_Auto );
      pParser -> setValidationSchemaFullChecking ( true );
      pParser -> setValidationConstraintFatal ( true );
      
      SaxParserHandler handler;
      pParser -> setDocumentHandler ( &handler );
      pParser -> setErrorHandler ( &handler );
      pParser -> parse ( xmlFile.c_str () );
      
      delete pParser; pParser = NULL;
    }
    XMLPlatformUtils::Terminate ();

    // End of the program

    rc = 0;
  }
  catch ( exception & e )
  {
    cerr << endl << "XmlChecker ERROR - " << e.what() << endl;
    rc = 1;
  }
  return rc;
}

Reply via email to