Hello. I'm attempting to develop a Xerces-C driver for some internal
benchmarking of various parsers and (in particular) schema validators.
Let me first apologize for e-mailing this list even though I am not a
member; since I only have one question, it didn't seem like subscribing was
necessary.
I've got a driver which seems to work for Xerces-C, but I'm not sure
whether the performance could be improved by tweaking the way I'm using
the API and I wanted to give you guys a chance to point at any mistakes
I might be making.
In particular, I have scanned the archives for speed-related concerns and
noticed some people mentioned reusing the parsed DTD information. I am not
validating against a DTD, but I thought that perhaps there might be a similar
flag to prevent unnecessary W3C Schema work when the same schema is being
used over and over, which is certainly the case here.
The program takes a few commands as input on stdin which specify the schemas
that are being validated against (and their namespaces) and the input files
to validate. All of that parsing is taken care of by the base class.
Nothing is timed except for the xsdVerifyInput() function. I seem to get
comparable performance for both DOM and SAX methods, which is somewhat
surprising since the SAX handlers don't actually do anything. I suppose that
this could indicate that schema validation is the bottleneck.
Anyway, I have attached the driver source as two files: one for the
driver itself, and the other is the base class that provides much of the core
driver functionality and which will be required to successfully build the
driver.
Thank you,
Niko Matsakis
/*
(C) 2001 by DataPower Technology, Inc. All Rights Reserved.
This software is the confidential and proprietary information
of DataPower Technology, Inc. ("Confidential Information"), subject
to the Non-Disclosure Agreement and/or License Agreement you entered
into with DataPower. You shall use such Confidential Information only
in accordance with the terms of said Agreement(s). DataPower makes
no representations or warranties about the suitability of the
software. The software is provided with ABSOLUTELY NO WARRANTY
and DataPower will NOT BE LIABLE for ANY DAMAGES resulting from
the use of the software.
Contact [EMAIL PROTECTED] with any license-related questions.
http://www.datapower.com or [EMAIL PROTECTED]
*/
#include "xmExternalDriver.hpp"
#if !defined(NDEBUG) && defined(_MSC_VER)
#include <crtdbg.h>
#endif
#include <cstdio>
#include <iostream>
#include <util/PlatformUtils.hpp>
#include <sax/ErrorHandler.hpp>
#include <sax/SAXException.hpp>
#include <sax/SAXParseException.hpp>
#include <sax/HandlerBase.hpp>
#include <dom/DOM_DOMException.hpp>
#include <parsers/SAXParser.hpp>
#include <parsers/DOMParser.hpp>
#define StrX(s) XMLString::transcode(s)
// ---------------------------------------------------------------------------
// Simple error handler derivative copied from DOMCountErrorHandler
// in the Xerces examples
// ---------------------------------------------------------------------------
// Error handler handed to both parsers. It remembers whether error() or
// fatalError() has been called since the last resetErrors(), and optionally
// echoes each diagnostic (including warnings) to cerr.
class XMErrorHandler : public ErrorHandler
{
public:
// -----------------------------------------------------------------------
// Constructors and Destructor
// -----------------------------------------------------------------------
XMErrorHandler();
~XMErrorHandler();
// -----------------------------------------------------------------------
// Getter and setter methods
// -----------------------------------------------------------------------
// Returns true once error() or fatalError() has been called since
// construction or the last resetErrors(); warnings do not set the flag.
bool getSawErrors() const
{
return fSawErrors;
}
// Turns printing of diagnostics to cerr on (true) or off (false).
void setPrintErrors (bool s)
{
fPrintErrors = s;
}
// -----------------------------------------------------------------------
// Implementation of the SAX ErrorHandler interface
// -----------------------------------------------------------------------
void warning(const SAXParseException& e);
void error(const SAXParseException& e);
void fatalError(const SAXParseException& e);
void resetErrors();
private :
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// (declared private and never defined, so the handler cannot be copied)
// -----------------------------------------------------------------------
XMErrorHandler(const XMErrorHandler&);
void operator=(const XMErrorHandler&);
// -----------------------------------------------------------------------
// Private data members
//
// fSawErrors
// This is set if we get any errors, and is queryable via a getter
// method. It is read by xmXercesDriver to report whether a schema
// load or a parse was invalid.
//
// fPrintErrors
// When true, warning(), error() and fatalError() print the diagnostic
// to cerr. Defaults to false.
// -----------------------------------------------------------------------
bool fSawErrors;
bool fPrintErrors;
};
class xmXercesDriver : public xmExternalDriver
{
public:
// XSD Variables
// mSchemasSpecified: malloc'ed text passed to setExternalSchemaLocation();
//                    NULL until xsdLoadSchema() stores a pair, freed by reset().
// mSSLen:            offset into mSchemasSpecified at which xsdLoadSchema()
//                    writes the "namespace location" pair.
char* mSchemasSpecified;
int mSSLen;
// One parser per supported mode; both share the error handler mEH.
DOMParser mDOMParser;
SAXParser mSAXParser;
// Document handler installed on the SAX parser by reset().
HandlerBase mDH;
// Records (and optionally prints) validation errors for both parsers.
XMErrorHandler mEH;
// Selects which parser SetSchemaLoc() and xsdVerifyInput() use.
enum { xmDOM, xmSAX } mMode;
// Default-constructs both parsers and their handlers, applies the shared
// validation configuration through reset(), and starts out in DOM mode.
xmXercesDriver()
    : mSchemasSpecified(NULL),
      mSSLen(0),
      mDOMParser(),
      mSAXParser(),
      mDH(),
      mEH(),
      mMode(xmDOM)
{
    // reset() frees mSchemasSpecified, so it has to be NULL before the call.
    reset();
}
// Runs the normal terminate path so the schema-location buffer is released.
~xmXercesDriver ()
{
    (void) terminate();
}
// Handles the driver-specific options and defers everything else to the
// base class.
//   print_errors / enable_print_errors : print diagnostics to cerr
//   disable_print_errors               : stay silent
//   DOM_mode / SAX_mode                : choose the parser used from now on
// Returns XM_EXT_DRIVER_OK for an option handled here, otherwise whatever
// xmExternalDriver::parseOption() returns.
virtual int parseOption (char *option)
{
    if (strcmp (option, "print_errors") == 0 ||
        strcmp (option, "enable_print_errors") == 0)
    {
        mEH.setPrintErrors (true);
        return XM_EXT_DRIVER_OK;
    }
    if (strcmp (option, "disable_print_errors") == 0)
    {
        mEH.setPrintErrors (false);
        return XM_EXT_DRIVER_OK;
    }
    if (strcmp (option, "DOM_mode") == 0)
    {
        mMode = xmDOM;
        return XM_EXT_DRIVER_OK;
    }
    if (strcmp (option, "SAX_mode") == 0)
    {
        mMode = xmSAX;
        return XM_EXT_DRIVER_OK;
    }
    return xmExternalDriver::parseOption (option);
}
// Pushes a schema location into the parser selected by mMode.
// With a non-NULL argument the value becomes the external no-namespace
// schema location; with no argument the accumulated mSchemasSpecified text
// becomes the external (namespaced) schema location.
// Only the parser for the current mode is touched.
void SetSchemaLoc (char *nonamespace = NULL)
{
    const bool haveNoNamespace = (nonamespace != NULL);
    if (mMode == xmDOM)
    {
        if (haveNoNamespace)
            mDOMParser.setExternalNoNamespaceSchemaLocation (nonamespace);
        else
            mDOMParser.setExternalSchemaLocation (mSchemasSpecified);
    }
    else if (mMode == xmSAX)
    {
        if (haveNoNamespace)
            mSAXParser.setExternalNoNamespaceSchemaLocation (nonamespace);
        else
            mSAXParser.setExternalSchemaLocation (mSchemasSpecified);
    }
}
// Registers one schema with the parser for the current mode.
//
//   nsuri     - target namespace of the schema, or the literal "##local"
//               for a schema without a target namespace
//   schemaurl - location of the schema document
//   invalid   - receives the error handler's "saw errors" flag
//
// Namespaced schemas are accumulated in mSchemasSpecified as a
// space-separated list of "namespace location" pairs, which is the format
// setExternalSchemaLocation() expects, so that several schemas can be
// active at once.
//
// Returns XM_EXT_DRIVER_ERROR for a NULL argument or when memory runs out,
// XM_EXT_DRIVER_OK otherwise.
int xsdLoadSchema (char *nsuri, char *schemaurl, int *invalid)
{
    if (!nsuri || !schemaurl)
    {
        return XM_EXT_DRIVER_ERROR;
    }
    mEH.resetErrors();
    if (!strcmp (nsuri, "##local"))
    {
        SetSchemaLoc (schemaurl);
    }
    else
    {
        // A pair is "<nsuri> <schemaurl>"; every pair after the first is
        // preceded by one blank so the list stays well-formed.
        const int sepLen = (mSSLen > 0) ? 1 : 0;
        const int pairLen = (int) (strlen (nsuri) + 1 + strlen (schemaurl));
        const int newLen = mSSLen + sepLen + pairLen;

        // realloc(NULL, n) behaves like malloc(n). Keep the old pointer
        // until the call succeeds so a failure does not leak the buffer.
        char *grown = (char*) realloc (mSchemasSpecified, (size_t) newLen + 1);
        if (grown == NULL)
        {
            return XM_EXT_DRIVER_ERROR;
        }
        mSchemasSpecified = grown;

        snprintf (&mSchemasSpecified[mSSLen], (size_t) (sepLen + pairLen) + 1,
                  "%s%s %s",
                  (sepLen ? " " : ""), nsuri, schemaurl);

        // Advance the write offset so the next pair is appended rather
        // than written over this one.
        mSSLen = newLen;
        // cerr << "mSchemasSpecified:" << mSchemasSpecified << "\n";
        SetSchemaLoc ();
    }
    if (invalid != NULL)
    {
        *invalid = mEH.getSawErrors();
    }
    return XM_EXT_DRIVER_OK;
}
// Parses (and thereby validates) one input document with the parser
// selected by mMode. This is the only call the benchmark times.
//
//   input   - path or URL of the document to parse
//   invalid - set to nonzero when the error handler saw an error or an
//             exception escaped the parser
//
// Returns XM_EXT_DRIVER_OK when the parse ran (whether or not the document
// was valid) and XM_EXT_DRIVER_ERROR on an exception of unknown type.
int xsdVerifyInput (char *input, int *invalid)
{
    mEH.resetErrors();
    try
    {
        if (mMode == xmDOM)
        {
            mDOMParser.parse (input);
        }
        else if (mMode == xmSAX)
        {
            mSAXParser.parse (input);
        }
        *invalid = mEH.getSawErrors();
    }
    catch (const XMLException& toCatch)
    {
        // transcode() hands back a heap-allocated copy that the caller
        // owns; release it once printed so repeated failures do not leak.
        char *msg = StrX(toCatch.getMessage());
        cerr << "\nError during parsing: '" << input << "'\n"
             << "Exception message is: \n"
             << (msg ? msg : "") << "\n" << endl;
        delete [] msg;
        *invalid = 1;
    }
    catch (const DOM_DOMException& toCatch)
    {
        cerr << "\nDOM Error during parsing: '" << input << "'\n"
             << "DOMException code is: \n"
             << toCatch.code << "\n" << endl;
        *invalid = 1;
    }
    catch (...)
    {
        cerr << "\nUnexpected exception during parsing: '" <<
            input << "'\n";
        // Never leave the out-parameter indeterminate.
        *invalid = 1;
        return XM_EXT_DRIVER_ERROR;
    }
    return XM_EXT_DRIVER_OK;
}
// Restores the driver to its initial validation configuration and forgets
// the accumulated schema-location text. Always returns XM_EXT_DRIVER_OK.
int reset ()
{
    // DOM parser: reset, then always validate against W3C Schema with
    // namespaces and full constraint checking enabled.
    mDOMParser.reset();
    mDOMParser.setValidationScheme (DOMParser::Val_Always);
    mDOMParser.setDoNamespaces (true);
    mDOMParser.setDoSchema (true);
    mDOMParser.setValidationSchemaFullChecking (true);
    mDOMParser.setErrorHandler (&mEH);

    // SAX parser: same settings, plus the do-nothing document handler.
    //mSAXParser.reset();
    /* TODO: how do we clear old schema specifications? */
    mSAXParser.setValidationScheme (SAXParser::Val_Always);
    mSAXParser.setDoNamespaces (true);
    mSAXParser.setDoSchema (true);
    mSAXParser.setValidationSchemaFullChecking (true);
    mSAXParser.setErrorHandler (&mEH);
    mSAXParser.setDocumentHandler (&mDH);

    // free(NULL) is a no-op, so no guard is needed.
    free (mSchemasSpecified);
    mSchemasSpecified = NULL;
    mSSLen = 0;
    return XM_EXT_DRIVER_OK;
}
// Shuts the driver down; for this driver that is simply a reset().
// Always returns XM_EXT_DRIVER_OK.
int terminate ()
{
    (void) reset ();
    return XM_EXT_DRIVER_OK;
}
};
// Starts with no errors recorded and diagnostic printing switched off.
XMErrorHandler::XMErrorHandler()
    : fSawErrors(false), fPrintErrors(false)
{
}
// Nothing to release: the handler only holds two flags.
XMErrorHandler::~XMErrorHandler()
{
}
// Called by the parser for a recoverable error (which includes schema
// validity errors). Records that an error was seen and, when printing is
// enabled, writes its position and message to cerr.
// NOTE(review): the text says "at file" but no file name is emitted.
void XMErrorHandler::error(const SAXParseException& e)
{
    fSawErrors = true;
    if (!fPrintErrors)
    {
        return;
    }
    // transcode() returns a heap-allocated copy owned by the caller;
    // release it after printing so each reported error does not leak.
    char *message = XMLString::transcode(e.getMessage());
    cerr << "\nError at file "
         << ", line " << e.getLineNumber()
         << ", char " << e.getColumnNumber()
         << "\n Message: " << (message ? message : "") << endl;
    delete [] message;
}
// Called by the parser for a fatal error. Records that an error was seen
// and, when printing is enabled, writes its position and message to cerr.
// NOTE(review): the text says "at file" but no file name is emitted.
void XMErrorHandler::fatalError(const SAXParseException& e)
{
    fSawErrors = true;
    if (!fPrintErrors)
    {
        return;
    }
    // transcode() returns a heap-allocated copy owned by the caller;
    // release it after printing so each reported error does not leak.
    char *message = XMLString::transcode(e.getMessage());
    cerr << "\nFatal Error at file "
         << ", line " << e.getLineNumber()
         << ", char " << e.getColumnNumber()
         << "\n Message: " << (message ? message : "") << endl;
    delete [] message;
}
// Called by the parser for a warning. Warnings never mark the input as
// invalid; they are only printed to cerr when printing is enabled.
// NOTE(review): the text says "at file" but no file name is emitted.
void XMErrorHandler::warning(const SAXParseException& e)
{
    if (!fPrintErrors)
    {
        return;
    }
    // transcode() returns a heap-allocated copy owned by the caller;
    // release it after printing so each reported warning does not leak.
    char *message = XMLString::transcode(e.getMessage());
    cerr << "\nWarning at file "
         << ", line " << e.getLineNumber()
         << ", char " << e.getColumnNumber()
         << "\n Message: "
         << (message ? message : "") << endl;
    delete [] message;
}
// Clears the "saw errors" flag; the print setting is left untouched.
void XMErrorHandler::resetErrors()
{
    fSawErrors = false;
}
// Program entry point: initialises Xerces-C, runs the command loop of the
// driver, and shuts Xerces-C down again.
// Returns 0 normally and 1 when Xerces-C could not be initialised.
int main (int argc, char ** argv)
{
#if !defined(NDEBUG) && defined(_MSC_VER)
    _CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
    _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE);
    _CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
#endif
    try
    {
        XMLPlatformUtils::Initialize();
    }
    catch (...)
    {
        // Nothing in the library is usable if initialisation failed.
        fprintf (stderr, "Error during Xerces-C initialization\n");
        return 1;
    }
    {
        // The driver owns the parsers, and they must be destroyed before
        // Terminate() tears the library down. The inner scope guarantees
        // that the destructor runs first.
        xmXercesDriver driver;
        driver.main (argc, argv);
    }
    XMLPlatformUtils::Terminate();
    return 0;
}
/*
(C) 2000, 2001 by DataPower Technology, Inc. All Rights Reserved.
This software is the confidential and proprietary information
of DataPower Technology, Inc. ("Confidential Information"), subject
to the Non-Disclosure Agreement and/or License Agreement you entered
into with DataPower. You shall use such Confidential Information only
In accordance with the terms of said Agreement(s). DataPower makes
no representations or warranties about the suitability of the
software. The software is provided with ABSOLUTELY NO WARRANTY
and DataPower will NOT BE LIABLE for ANY DAMAGES resulting from
the use of the software.
Contact [EMAIL PROTECTED] with any license-related questions.
http://www.datapower.com or [EMAIL PROTECTED]
*/
#ifndef XM_EXTERNALDRIVER_HPP
#define XM_EXTERNALDRIVER_HPP
extern "C" {
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#ifdef _WIN32
# include <direct.h>
# include <sys/types.h>
# include <sys/timeb.h>
#else
# include <unistd.h>
# include <sys/time.h>
# include <sys/resource.h>
#endif
}
// Possible return codes for the procedures below.
// UNREC was chosen to be a value unlikely to have been
// returned as an error code by an older driver, when any
// nonzero value was considered an error code.
#define XM_EXT_DRIVER_OK 0
#define XM_EXT_DRIVER_ERROR 1
#define XM_EXT_DRIVER_UNREC 222
//#define XMDEBUG fopen ("temp.out", "a")
#define XMDEBUG NULL
#define xmDebug if (mDebugFile) fprintf
class xmExternalDriver
{
protected:
// Debug log used by the xmDebug macro; main() assigns it from XMDEBUG and
// closes it on exit. No output is written while it is NULL.
FILE *mDebugFile;
bool mParseTime; // is parse time to be included?
// Set by the "terminate" command; ends the command-reading loops in main().
bool mTerminate;
// NOTE(review): not referenced anywhere in the visible code.
enum { unrecognized, ok, error } returncodes;
public:
// XSLT hooks - override these in a driver that supports transforms.
//
// Each hook returns one of the XM_EXT_DRIVER_* constants. The defaults
// answer XM_EXT_DRIVER_UNREC, i.e. "this driver does not implement the
// command".
//
// Parse-time inclusion is handled with a second entry point rather than
// a flag test inside runTransform(), so that the timed routine stays free
// of extra branching: runTransformPT() is called instead of
// runTransform() when mParseTime is true, and by default it simply
// forwards to runTransform().

// Loads the stylesheet stored in 'filename'.
virtual int loadStylesheet (char * filename)
{
    return XM_EXT_DRIVER_UNREC;
}

// Selects 'filename' as the document to transform.
virtual int setInputDocument (char * filename)
{
    return XM_EXT_DRIVER_UNREC;
}

// Opens 'outputFilename' to receive the transform result.
virtual int openOutput (char * outputFilename)
{
    return XM_EXT_DRIVER_UNREC;
}

// Runs one transform.
virtual int runTransform ()
{
    return XM_EXT_DRIVER_UNREC;
}

// Closes the output opened by openOutput().
virtual int closeOutput ()
{
    return XM_EXT_DRIVER_UNREC;
}

// Runs one transform with parse time included in the measurement.
virtual int runTransformPT ()
{
    return runTransform();
}
// XSD hooks - override these in a driver that supports schema validation.
// The defaults answer XM_EXT_DRIVER_UNREC and leave '*invalid' untouched.

// Registers the schema at 'schemaurl' for the namespace 'nsuri'.
virtual int xsdLoadSchema (char *nsuri, char *schemaurl, int *invalid)
{
    return XM_EXT_DRIVER_UNREC;
}

// Validates the document 'input'.
virtual int xsdVerifyInput (char *input, int *invalid)
{
    return XM_EXT_DRIVER_UNREC;
}
// General configuration hooks.

// Handles the "reset" command; the default has nothing to reset and
// reports success.
virtual int reset()
{
    return XM_EXT_DRIVER_OK;
}

// Handles the "terminate" command; every concrete driver must supply it.
virtual int terminate() = 0;
// Handles the options every driver understands. The only one is
// "include_parse_time", which sets mParseTime.
// Returns 0 when the option was recognised and 1 otherwise; procOption()
// treats any nonzero result as "unrecognized option".
virtual int parseOption (char *option)
{
    if (strcmp (option, "include_parse_time") != 0)
    {
        return 1;
    }
    mParseTime = true;
    return 0;
}
xmExternalDriver()
{
mParseTime = false;
}
// Virtual so that a driver can be destroyed through a base-class pointer.
virtual ~xmExternalDriver ()
{
}
// Returns the resident set size of this process in bytes, taken from
// /proc/self/stat, or 0 when that file is unavailable or cannot be parsed
// (e.g. on systems without a Linux-style /proc).
// NOTE(review): the result wraps for resident sets of 4 GiB or more
// because the return type is unsigned int.
unsigned int memusage ()
{
    unsigned int rss = 0;
    FILE *f = fopen ("/proc/self/stat", "r");
    if (f != NULL)
    {
        // All fields before rss are skipped, including vsize: it is not
        // used and may not fit in 32 bits.
        //          pid com sta ppi pgr ses tty tpg fla min cmi maj cma
        if (fscanf (f, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u "
        //          uti sti cut cst cou pri tim itr sta vsize rss
                    "%*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %u", &rss) != 1)
        {
            rss = 0;
        }
        fclose (f);
    }
    // rss is counted in pages; ask the system for the page size instead of
    // assuming 4 KiB, and fall back to 4096 where it cannot be queried.
#ifdef _WIN32
    const unsigned int pageSize = 4096;
#else
    const long sysPage = sysconf (_SC_PAGESIZE);
    const unsigned int pageSize = (sysPage > 0) ? (unsigned int) sysPage : 4096;
#endif
    return rss * pageSize;
}
// Returns the current wall-clock time in milliseconds since the epoch.
// All arithmetic is done in double: multiplying the seconds by 1000 as an
// integer overflows when time_t is a 32-bit type.
inline double getWallclockMillis ()
{
#ifdef _WIN32
    struct _timeb walltime;
    _ftime (&walltime);
    return 1000.0 * (double) walltime.time + (double) walltime.millitm;
#else
    struct timeval walltime;
    gettimeofday (&walltime, NULL);
    return 1000.0 * (double) walltime.tv_sec +
        0.001 * (double) walltime.tv_usec;
#endif
}
// Cuts the string off at its first '\n', if it has one, by overwriting
// that character with a terminator. Returns 's' so calls can be nested.
inline char * replace_newline (char *s)
{
    if (char *firstNewline = strchr (s, '\n'))
    {
        *firstNewline = '\0';
    }
    return s;
}
// Splits 'buffer' in place into a first word and "the rest".
//
//   word1 - receives the first whitespace-delimited word
//   word2 - receives the text after that word with leading whitespace
//           removed; points at an empty string when there is nothing left
//
// The buffer is modified: it is cut at the first newline and a terminator
// is written after the first word.
void breakIntoTwoWords (char *buffer, char **word1, char **word2)
{
    char * c = buffer;
    xmDebug (mDebugFile, " breakIntoTwoWords: buffer='%s'\n", buffer);
    replace_newline(buffer);
    // isspace() requires a value representable as unsigned char, so cast
    // before every call; a plain (possibly negative) char is undefined.
    // scan for start of first word
    while (*c != 0 && isspace ((unsigned char) *c)) c++;
    // scan for end of first word
    *word1 = c;
    while (*c != 0 && ! isspace ((unsigned char) *c)) c++;
    if (*c != 0)
    {
        *c++ = 0;
        // Skip every blank between the words, so that commands written
        // with more than one separator still give a clean second word.
        while (*c != 0 && isspace ((unsigned char) *c)) c++;
    }
    *word2 = c;
}
// Handles "option <option>": passes the option name to parseOption() and
// prints "OK" or an error line.
// 'buffer' is the whole command line, starting with the keyword.
void procOption (char *buffer)
{
    // Step over the 6-character keyword without ever moving past the
    // terminator (a bare "option" has no argument), then skip the
    // separating whitespace.
    char *opt = replace_newline (buffer);
    for (int k = 0; k < 6 && *opt != 0; ++k) ++opt;
    while (isspace ((unsigned char) *opt)) ++opt;

    if (parseOption (opt))
    {
        printf ("ERROR - Unrecognized option\n");
    }
    else
    {
        printf ("OK\n");
    }
    fflush (stdout);
}
// Handles "stylesheet <sheet>": loads the stylesheet and reports how long
// loading took (wall clock and CPU, in milliseconds).
// 'buffer' is the whole command line, starting with the keyword.
void procStylesheet (char *buffer)
{
    // Step over the 10-character keyword without ever moving past the
    // terminator (a bare "stylesheet" has no argument), then skip the
    // separating whitespace.
    char *sheet = replace_newline (buffer);
    for (int k = 0; k < 10 && *sheet != 0; ++k) ++sheet;
    while (isspace ((unsigned char) *sheet)) ++sheet;

    /* start clocks */
    clock_t tstart = clock();
    double wallstart = getWallclockMillis ();
    /* load stylesheet */
    int ret = loadStylesheet (sheet);
    if (ret == XM_EXT_DRIVER_UNREC)
    {
        printf ("UNRECOGNIZED - stylesheet command\n");
        fflush (stdout);
        return;
    }
    else if (ret != XM_EXT_DRIVER_OK)
    {
        printf ("ERROR loading stylesheet\n");
        fflush (stdout);
        return;
    }
    /* stop clocks, get running time */
    clock_t tstop = clock();
    double cpu_ms =
        1000.0 * (double) (tstop - tstart) / (double) CLOCKS_PER_SEC;
    double wallstop = getWallclockMillis ();
    printf ("OK wallclock: %.2f ms; cpuclock: %.2f\n",
            (wallstop - wallstart), cpu_ms);
    fflush (stdout);
}
// Handles "input <xml file>": selects the input document and, on success,
// prints the change in resident memory reported by memusage().
// 'buffer' is the whole command line, starting with the keyword.
void procInput (char *buffer)
{
    // Step over the 5-character keyword without ever moving past the
    // terminator (a bare "input" has no argument), then skip the
    // separating whitespace.
    char *doc = replace_newline (buffer);
    for (int k = 0; k < 5 && *doc != 0; ++k) ++doc;
    while (isspace ((unsigned char) *doc)) ++doc;

    unsigned int memstart = memusage ();
    int ret = setInputDocument (doc);
    if (ret == XM_EXT_DRIVER_OK)
    {
        // NOTE(review): unsigned difference; it wraps if usage went down.
        printf ("OK %u\n", memusage() - memstart);
    }
    else if (ret == XM_EXT_DRIVER_UNREC)
    {
        printf ("UNRECOGNIZED command\n");
    }
    else
    {
        printf ("ERROR setting input document\n");
    }
    fflush (stdout);
}
// Handles "transform <output> <iter>": runs open-output / transform /
// close-output <iter> times and prints the total wall-clock and CPU time
// in milliseconds. The run stops at the first step that returns nonzero.
//
// Two copies of the loop are kept on purpose: one calls runTransformPT()
// (parse time included) and one calls runTransform(), so the timed loop
// itself contains no test of mParseTime.
void procTransform (char *buffer)
{
    /* the arguments follow the 9-character keyword */
    char *outName, *iterText;
    breakIntoTwoWords (buffer + 9, &outName, &iterText);

    /* a missing, zero or negative count means one iteration */
    int iter = atoi (iterText);
    if (iter <= 0)
    {
        iter = 1;
    }

    clock_t cputime = 0;
    double wallclock_ms = 0.0;
    int i, ret = 0;

    if (mParseTime)
    {
        const clock_t cpustart = clock();
        const double wallstart = getWallclockMillis ();
        for (i = 0; i < iter; i++)
        {
            /* open output, run transform, close output */
            if ((ret = openOutput (outName)) != 0 ||
                (ret = runTransformPT ()) != 0 ||
                (ret = closeOutput ()) != 0)
            {
                break;
            }
        }
        cputime = clock() - cpustart;
        wallclock_ms = getWallclockMillis () - wallstart;
    }
    else
    {
        const clock_t cpustart = clock();
        const double wallstart = getWallclockMillis ();
        for (i = 0; i < iter; i++)
        {
            /* open output, run transform, close output */
            if ((ret = openOutput (outName)) != 0 ||
                (ret = runTransform ()) != 0 ||
                (ret = closeOutput ()) != 0)
            {
                break;
            }
        }
        cputime = clock() - cpustart;
        wallclock_ms = getWallclockMillis () - wallstart;
    }

    // 'ret' holds the code that ended the loop (or the last success).
    if (ret == XM_EXT_DRIVER_UNREC)
    {
        printf ("UNRECOGNIZED\n");
        return;
    }
    if (ret != XM_EXT_DRIVER_OK)
    {
        printf ("ERROR in transform, iteration %d; "
                "processing aborted\n", i);
        fflush (stdout);
        return;
    }

    const double cpu_ms =
        1000.0 * (double) cputime / (double) CLOCKS_PER_SEC;
    printf ("OK wallclock: %.2f ms; cpuclock: %.2f\n",
            wallclock_ms, cpu_ms);
    fflush (stdout);
}
// Handles "xsd-schema <schema> <namespace>": registers the schema with the
// driver and prints OK / INVALID / UNRECOGNIZED / ERROR.
// 'buffer' is the whole command line, starting with the 10-character
// keyword. Note the argument order: the schema location comes first on
// the command line but second in the xsdLoadSchema() call.
void procXSDSchema (char *buffer)
{
    char *uri, *url;
    // Start from "valid" so the result is defined even if the driver
    // returns OK without writing to the out-parameter.
    int invalid = 0;
    int ret;
    breakIntoTwoWords (&buffer[10], &url, &uri);
    ret = xsdLoadSchema (uri, url, &invalid);
    if (ret == XM_EXT_DRIVER_OK)
    {
        printf ("%s\n", (invalid ? "INVALID" : "OK"));
    }
    else if (ret == XM_EXT_DRIVER_UNREC)
    {
        printf ("UNRECOGNIZED command\n");
    }
    else
    {
        printf ("ERROR setting schema\n");
    }
    fflush (stdout);
}
// Handles "xsd-input <input> <iter>": validates the input document <iter>
// times and prints the verdict of the last run together with the total
// wall-clock and CPU time in milliseconds. The loop stops at the first
// run that does not return XM_EXT_DRIVER_OK.
// 'buffer' is the whole command line, starting with the 9-character
// keyword.
void procXSDInput (char *buffer)
{
    /* get input filename */
    char *filename, *iterstr;
    breakIntoTwoWords (&buffer[9], &filename, &iterstr);
    /* get # of iterations; anything that is not a positive number means 1 */
    int iter;
    if ((iter = atoi(iterstr)) <= 0) iter = 1;
    /* set clock accumulators to zero */
    clock_t cputime = 0;
    double wallclock_ms = 0.0;
    // 'invalid' starts as "valid" so the verdict is defined even if the
    // driver returns OK without writing to the out-parameter.
    int i, ret = XM_EXT_DRIVER_OK, invalid = 0;
    /* start clocks */
    clock_t cpustart = clock();
    double wallstart = getWallclockMillis ();
    for (i=0; i < iter && ret == XM_EXT_DRIVER_OK; i++)
    {
        ret = xsdVerifyInput (filename, &invalid);
    }
    /* stop clocks, get running time */
    cputime = (clock() - cpustart);
    double wallstop = getWallclockMillis ();
    wallclock_ms = (wallstop - wallstart);
    // Check return code that made the loop stop.
    if (ret == XM_EXT_DRIVER_UNREC)
    {
        printf ("UNRECOGNIZED\n");
        fflush (stdout);
        return;
    }
    else if (ret != XM_EXT_DRIVER_OK)
    {
        printf ("ERROR in verification, iteration %d; "
                "processing aborted\n", i);
        fflush (stdout);
        return;
    }
    double cpu_ms = 1000.0 * (double) cputime / (double) CLOCKS_PER_SEC;
    printf ("%s wallclock: %.2f ms; cpuclock: %.2f\n",
            (invalid ? "INVALID" : "OK"), wallclock_ms, cpu_ms);
    fflush (stdout);
}
// Executes one command line. The command is recognised by comparing the
// start of the line with each keyword in turn, so the first keyword that
// is a prefix of the line wins. Every path ends with stdout flushed, so a
// controlling process sees the reply immediately.
void procCmd(char *buffer)
{
    replace_newline(buffer);
    xmDebug(mDebugFile,
            " --- procCmd: %s\n", buffer);

    if (strncmp (buffer, "chdir", 5) == 0)
    {
        // The directory is whatever follows the keyword and its blanks.
        char *dir = buffer + 5;
        while (isspace ((int) *dir))
        {
            dir++;
        }
        if (chdir (dir) != 0)
        {
            printf ("ERROR - can't chdir to '%s'\n", dir);
        }
        else
        {
            printf ("OK\n");
        }
        fflush (stdout);
    }
    else if (strncmp (buffer, "option", 6) == 0)
    {
        procOption (buffer);
    }
    else if (strncmp (buffer, "stylesheet", 10) == 0)
    {
        procStylesheet (buffer);
    }
    else if (strncmp (buffer, "input", 5) == 0)
    {
        procInput (buffer);
    }
    else if (strncmp (buffer, "transform", 9) == 0)
    {
        procTransform (buffer);
    }
    else if (strncmp (buffer, "xsd-schema", 10) == 0)
    {
        procXSDSchema (buffer);
    }
    else if (strncmp (buffer, "xsd-input", 9) == 0)
    {
        procXSDInput (buffer);
    }
    else if (strncmp (buffer, "help", 4) == 0)
    {
        printf ("xmExternalDriver:\n"
                "option <option>\n"
                "chdir <dir>\n"
                "stylesheet <sheet>\n"
                "input <xml file>\n"
                "transform <output> <iter>\n"
                "xsd-schema <schema> <namespace>\n"
                "xsd-input <input> <iter>\n"
                "reset\n"
                "terminate\n");
    }
    else if (strncmp (buffer, "reset", 5) == 0)
    {
        const int status = reset ();
        fputs ((status == 0) ? "OK\n" : "ERROR resetting driver\n", stdout);
        fflush (stdout);
    }
    else if (strncmp (buffer, "terminate", 9) == 0)
    {
        const int status = terminate ();
        fputs ((status == 0) ? "OK\n" : "ERROR terminating driver\n", stdout);
        fflush (stdout);
        // Makes the reading loop in main() stop after this command.
        mTerminate = true;
    }
    else
    {
        printf ("UNRECOGNIZED command - try 'help'\n");
    }
    fflush (stdout);
}
// Runs the driver in one of three ways, chosen by the argument count:
//   no arguments      - read commands from stdin until EOF or "terminate"
//   one argument      - read commands from the named script file
//   four or more      - <stylesheet> <input> <transform> <iterations>
//                       [options...]: apply the options, then run a
//                       single stylesheet/input/transform sequence
// Any other count prints the usage text to stderr.
void main (int argc, char** argv)
{
    char buffer[1000];
    mTerminate = false;
    mDebugFile = XMDEBUG;
    xmDebug (mDebugFile, "main {\n");
    if (argc == 1)
    {
        while (!mTerminate && NULL != fgets (buffer, sizeof (buffer), stdin))
        {
            procCmd (buffer);
        }
    }
    else if (argc >= 5)
    {
        // Command lines are built with snprintf so that long arguments
        // are truncated instead of overrunning 'buffer'.
        int i;
        for (i = 5; i < argc; i++)
        {
            snprintf (buffer, sizeof (buffer), "option %s", argv[i]);
            printf ("> Executing '%s'\n", buffer);
            procCmd (buffer);
        }
        snprintf (buffer, sizeof (buffer), "stylesheet %s", argv[1]);
        fprintf (stderr, "> Executing '%s'\n", buffer);
        procCmd (buffer);
        snprintf (buffer, sizeof (buffer), "input %s", argv[2]);
        fprintf (stderr, "> Executing '%s'\n", buffer);
        procCmd (buffer);
        snprintf (buffer, sizeof (buffer), "transform %s %s",
                  argv[3], argv[4]);
        fprintf (stderr, "> Executing '%s'\n", buffer);
        procCmd (buffer);
        printf ("> Termination status: %d\n", terminate());
    }
    else if (argc == 2)
    {
        FILE *f = fopen (argv[1], "r");
        if (f)
        {
            while (!mTerminate && NULL != fgets (buffer, sizeof (buffer), f))
            {
                fprintf(stderr, "> Executing '%s'\n",
                        replace_newline(buffer));
                procCmd (buffer);
            }
            // Release the script once it has been processed.
            fclose (f);
        }
        else
        {
            fprintf(stderr, "Error: could not open file '%s'\n",
                    argv[1]);
        }
    }
    else
    {
        fprintf (stderr, "xmExternalDriver:\n");
        fprintf (stderr, "Usage 1: %s <stylesheet> <input> "
                 "<transform> <iterations> <options...>\n",
                 argv[0]);
        fprintf (stderr, "Usage 2: %s <script>\n", argv[0]);
    }
    xmDebug (mDebugFile, "} end\n");
    if (mDebugFile)
        fclose(mDebugFile);
}
};
#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]