Igor recently pinged me on IRC to ask about switching trafficserver's expat use to libxml2. There's a rationale about libxml2 being a dependency for a range of plugins, so why not use a single XML parser for everything?
A quick grep reveals that expat is only used in two places and that its use is extremely brief and straightforward. I attach a trivial proof-of-concept for switching that to libxml2. The use of ugly developer #ifdefs can of course be refined to a working configuration option to select either expat or libxml2 if folks think it's a worthwhile exercise. I don't think this is the best way to switch: it's just what I could hack OTTOMH without having to resort to TFM. Note that a switch to libxml2 could be the basis for further development, including probably a switch to use libxml2's DOM in place of TrafficServer's homebrew partial document tree. But I'm not volunteering to do that anytime soon. -- Nick Kew
diff --git a/mgmt/stats/StatProcessor.cc b/mgmt/stats/StatProcessor.cc index 268dc29..22d1ff9 100644 --- a/mgmt/stats/StatProcessor.cc +++ b/mgmt/stats/StatProcessor.cc @@ -65,7 +65,8 @@ startElement(void * /* userData ATS_UNUSED */, const char *name, const char **at statObject = NEW(new StatObject(++statCount)); Debug(MODULE_INIT, "\nStat #: ----------------------- %d -----------------------\n", statCount); - for (i = 0; atts[i]; i += 2) { + if (atts) + for (i = 0; atts[i]; i += 2) { ink_assert(atts[i + 1]); // Attribute comes in pairs, hopefully. if (!strcmp(atts[i], "minimum")) { @@ -93,7 +94,8 @@ startElement(void * /* userData ATS_UNUSED */, const char *name, const char **at nodeVar = true; sumClusterVar = true; // Should only be used with cluster variable - for (i = 0; atts[i]; i += 2) { + if (atts) + for (i = 0; atts[i]; i += 2) { ink_assert(atts[i + 1]); // Attribute comes in pairs, hopefully. if (!strcmp(atts[i], "scope")) { nodeVar = (!strcmp(atts[i + 1], "node") ? true : false); @@ -137,14 +139,14 @@ endElement(void * /* userData ATS_UNUSED */, const char */* name ATS_UNUSED */) void -charDataHandler(void * /* userData ATS_UNUSED */, const XML_Char * name, int /* len ATS_UNUSED */) +charDataHandler(void * /* userData ATS_UNUSED */, const xmlchar * name, int /* len ATS_UNUSED */) { if (currentTag != EXPR_TAG && currentTag != DST_TAG) { return; } char content[BUFSIZ * 10]; - if (XML_extractContent(name, content, BUFSIZ * 10) == 0) { + if (XML_extractContent((const char*)name, content, BUFSIZ * 10) == 0) { return; } @@ -184,6 +186,7 @@ StatProcessor::rereadConfig() fileBuffer = fileContent->bufPtr(); fileLen = strlen(fileBuffer); +#ifdef XML_EXPAT /* * Start the XML Praser -- the package used is EXPAT */ @@ -214,6 +217,25 @@ StatProcessor::rereadConfig() * Cleaning upt */ XML_ParserFree(parser); +#else + /* Parse XML with libxml2 */ + xmlSAXHandler sax; + memset(&sax, 0, sizeof(xmlSAXHandler)); + sax.startElement = startElement; + sax.endElement = endElement; + sax.characters = charDataHandler; + sax.initialized = 1; + xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(&sax, NULL, NULL, 0, NULL); + + int status = xmlParseChunk(parser, fileBuffer, fileLen, 1); + if (status != 0) { + xmlErrorPtr errptr = xmlCtxtGetLastError(parser); + mgmt_log(stderr, "%s at %s:%d\n", errptr->message, errptr->file, errptr->line); + } + xmlFreeParserCtxt(parser); +#endif + + delete fileContent; Debug(MODULE_INIT, "\n\n---------- END OF PARSING & INITIALIZING ---------\n\n"); diff --git a/mgmt/stats/StatProcessor.h b/mgmt/stats/StatProcessor.h index 8e6b0c6..9c34c15 100644 --- a/mgmt/stats/StatProcessor.h +++ b/mgmt/stats/StatProcessor.h @@ -46,7 +46,13 @@ #define _FOOTER #include "DynamicStats.h" #include "StatType.h" +#ifdef XML_EXPAT #include "expat.h" +typedef XML_Char xmlchar; +#else +#include <libxml/parser.h> +typedef xmlChar xmlchar; +#endif #include <string.h> #include <stdlib.h> diff --git a/mgmt/utils/XmlUtils.cc b/mgmt/utils/XmlUtils.cc index 528d40c..315e996 100644 --- a/mgmt/utils/XmlUtils.cc +++ b/mgmt/utils/XmlUtils.cc @@ -21,7 +21,13 @@ limitations under the License. */ +#ifdef XML_EXPAT #include "expat.h" +typedef XML_Char xmlchar; +#else +#include <libxml/parser.h> +typedef xmlChar xmlchar; +#endif #include <stdio.h> #include <stdlib.h> #include "ink_platform.h" @@ -378,7 +384,7 @@ XMLNode::getAttributeValueByName(const char *pAName) } void /*XMLDom:: */ -elementStart(void *pObj, const char *el, const char **attr) +elementStart(void *pObj, const xmlchar *el, const xmlchar **attr) { XMLDom *pDom = (XMLDom *) pObj; int nTagLen; @@ -392,12 +398,15 @@ elementStart(void *pObj, const char *el, const char **attr) pDom->m_pCur = p; } - nTagLen = strlen(el); + nTagLen = strlen((const char*)el); pTag = new char[nTagLen + 1]; pDom->m_pCur->m_pNodeName = pTag; memcpy(pTag, el, nTagLen); pTag[nTagLen] = 0; + if (!attr) + return; + int i; int nCount = 0; for (i = 0; attr[i]; i += 2) @@ -410,13 +419,13 @@ elementStart(void *pObj, const char *el, const char **attr) pDom->m_pCur->m_pAList = new Attribute[nCount]; for (i = 0; i < nCount; i++) { /* Attribute Name: attr[2*i], Value: attr[2*i+1]; */ - alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAName, attr[2 * i]); - alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAValue, attr[2 * i + 1]); + alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAName, (const char*)attr[2 * i]); + alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAValue, (const char*)attr[2 * i + 1]); } } void /*XMLDom:: */ -elementEnd(void *pObj, const char * /* el ATS_UNUSED */) +elementEnd(void *pObj, const xmlchar * /* el ATS_UNUSED */) { /*ASSERT(strcmp(el, pCur->pNodeName) == 0); */ XMLDom *pDom = (XMLDom *) pObj; @@ -424,7 +433,7 @@ elementEnd(void *pObj, const char * /* el ATS_UNUSED */) } void /*XMLDom:: */ -charHandler(void *pObj, const char *s, int len) +charHandler(void *pObj, const xmlchar *s, int len) { XMLNode *pNode = ((XMLDom *) pObj)->m_pCur; int oldlen; @@ -450,6 +459,9 @@ charHandler(void *pObj, const char *s, int len) int XMLDom::LoadXML(const char *pXml) { + int rv = 0; +#ifdef XML_EXPAT + /* Old (expat) xml parsing */ XML_Parser p = XML_ParserCreate(NULL); if (!p) /* no Memory. */ return 1; @@ -466,11 +478,39 @@ XMLDom::LoadXML(const char *pXml) /*(void (*)(void *, const char *, int)) */ charHandler ); - if (!XML_Parse(p, pXml, strlen(pXml), 0)) { - /*return 2; Parse Error: bad xml format. */ + if (!XML_Parse(p, pXml, strlen(pXml), 1)) { + rv = 2; /*return 2; Parse Error: bad xml format. */ } + XML_ParserFree(p); + +#else + /* Alternative (libxml2) xml parsing */ + xmlParserCtxtPtr p; + xmlSAXHandler sax; + memset(&sax, 0, sizeof(xmlSAXHandler)); + sax.startElement = elementStart; + sax.endElement = elementEnd; + sax.characters = charHandler; + sax.initialized = 1; + p = xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL); + if (!p) /* no Memory. */ + return 1; - return 0; + /* Looks as if this is actually the whole xml and expat version's + * use of 0 for is_final is a bug. We'll use 1. + */ + if (xmlParseChunk(p, pXml, strlen(pXml), 1) != 0) { + rv = 2; /* expat version ignores error, so we'll do likewise */ + } + xmlFreeParserCtxt(p); +#endif + + /* I don't know why the "return 2" in the old (expat) version was + * commented out. Possibly because the call to XML_Parse incorrectly + * used 0 for is_final last arg, in which case we should restore it. + * If it's a bug in the caller then we'll just have to return 0 always. + */ + return rv; } int