Igor recently pinged me on IRC to ask about switching
trafficserver's expat use to libxml2. There's a rationale
about libxml2 being a dependency for a range of plugins,
so why not use a single XML parser for everything?
A quick grep reveals that expat is only used in two places
and that its use is extremely brief and straightforward.
I attach a trivial proof-of-concept for switching that to
libxml2. The use of ugly developer #ifdefs can of course
be refined to a working configuration option to select either
expat or libxml2 if folks think it's a worthwhile exercise.
I don't think this is the best way to switch: it's just
what I could hack OTTOMH without having to resort to TFM.
Note that a switch to libxml2 could be the basis for further
development, including probably a switch to use libxml2's
DOM in place of TrafficServer's homebrew partial document
tree. But I'm not volunteering to do that anytime soon.
--
Nick Kew
diff --git a/mgmt/stats/StatProcessor.cc b/mgmt/stats/StatProcessor.cc
index 268dc29..22d1ff9 100644
--- a/mgmt/stats/StatProcessor.cc
+++ b/mgmt/stats/StatProcessor.cc
@@ -65,7 +65,8 @@ startElement(void * /* userData ATS_UNUSED */, const char *name, const char **at
statObject = NEW(new StatObject(++statCount));
Debug(MODULE_INIT, "\nStat #: ----------------------- %d -----------------------\n", statCount);
- for (i = 0; atts[i]; i += 2) {
+ if (atts)
+ for (i = 0; atts[i]; i += 2) {
ink_assert(atts[i + 1]); // Attribute comes in pairs, hopefully.
if (!strcmp(atts[i], "minimum")) {
@@ -93,7 +94,8 @@ startElement(void * /* userData ATS_UNUSED */, const char *name, const char **at
nodeVar = true;
sumClusterVar = true; // Should only be used with cluster variable
- for (i = 0; atts[i]; i += 2) {
+ if (atts)
+ for (i = 0; atts[i]; i += 2) {
ink_assert(atts[i + 1]); // Attribute comes in pairs, hopefully.
if (!strcmp(atts[i], "scope")) {
nodeVar = (!strcmp(atts[i + 1], "node") ? true : false);
@@ -137,14 +139,14 @@ endElement(void * /* userData ATS_UNUSED */, const char */* name ATS_UNUSED */)
void
-charDataHandler(void * /* userData ATS_UNUSED */, const XML_Char * name, int /* len ATS_UNUSED */)
+charDataHandler(void * /* userData ATS_UNUSED */, const xmlchar * name, int /* len ATS_UNUSED */)
{
if (currentTag != EXPR_TAG && currentTag != DST_TAG) {
return;
}
char content[BUFSIZ * 10];
- if (XML_extractContent(name, content, BUFSIZ * 10) == 0) {
+ if (XML_extractContent((const char*)name, content, BUFSIZ * 10) == 0) {
return;
}
@@ -184,6 +186,7 @@ StatProcessor::rereadConfig()
fileBuffer = fileContent->bufPtr();
fileLen = strlen(fileBuffer);
+#ifdef XML_EXPAT
/*
* Start the XML Praser -- the package used is EXPAT
*/
@@ -214,6 +217,25 @@ StatProcessor::rereadConfig()
* Cleaning upt
*/
XML_ParserFree(parser);
+#else
+ /* Parse XML with libxml2 */
+ xmlSAXHandler sax;
+ memset(&sax, 0, sizeof(xmlSAXHandler));
+ sax.startElement = startElement;
+ sax.endElement = endElement;
+ sax.characters = charDataHandler;
+ sax.initialized = 1;
+ xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(&sax, NULL, NULL, 0, NULL);
+
+ int status = xmlParseChunk(parser, fileBuffer, fileLen, 1);
+ if (status != 0) {
+ xmlErrorPtr errptr = xmlCtxtGetLastError(parser);
+ mgmt_log(stderr, "%s at %s:%d\n", errptr->message, errptr->file, errptr->line);
+ }
+ xmlFreeParserCtxt(parser);
+#endif
+
+
delete fileContent;
Debug(MODULE_INIT, "\n\n---------- END OF PARSING & INITIALIZING ---------\n\n");
diff --git a/mgmt/stats/StatProcessor.h b/mgmt/stats/StatProcessor.h
index 8e6b0c6..9c34c15 100644
--- a/mgmt/stats/StatProcessor.h
+++ b/mgmt/stats/StatProcessor.h
@@ -46,7 +46,13 @@
#define _FOOTER
#include "DynamicStats.h"
#include "StatType.h"
+#ifdef XML_EXPAT
#include "expat.h"
+typedef XML_Char xmlchar;
+#else
+#include <libxml/parser.h>
+typedef xmlChar xmlchar;
+#endif
#include <string.h>
#include <stdlib.h>
diff --git a/mgmt/utils/XmlUtils.cc b/mgmt/utils/XmlUtils.cc
index 528d40c..315e996 100644
--- a/mgmt/utils/XmlUtils.cc
+++ b/mgmt/utils/XmlUtils.cc
@@ -21,7 +21,13 @@
limitations under the License.
*/
+#ifdef XML_EXPAT
#include "expat.h"
+typedef XML_Char xmlchar;
+#else
+#include <libxml/parser.h>
+typedef xmlChar xmlchar;
+#endif
#include <stdio.h>
#include <stdlib.h>
#include "ink_platform.h"
@@ -378,7 +384,7 @@ XMLNode::getAttributeValueByName(const char *pAName)
}
void /*XMLDom:: */
-elementStart(void *pObj, const char *el, const char **attr)
+elementStart(void *pObj, const xmlchar *el, const xmlchar **attr)
{
XMLDom *pDom = (XMLDom *) pObj;
int nTagLen;
@@ -392,12 +398,15 @@ elementStart(void *pObj, const char *el, const char **attr)
pDom->m_pCur = p;
}
- nTagLen = strlen(el);
+ nTagLen = strlen((const char*)el);
pTag = new char[nTagLen + 1];
pDom->m_pCur->m_pNodeName = pTag;
memcpy(pTag, el, nTagLen);
pTag[nTagLen] = 0;
+ if (!attr)
+ return;
+
int i;
int nCount = 0;
for (i = 0; attr[i]; i += 2)
@@ -410,13 +419,13 @@ elementStart(void *pObj, const char *el, const char **attr)
pDom->m_pCur->m_pAList = new Attribute[nCount];
for (i = 0; i < nCount; i++) {
/* Attribute Name: attr[2*i], Value: attr[2*i+1]; */
- alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAName, attr[2 * i]);
- alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAValue, attr[2 * i + 1]);
+ alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAName, (const char*)attr[2 * i]);
+ alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAValue, (const char*)attr[2 * i + 1]);
}
}
void /*XMLDom:: */
-elementEnd(void *pObj, const char * /* el ATS_UNUSED */)
+elementEnd(void *pObj, const xmlchar * /* el ATS_UNUSED */)
{
/*ASSERT(strcmp(el, pCur->pNodeName) == 0); */
XMLDom *pDom = (XMLDom *) pObj;
@@ -424,7 +433,7 @@ elementEnd(void *pObj, const char * /* el ATS_UNUSED */)
}
void /*XMLDom:: */
-charHandler(void *pObj, const char *s, int len)
+charHandler(void *pObj, const xmlchar *s, int len)
{
XMLNode *pNode = ((XMLDom *) pObj)->m_pCur;
int oldlen;
@@ -450,6 +459,9 @@ charHandler(void *pObj, const char *s, int len)
int
XMLDom::LoadXML(const char *pXml)
{
+ int rv = 0;
+#ifdef XML_EXPAT
+ /* Old (expat) xml parsing */
XML_Parser p = XML_ParserCreate(NULL);
if (!p) /* no Memory. */
return 1;
@@ -466,11 +478,39 @@ XMLDom::LoadXML(const char *pXml)
/*(void (*)(void *, const char *, int)) */ charHandler
);
- if (!XML_Parse(p, pXml, strlen(pXml), 0)) {
- /*return 2; Parse Error: bad xml format. */
+ if (!XML_Parse(p, pXml, strlen(pXml), 1)) {
+ rv = 2; /*return 2; Parse Error: bad xml format. */
}
+ XML_ParserFree(p);
+
+#else
+ /* Alternative (libxml2) xml parsing */
+ xmlParserCtxtPtr p;
+ xmlSAXHandler sax;
+ memset(&sax, 0, sizeof(xmlSAXHandler));
+ sax.startElement = elementStart;
+ sax.endElement = elementEnd;
+ sax.characters = charHandler;
+ sax.initialized = 1;
+ p = xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL);
+ if (!p) /* no Memory. */
+ return 1;
- return 0;
+ /* Looks as if this is actually the whole xml and expat version's
+ * use of 0 for is_final is a bug. We'll use 1.
+ */
+ if (xmlParseChunk(p, pXml, strlen(pXml), 1) != 0) {
+ rv = 2; /* expat version ignores error, so we'll do likewise */
+ }
+ xmlFreeParserCtxt(p);
+#endif
+
+ /* I don't know why the "return 2" in the old (expat) version was
+ * commented out. Possibly because the call to XML_Parse incorrectly
+ * used 0 for is_final last arg, in which case we should restore it.
+ * If it's a bug in the caller then we'll just have to return 0 always.
+ */
+ return rv;
}
int