Igor recently pinged me on IRC to ask about switching
trafficserver's expat use to libxml2.  There's a rationale
about libxml2 being a dependency for a range of plugins,
so why not use a single XML parser for everything?

A quick grep reveals that expat is only used in two places
and that its use is extremely brief and straightforward.
I attach a trivial proof-of-concept for switching that to
libxml2.  The use of ugly developer #ifdefs can of course
be refined to a working configuration option to select either
expat or libxml2 if folks think it's a worthwhile exercise.

I don't think this is the best way to switch: it's just
what I could hack OTTOMH without having to resort to TFM.

Note that a switch to libxml2 could be the basis for further
development, including probably a switch to use libxml2's
DOM in place of TrafficServer's homebrew partial document
tree.  But I'm not volunteering to do that anytime soon.

-- 
Nick Kew
diff --git a/mgmt/stats/StatProcessor.cc b/mgmt/stats/StatProcessor.cc
index 268dc29..22d1ff9 100644
--- a/mgmt/stats/StatProcessor.cc
+++ b/mgmt/stats/StatProcessor.cc
@@ -65,7 +65,8 @@ startElement(void * /* userData ATS_UNUSED */, const char *name, const char **at
     statObject = NEW(new StatObject(++statCount));
     Debug(MODULE_INIT, "\nStat #: ----------------------- %d -----------------------\n", statCount);
 
-    for (i = 0; atts[i]; i += 2) {
+    if (atts)
+     for (i = 0; atts[i]; i += 2) {
       ink_assert(atts[i + 1]);    // Attribute comes in pairs, hopefully.
 
       if (!strcmp(atts[i], "minimum")) {
@@ -93,7 +94,8 @@ startElement(void * /* userData ATS_UNUSED */, const char *name, const char **at
     nodeVar = true;
     sumClusterVar = true;       // Should only be used with cluster variable
 
-    for (i = 0; atts[i]; i += 2) {
+    if (atts)
+     for (i = 0; atts[i]; i += 2) {
       ink_assert(atts[i + 1]);    // Attribute comes in pairs, hopefully.
       if (!strcmp(atts[i], "scope")) {
         nodeVar = (!strcmp(atts[i + 1], "node") ? true : false);
@@ -137,14 +139,14 @@ endElement(void * /* userData ATS_UNUSED */, const char */* name ATS_UNUSED */)
 
 
 void
-charDataHandler(void * /* userData ATS_UNUSED */, const XML_Char * name, int /* len ATS_UNUSED */)
+charDataHandler(void * /* userData ATS_UNUSED */, const xmlchar * name, int /* len ATS_UNUSED */)
 {
   if (currentTag != EXPR_TAG && currentTag != DST_TAG) {
     return;
   }
 
   char content[BUFSIZ * 10];
-  if (XML_extractContent(name, content, BUFSIZ * 10) == 0) {
+  if (XML_extractContent((const char*)name, content, BUFSIZ * 10) == 0) {
     return;
   }
 
@@ -184,6 +186,7 @@ StatProcessor::rereadConfig()
   fileBuffer = fileContent->bufPtr();
   fileLen = strlen(fileBuffer);
 
+#ifdef XML_EXPAT
   /*
    * Start the XML Praser -- the package used is EXPAT
    */
@@ -214,6 +217,25 @@ StatProcessor::rereadConfig()
    * Cleaning upt
    */
   XML_ParserFree(parser);
+#else
+  /* Parse XML with libxml2 */
+  xmlSAXHandler sax;
+  memset(&sax, 0, sizeof(xmlSAXHandler));
+  sax.startElement = startElement;
+  sax.endElement = endElement;
+  sax.characters = charDataHandler;
+  sax.initialized = 1;
+  xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(&sax, NULL, NULL, 0, NULL);
+
+  int status = xmlParseChunk(parser, fileBuffer, fileLen, 1);
+  if (status != 0) {
+    xmlErrorPtr errptr = xmlCtxtGetLastError(parser);
+    mgmt_log(stderr, "%s at %s:%d\n", errptr->message, errptr->file, errptr->line);
+  }
+  xmlFreeParserCtxt(parser);
+#endif
+
+
   delete fileContent;
 
   Debug(MODULE_INIT, "\n\n---------- END OF PARSING & INITIALIZING ---------\n\n");
diff --git a/mgmt/stats/StatProcessor.h b/mgmt/stats/StatProcessor.h
index 8e6b0c6..9c34c15 100644
--- a/mgmt/stats/StatProcessor.h
+++ b/mgmt/stats/StatProcessor.h
@@ -46,7 +46,13 @@
 #define _FOOTER
 #include "DynamicStats.h"
 #include "StatType.h"
+#ifdef XML_EXPAT
 #include "expat.h"
+typedef XML_Char xmlchar;
+#else
+#include <libxml/parser.h>
+typedef xmlChar xmlchar;
+#endif
 #include <string.h>
 #include <stdlib.h>
 
diff --git a/mgmt/utils/XmlUtils.cc b/mgmt/utils/XmlUtils.cc
index 528d40c..315e996 100644
--- a/mgmt/utils/XmlUtils.cc
+++ b/mgmt/utils/XmlUtils.cc
@@ -21,7 +21,13 @@
   limitations under the License.
  */
 
+#ifdef XML_EXPAT
 #include "expat.h"
+typedef XML_Char xmlchar;
+#else
+#include <libxml/parser.h>
+typedef xmlChar xmlchar;
+#endif
 #include <stdio.h>
 #include <stdlib.h>
 #include "ink_platform.h"
@@ -378,7 +384,7 @@ XMLNode::getAttributeValueByName(const char *pAName)
 }
 
 void /*XMLDom:: */
-elementStart(void *pObj, const char *el, const char **attr)
+elementStart(void *pObj, const xmlchar *el, const xmlchar **attr)
 {
   XMLDom *pDom = (XMLDom *) pObj;
   int nTagLen;
@@ -392,12 +398,15 @@ elementStart(void *pObj, const char *el, const char **attr)
     pDom->m_pCur = p;
   }
 
-  nTagLen = strlen(el);
+  nTagLen = strlen((const char*)el);
   pTag = new char[nTagLen + 1];
   pDom->m_pCur->m_pNodeName = pTag;
   memcpy(pTag, el, nTagLen);
   pTag[nTagLen] = 0;
 
+  if (!attr)
+    return;
+
   int i;
   int nCount = 0;
   for (i = 0; attr[i]; i += 2)
@@ -410,13 +419,13 @@ elementStart(void *pObj, const char *el, const char **attr)
   pDom->m_pCur->m_pAList = new Attribute[nCount];
   for (i = 0; i < nCount; i++) {
     /* Attribute Name: attr[2*i], Value: attr[2*i+1]; */
-    alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAName, attr[2 * i]);
-    alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAValue, attr[2 * i + 1]);
+    alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAName, (const char*)attr[2 * i]);
+    alloc_and_copy(&pDom->m_pCur->m_pAList[i].pAValue, (const char*)attr[2 * i + 1]);
   }
 }
 
 void /*XMLDom:: */
-elementEnd(void *pObj, const char * /* el ATS_UNUSED */)
+elementEnd(void *pObj, const xmlchar * /* el ATS_UNUSED */)
 {
   /*ASSERT(strcmp(el, pCur->pNodeName) == 0); */
   XMLDom *pDom = (XMLDom *) pObj;
@@ -424,7 +433,7 @@ elementEnd(void *pObj, const char * /* el ATS_UNUSED */)
 }
 
 void /*XMLDom:: */
-charHandler(void *pObj, const char *s, int len)
+charHandler(void *pObj, const xmlchar *s, int len)
 {
   XMLNode *pNode = ((XMLDom *) pObj)->m_pCur;
   int oldlen;
@@ -450,6 +459,9 @@ charHandler(void *pObj, const char *s, int len)
 int
 XMLDom::LoadXML(const char *pXml)
 {
+  int rv = 0;
+#ifdef XML_EXPAT
+  /* Old (expat) xml parsing */
   XML_Parser p = XML_ParserCreate(NULL);
   if (!p)                       /* no Memory. */
     return 1;
@@ -466,11 +478,39 @@ XMLDom::LoadXML(const char *pXml)
                               /*(void (*)(void *, const char *, int)) */ charHandler
     );
 
-  if (!XML_Parse(p, pXml, strlen(pXml), 0)) {
-    /*return 2;     Parse Error: bad xml format. */
+  if (!XML_Parse(p, pXml, strlen(pXml), 1)) {
+    rv = 2; /*return 2;     Parse Error: bad xml format. */
   }
+  XML_ParserFree(p);
+
+#else
+  /* Alternative (libxml2) xml parsing */
+  xmlParserCtxtPtr p;
+  xmlSAXHandler sax;
+  memset(&sax, 0, sizeof(xmlSAXHandler));
+  sax.startElement = elementStart;
+  sax.endElement = elementEnd;
+  sax.characters = charHandler;
+  sax.initialized = 1;
+  p = xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL);
+  if (!p)                       /* no Memory. */
+    return 1;
 
-  return 0;
+  /* Looks as if this is actually the whole xml and expat version's
+   * use of 0 for is_final is a bug.  We'll use 1.
+   */
+  if (xmlParseChunk(p, pXml, strlen(pXml), 1) != 0) {
+    rv = 2; /* expat version ignores error, so we'll do likewise */
+  }
+  xmlFreeParserCtxt(p);
+#endif
+
+  /* I don't know why the "return 2" in the old (expat) version was
+   * commented out.  Possibly because the call to XML_Parse incorrectly
+   * used 0 for is_final last arg, in which case we should restore it.
+   * If it's a bug in the caller then we'll just have to return 0 always.
+   */
+  return rv;
 }
 
 int

Reply via email to