Hi,

I'd like to show you a patch which adds the functionality to store and retrieve the column numbers of XML nodes.

As I said in a previous email [1], I, and probably other people, need this in order to highlight a specific text area in an XML document.

For example, if I have an xml document

    <document><node1/></document>

and an xpath expression

    //node1

I'd like to be able to highlight the corresponding part of the document. Without this patch that is impossible, because the parser does not save the column information.


I applied the following changes in this patch:

- added a new member to _xmlNode called column. I know that this breaks ABI compatibility, but I think this is an important feature and needs to be added soon.
- increased the type size of _xmlNode->line from (unsigned short) to (unsigned int). This is the idea behind "XML_PARSE_BIG_LINES". (Can you tell me why it has been an unsigned short in the first place?)
- added a function xmlGetColumnNo() which is the counterpart to xmlGetLineNo()
- wrote a unittest for xmlGetColumnNo()


I would appreciate any feedback and hope the patch can be integrated soon.

Thanks,
Daniel Nagy

[1] https://mail.gnome.org/archives/xml/2012-February/msg00045.html
diff --git a/HTMLparser.c b/HTMLparser.c
index 09a9a4b..bb521a3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4645,6 +4645,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
     }
     ctxt->html = 1;
     ctxt->linenumbers = 1;
+    ctxt->columnnumbers = 1;
     GROW;
     /*
      * SAX: beginning of the document processing.
@@ -4852,6 +4853,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
     ctxt->wellFormed = 1;
     ctxt->replaceEntities = 0;
     ctxt->linenumbers = xmlLineNumbersDefaultValue;
+    ctxt->columnnumbers = xmlColumnNumbersDefaultValue;
     ctxt->html = 1;
     ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
     ctxt->vctxt.userData = ctxt;
diff --git a/SAX2.c b/SAX2.c
index 94db7da..d735384 100644
--- a/SAX2.c
+++ b/SAX2.c
@@ -1626,10 +1626,19 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
     ctxt->nodemem = -1;
     if (ctxt->linenumbers) {
 	if (ctxt->input != NULL) {
-	    if (ctxt->input->line < 65535)
-		ret->line = (short) ctxt->input->line;
+	    if (ctxt->input->line < UINT_MAX)
+		ret->line = (unsigned int) ctxt->input->line;
+	    else
+	        ret->line = UINT_MAX;
+	}
+    }
+    
+    if (ctxt->columnnumbers) {
+	if (ctxt->input != NULL) {
+	    if (ctxt->input->col < UINT_MAX)
+		ret->column = (unsigned int) ctxt->input->col;
 	    else
-	        ret->line = 65535;
+	        ret->column = UINT_MAX;
 	}
     }
 
@@ -1884,15 +1893,27 @@ skip:
 
     if (ctxt->linenumbers) {
 	if (ctxt->input != NULL) {
-	    if (ctxt->input->line < 65535)
+	    if (ctxt->input->line < UINT_MAX)
 		ret->line = (short) ctxt->input->line;
 	    else {
-	        ret->line = 65535;
+	        ret->line = UINT_MAX;
 		if (ctxt->options & XML_PARSE_BIG_LINES)
 		    ret->psvi = (void *) ctxt->input->line;
 	    }
 	}
     }
+    
+    if (ctxt->columnnumbers) {
+	if (ctxt->input != NULL) {
+	    if (ctxt->input->col < UINT_MAX)
+		ret->column = (short) ctxt->input->col;
+	    else {
+	        ret->column = UINT_MAX;
+		//if (ctxt->options & XML_PARSE_BIG_LINES)
+		//    ret->psvi = (void *) ctxt->input->column;
+	    }
+	}
+    }
 
     if ((__xmlRegisterCallbacks) && (xmlRegisterNodeDefaultValue))
 	xmlRegisterNodeDefaultValue(ret);
@@ -2261,10 +2282,19 @@ xmlSAX2StartElementNs(void *ctx,
     }
     if (ctxt->linenumbers) {
 	if (ctxt->input != NULL) {
-	    if (ctxt->input->line < 65535)
-		ret->line = (short) ctxt->input->line;
+	    if (ctxt->input->line < UINT_MAX)
+		ret->line = (unsigned int) ctxt->input->line;
 	    else
-	        ret->line = 65535;
+	        ret->line = UINT_MAX;
+	}
+    }
+    
+    if (ctxt->columnnumbers) {
+	if (ctxt->input != NULL) {
+	    if (ctxt->input->col < UINT_MAX)
+		ret->column = (unsigned int) ctxt->input->col;
+	    else
+	        ret->column = UINT_MAX;
 	}
     }
 
@@ -2653,12 +2683,22 @@ xmlSAX2ProcessingInstruction(void *ctx, const xmlChar *target,
 
     if (ctxt->linenumbers) {
 	if (ctxt->input != NULL) {
-	    if (ctxt->input->line < 65535)
+	    if (ctxt->input->line < UINT_MAX)
 		ret->line = (short) ctxt->input->line;
 	    else
-	        ret->line = 65535;
+	        ret->line = UINT_MAX;
+	}
+    }
+    
+    if (ctxt->columnnumbers) {
+	if (ctxt->input != NULL) {
+	    if (ctxt->input->col < UINT_MAX)
+		ret->column = (short) ctxt->input->col;
+	    else
+	        ret->column = UINT_MAX;
 	}
     }
+    
     if (ctxt->inSubset == 1) {
 	xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
 	return;
@@ -2713,10 +2753,19 @@ xmlSAX2Comment(void *ctx, const xmlChar *value)
     if (ret == NULL) return;
     if (ctxt->linenumbers) {
 	if (ctxt->input != NULL) {
-	    if (ctxt->input->line < 65535)
+	    if (ctxt->input->line < UINT_MAX)
 		ret->line = (short) ctxt->input->line;
 	    else
-	        ret->line = 65535;
+	        ret->line = UINT_MAX;
+	}
+    }
+    
+    if (ctxt->columnnumbers) {
+	if (ctxt->input != NULL) {
+	    if (ctxt->input->col < UINT_MAX)
+		ret->column = (short) ctxt->input->col;
+	    else
+	        ret->column = UINT_MAX;
 	}
     }
 
diff --git a/globals.c b/globals.c
index 69002f0..0584025 100644
--- a/globals.c
+++ b/globals.c
@@ -155,6 +155,7 @@ xmlStrdupFunc xmlMemStrdup = (xmlStrdupFunc) xmlStrdup;
 #undef  xmlTreeIndentString
 #undef	xmlKeepBlanksDefaultValue
 #undef	xmlLineNumbersDefaultValue
+#undef	xmlColumnNumbersDefaultValue
 #undef	xmlLoadExtDtdDefaultValue
 #undef	xmlParserDebugEntities
 #undef	xmlParserVersion
@@ -253,6 +254,16 @@ static int xmlPedanticParserDefaultValueThrDef = 0;
 int xmlLineNumbersDefaultValue = 0;
 static int xmlLineNumbersDefaultValueThrDef = 0;
 /**
+ * xmlColumnNumbersDefaultValue:
+ *
+ * Global setting, indicate that the parser should store the column number
+ * in the column field of nodes in the DOM tree.
+ * Disabled by default since this may not be safe for old classes of
+ * application.
+ */
+int xmlColumnNumbersDefaultValue = 0;
+static int xmlColumnNumbersDefaultValueThrDef = 0;
+/**
  * xmlKeepBlanksDefaultValue:
  *
  * Global setting, indicate that the parser should keep all blanks
@@ -980,6 +991,23 @@ int xmlThrDefLineNumbersDefaultValue(int v) {
     return ret;
 }
 
+#undef	xmlColumnNumbersDefaultValue
+int *
+__xmlColumnNumbersDefaultValue(void) {
+    if (IS_MAIN_THREAD)
+	return (&xmlColumnNumbersDefaultValue);
+    else
+	return (&xmlGetGlobalState()->xmlColumnNumbersDefaultValue);
+}
+int xmlThrDefColumnNumbersDefaultValue(int v) {
+    int ret;
+    xmlMutexLock(xmlThrDefMutex);
+    ret = xmlColumnNumbersDefaultValueThrDef;
+    xmlColumnNumbersDefaultValueThrDef = v;
+    xmlMutexUnlock(xmlThrDefMutex);
+    return ret;
+}
+
 #undef	xmlLoadExtDtdDefaultValue
 int *
 __xmlLoadExtDtdDefaultValue(void) {
diff --git a/include/libxml/globals.h b/include/libxml/globals.h
index 9d688e0..d072924 100644
--- a/include/libxml/globals.h
+++ b/include/libxml/globals.h
@@ -82,6 +82,7 @@ XMLCALL xmlOutputBufferCreateFilenameDefault (xmlOutputBufferCreateFilenameFunc
 #undef  xmlTreeIndentString
 #undef	xmlKeepBlanksDefaultValue
 #undef	xmlLineNumbersDefaultValue
+#undef	xmlColumnNumbersDefaultValue
 #undef	xmlLoadExtDtdDefaultValue
 #undef	xmlMalloc
 #undef	xmlMallocAtomic
@@ -143,6 +144,7 @@ struct _xmlGlobalState
 	int xmlGetWarningsDefaultValue;
 	int xmlKeepBlanksDefaultValue;
 	int xmlLineNumbersDefaultValue;
+	int xmlColumnNumbersDefaultValue;
 	int xmlLoadExtDtdDefaultValue;
 	int xmlParserDebugEntities;
 	int xmlPedanticParserDefaultValue;
@@ -410,6 +412,15 @@ XMLPUBVAR int xmlLineNumbersDefaultValue;
 #endif
 XMLPUBFUN int XMLCALL xmlThrDefLineNumbersDefaultValue(int v);
 
+XMLPUBFUN int * XMLCALL __xmlColumnNumbersDefaultValue(void);
+#ifdef LIBXML_THREAD_ENABLED
+#define xmlColumnNumbersDefaultValue \
+(*(__xmlColumnNumbersDefaultValue()))
+#else
+XMLPUBVAR int xmlColumnNumbersDefaultValue;
+#endif
+XMLPUBFUN int XMLCALL xmlThrDefColumnNumbersDefaultValue(int v);
+
 XMLPUBFUN int * XMLCALL __xmlLoadExtDtdDefaultValue(void);
 #ifdef LIBXML_THREAD_ENABLED
 #define xmlLoadExtDtdDefaultValue \
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 1f11fd9..987f16a 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -257,6 +257,7 @@ struct _xmlParserCtxt {
 
     int                loadsubset;    /* should the external subset be loaded */
     int                linenumbers;   /* set line number in element content */
+    int                columnnumbers; /* set column number in element content */
     void              *catalogs;      /* document's own catalog */
     int                recovery;      /* run in recovery mode */
     int                progressive;   /* is this a progressive parsing */
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 68f92f9..7e18a01 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -502,7 +502,8 @@ struct _xmlNode {
     struct _xmlAttr *properties;/* properties list */
     xmlNs           *nsDef;     /* namespace definitions on this node */
     void            *psvi;	/* for type/PSVI informations */
-    unsigned short   line;	/* line number */
+    unsigned int    line;     /* line number */
+    unsigned int    column;   /* column index */
     unsigned short   extra;	/* extra data for XPath/XSLT */
 };
 
@@ -907,6 +908,8 @@ XMLPUBFUN xmlNodePtr XMLCALL
  */
 XMLPUBFUN long XMLCALL		
 		xmlGetLineNo		(xmlNodePtr node);
+XMLPUBFUN long XMLCALL		
+		xmlGetColumnNo			(xmlNodePtr node);
 #if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED)
 XMLPUBFUN xmlChar * XMLCALL	
 		xmlGetNodePath		(xmlNodePtr node);
diff --git a/parser.c b/parser.c
index 9a57b01..0288d66 100644
--- a/parser.c
+++ b/parser.c
@@ -12880,6 +12880,7 @@ xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
     ctxt->attsDefault = ctx->attsDefault;
     ctxt->attsSpecial = ctx->attsSpecial;
     ctxt->linenumbers = ctx->linenumbers;
+    ctxt->columnnumbers = ctx->columnnumbers;
 
     xmlParseContent(ctxt);
 
@@ -13960,6 +13961,7 @@ xmlCreateURLParserCtxt(const char *filename, int options)
     if (options)
 	xmlCtxtUseOptionsInternal(ctxt, options, NULL);
     ctxt->linenumbers = 1;
+    ctxt->columnnumbers = 1;
 
     inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
     if (inputStream == NULL) {
@@ -15011,6 +15013,7 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encodi
         options -= XML_PARSE_BIG_LINES;
     }
     ctxt->linenumbers = 1;
+    ctxt->columnnumbers = 1;
     return (options);
 }
 
diff --git a/parserInternals.c b/parserInternals.c
index b8d5bbc..b8fc1d0 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1694,6 +1694,7 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
     ctxt->validate = xmlDoValidityCheckingDefaultValue;
     ctxt->pedantic = xmlPedanticParserDefaultValue;
     ctxt->linenumbers = xmlLineNumbersDefaultValue;
+    ctxt->columnnumbers = xmlColumnNumbersDefaultValue;
     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
     if (ctxt->keepBlanks == 0)
 	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
diff --git a/testapi.c b/testapi.c
index deb8cca..e20a333 100644
--- a/testapi.c
+++ b/testapi.c
@@ -20580,6 +20580,37 @@ test_xmlGetLineNo(void) {
     return(test_ret);
 }
 
+static int
+test_xmlGetColumnNo(void) {
+    int test_ret = 0;
+
+    int mem_base;
+    long ret_val;
+    xmlNodePtr node; /* valid node */
+    int n_node;
+
+    for (n_node = 0;n_node < gen_nb_xmlNodePtr;n_node++) {
+        mem_base = xmlMemBlocks();
+        node = gen_xmlNodePtr(n_node, 0);
+
+        ret_val = xmlGetColumnNo(node);
+        desret_long(ret_val);
+        call_tests++;
+        des_xmlNodePtr(n_node, node, 0);
+        xmlResetLastError();
+        if (mem_base != xmlMemBlocks()) {
+            printf("Leak of %d blocks found in xmlGetColumnNo",
+	           xmlMemBlocks() - mem_base);
+	    test_ret++;
+            printf(" %d", n_node);
+            printf("\n");
+        }
+    }
+    function_tests++;
+
+    return(test_ret);
+}
+
 
 static int
 test_xmlGetNoNsProp(void) {
@@ -24245,6 +24276,7 @@ test_tree(void) {
     test_ret += test_xmlGetIntSubset();
     test_ret += test_xmlGetLastChild();
     test_ret += test_xmlGetLineNo();
+	test_ret += test_xmlGetColumnNo();
     test_ret += test_xmlGetNoNsProp();
     test_ret += test_xmlGetNodePath();
     test_ret += test_xmlGetNsList();
diff --git a/tree.c b/tree.c
index df6f608..7a76a5d 100644
--- a/tree.c
+++ b/tree.c
@@ -4192,6 +4192,7 @@ xmlStaticCopyNode(const xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent,
     }else{
       if (node->type == XML_ELEMENT_NODE)
         ret->line = node->line;
+        ret->column = node->column;
     }
     if (parent != NULL) {
 	xmlNodePtr tmp;
@@ -4551,7 +4552,7 @@ xmlGetLineNoInternal(xmlNodePtr node, int depth)
         (node->type == XML_TEXT_NODE) ||
 	(node->type == XML_COMMENT_NODE) ||
 	(node->type == XML_PI_NODE)) {
-	if (node->line == 65535) {
+	if (node->line == UINT_MAX) {
 	    if ((node->type == XML_TEXT_NODE) && (node->psvi != NULL))
 	        result = (long) node->psvi;
 	    else if ((node->type == XML_ELEMENT_NODE) &&
@@ -4562,7 +4563,7 @@ xmlGetLineNoInternal(xmlNodePtr node, int depth)
 	    else if (node->prev != NULL)
 	        result = xmlGetLineNoInternal(node->prev, depth + 1);
 	}
-	if ((result == -1) || (result == 65535))
+	if ((result == -1) || (result == UINT_MAX))
 	    result = (long) node->line;
     } else if ((node->prev != NULL) &&
              ((node->prev->type == XML_ELEMENT_NODE) ||
@@ -4593,6 +4594,40 @@ xmlGetLineNo(xmlNodePtr node)
     return(xmlGetLineNoInternal(node, 0));
 }
 
+/**
+ * xmlGetColumnNo:
+ * @node: valid node
+ *
+ * Get column number of @node. This requires activation of this option
+ * before invoking the parser by setting xmlColumnNumbersDefaultValue to 1
+ *
+ * Returns the col number if successful, -1 otherwise
+ */
+long
+xmlGetColumnNo(xmlNodePtr node)
+{
+    long result = -1;
+
+    if (!node)
+        return result;
+    if ((node->type == XML_ELEMENT_NODE) ||
+        (node->type == XML_TEXT_NODE) ||
+	(node->type == XML_COMMENT_NODE) ||
+	(node->type == XML_PI_NODE))
+        result = (long) node->column;
+    else if ((node->prev != NULL) &&
+             ((node->prev->type == XML_ELEMENT_NODE) ||
+	      (node->prev->type == XML_TEXT_NODE) ||
+	      (node->prev->type == XML_COMMENT_NODE) ||
+	      (node->prev->type == XML_PI_NODE)))
+        result = xmlGetColumnNo(node->prev);
+    else if ((node->parent != NULL) &&
+             (node->parent->type == XML_ELEMENT_NODE))
+        result = xmlGetColumnNo(node->parent);
+
+    return result;
+}
+
 #if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED)
 /**
  * xmlGetNodePath:
@@ -9143,6 +9178,7 @@ ns_end:
 		if (cur->type == XML_ELEMENT_NODE) {
 		    cur->psvi = NULL;
 		    cur->line = 0;
+			cur->column = 0;
 		    cur->extra = 0;
 		    /*
 		    * Walk attributes.
diff --git a/xmlreader.c b/xmlreader.c
index c6ca46e..97771ad 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -2140,6 +2140,7 @@ xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) {
     ret->ctxt->parseMode = XML_PARSE_READER;
     ret->ctxt->_private = ret;
     ret->ctxt->linenumbers = 1;
+    ret->ctxt->columnnumbers = 1;
     ret->ctxt->dictNames = 1;
     ret->allocs = XML_TEXTREADER_CTXT;
     /*
@@ -5217,6 +5218,7 @@ xmlTextReaderSetup(xmlTextReaderPtr reader,
     }
     reader->ctxt->_private = reader;
     reader->ctxt->linenumbers = 1;
+    reader->ctxt->columnnumbers = 1;
     reader->ctxt->dictNames = 1;
     /*
      * use the parser dictionnary to allocate all elements and attributes names
diff --git a/xmlschemas.c b/xmlschemas.c
index c3a080f..0d32870 100644
--- a/xmlschemas.c
+++ b/xmlschemas.c
@@ -28785,6 +28785,7 @@ xmlSchemaValidateStream(xmlSchemaValidCtxtPtr ctxt,
         xmlCtxtUseOptions(pctxt, options);
 #endif
     pctxt->linenumbers = 1;
+    pctxt->columnnumbers = 1;
     xmlSchemaValidateSetLocator(ctxt, xmlSchemaValidateStreamLocator, pctxt);
 
     inputStream = xmlNewIOInputStream(pctxt, input, enc);;
_______________________________________________
xml mailing list, project page  http://xmlsoft.org/
[email protected]
https://mail.gnome.org/mailman/listinfo/xml

Reply via email to