On 3/5/07, Nikolay Samokhvalov <[EMAIL PROTECTED]> wrote:
On 3/4/07, Nikolay Samokhvalov <[EMAIL PROTECTED]> wrote:
> I'll fix these issues and extend the patch with regression tests and
> docs for xpath_array(). I'll resubmit it very soon.

Here is a new version of the patch. I haven't changed any part of the docs yet.
Since there were no objections I've changed the name of the function
to xmlpath().

The updated version of the patch contains a bug fix: there was a problem
with path queries that pointed to elements (cases where a set of
document parts that correspond to subtrees should be returned).
An example (included in the regression tests):

xmltest=# SELECT xmlpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
        xmlpath
-------------------------
{<b>two</b>,<b>etc</b>}
(1 row)

Waiting for more feedback, please check it.

--
Best regards,
Nikolay
Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.35
diff -u -r1.35 xml.c
--- src/backend/utils/adt/xml.c	15 Mar 2007 23:12:06 -0000	1.35
+++ src/backend/utils/adt/xml.c	18 Mar 2007 13:32:21 -0000
@@ -47,6 +47,8 @@
 #include <libxml/uri.h>
 #include <libxml/xmlerror.h>
 #include <libxml/xmlwriter.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
 #endif /* USE_LIBXML */
 
 #include "catalog/namespace.h"
@@ -67,6 +69,7 @@
 #include "utils/datetime.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "access/tupmacs.h"
 #include "utils/xml.h"
 
 
@@ -88,6 +91,7 @@
 static int		parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
 static bool		print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone);
 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *encoding);
+static text		*xml_xmlnodetoxmltype(xmlNodePtr cur);
 
 #endif /* USE_LIBXML */
 
@@ -1463,7 +1467,6 @@
 	return buf.data;
 }
 
-
 /*
  * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
  */
@@ -2403,3 +2406,258 @@
 	else
 		appendStringInfoString(result, "</row>\n\n");
 }
+
+
+/*
+ * XPath related functions
+ */
+
+#ifdef USE_LIBXML
+/*
+ * Convert an XPath result node to an xml value: an element node is dumped
+ * as its serialized subtree, any other node as its XPath string value.
+ */
+static text *
+xml_xmlnodetoxmltype(xmlNodePtr cur)
+{
+	xmltype    *result;
+
+	if (cur->type == XML_ELEMENT_NODE)
+	{
+		xmlBufferPtr buf = xmlBufferCreate();
+
+		xmlNodeDump(buf, NULL, cur, 0, 1);
+		result = xmlBuffer_to_xmltype(buf);
+		xmlBufferFree(buf);
+	}
+	else
+	{
+		xmlChar    *str = xmlXPathCastNodeToString(cur);
+		size_t		len = strlen((char *) str);
+
+		result = (text *) palloc(len + VARHDRSZ);
+		SET_VARSIZE(result, len + VARHDRSZ);
+		memcpy(VARDATA(result), str, len);
+		xmlFree(str);		/* libxml-allocated copy; must not be leaked */
+	}
+	return result;
+}
+#endif
+
+/*
+ * Evaluate XPath expression and return array of XML values.
+ * As we have no support for XQuery sequences yet, this function seems
+ * to be the most useful one (an array of XML values plays the role of
+ * some kind of substitute for XQuery sequences).
+ *
+ * Args: XPath expression (text), XML value, optional namespaces (text[]).
+ * Returns NULL if either of the first two is NULL or no node is selected.
+ * Workaround here: we parse XML data in different way to allow XPath for
+ * fragments (see "XPath for fragment" TODO comment inside).
+ */
+Datum
+xmlpath(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	ArrayBuildState		*astate = NULL;
+	xmlParserCtxtPtr	ctxt = NULL;
+	xmlDocPtr			doc = NULL;
+	xmlXPathContextPtr	xpathctx = NULL;
+	xmlXPathCompExprPtr	xpathcomp = NULL;
+	xmlXPathObjectPtr	xpathobj = NULL;
+	int32				len, xpath_len;
+	xmlChar				*string, *xpath_expr;
+	bool				res_is_null = FALSE;
+	int					i;
+	xmltype				*data;
+	text				*xpath_expr_text;
+	ArrayType			*namespaces;
+	int					*dims, ndims, ns_count = 0, bitmask = 1;
+	char				*ptr;
+	bits8				*bitmap;
+	char				**ns_names = NULL, **ns_uris = NULL;
+	int16				typlen;
+	bool				typbyval;
+	char				typalign;
+
+	/* the function is not strict, we must check first two args */
+	if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
+		PG_RETURN_NULL();
+
+	xpath_expr_text = PG_GETARG_TEXT_P(0);
+	data = PG_GETARG_XML_P(1);
+
+	/*
+	 * Namespace mappings passed as text[].
+	 * Assume that 2-dimensional array has been passed,
+	 * the 1st subarray is array of names, the 2nd -- array of URIs,
+	 * example: ARRAY[ARRAY['myns', 'myns2'], ARRAY['http://example.com', 'http://example2.com']].
+	 */
+	if (!PG_ARGISNULL(2))
+	{
+		namespaces = PG_GETARG_ARRAYTYPE_P(2);
+		ndims = ARR_NDIM(namespaces);
+		dims = ARR_DIMS(namespaces);
+
+		/* Sanity check */
+		if (ndims != 2)
+			ereport(ERROR, (errmsg("invalid array passed for namespace mappings"),
+							errdetail("Only 2-dimensional array may be used for namespace mappings.")));
+
+		Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+
+		ns_count = ArrayGetNItems(ndims, dims) / 2;
+		get_typlenbyvalalign(ARR_ELEMTYPE(namespaces),
+							 &typlen, &typbyval, &typalign);
+		ns_names = (char **) palloc(ns_count * sizeof(char *));
+		ns_uris = (char **) palloc(ns_count * sizeof(char *));
+		ptr = ARR_DATA_PTR(namespaces);
+		bitmap = ARR_NULLBITMAP(namespaces);
+		bitmask = 1;
+
+		/* elements [0, ns_count) are names, [ns_count, 2 * ns_count) are URIs */
+		for (i = 0; i < ns_count * 2; i++)
+		{
+			if (bitmap && (*bitmap & bitmask) == 0)
+				ereport(ERROR, (errmsg("neither namespace nor URI may be NULL"))); /* TODO: better message */
+
+			if (i < ns_count)
+				ns_names[i] = DatumGetCString(DirectFunctionCall1(textout,
+													  PointerGetDatum(ptr)));
+			else
+				ns_uris[i - ns_count] = DatumGetCString(DirectFunctionCall1(textout,
+													  PointerGetDatum(ptr)));
+			ptr = att_addlength(ptr, typlen, PointerGetDatum(ptr));
+			ptr = (char *) att_align(ptr, typalign);
+
+			/* advance bitmap pointer if any */
+			if (bitmap)
+			{
+				bitmask <<= 1;
+				if (bitmask == 0x100)
+				{
+					bitmap++;
+					bitmask = 1;
+				}
+			}
+		}
+	}
+
+	len = VARSIZE(data) - VARHDRSZ;
+	xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+	if (xpath_len == 0)
+		ereport(ERROR, (errmsg("empty XPath expression")));
+
+	/* the datum is not NUL-terminated: check its length before comparing 5 bytes */
+	if (len >= 5 && xmlStrncmp((xmlChar *) VARDATA(data), (xmlChar *) "<?xml", 5) == 0)
+	{
+		string = palloc(len + 1);
+		memcpy(string, VARDATA(data), len);
+		string[len] = '\0';
+		xpath_expr = palloc(xpath_len + 1);
+		memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+		xpath_expr[xpath_len] = '\0';
+	}
+	else
+	{
+		/* use "<x>...</x>" as dummy root element to enable XPath for fragments */
+		/* TODO: (XPath for fragment) find better solution to work with XML fragment! */
+		string = xmlStrncatNew((xmlChar *) "<x>", (xmlChar *) VARDATA(data), len);
+		string = xmlStrncat(string, (xmlChar *) "</x>", 4);
+		len += 7;			/* strlen("<x>") + strlen("</x>") */
+		xpath_expr = xmlStrncatNew((xmlChar *) "/x", (xmlChar *) VARDATA(xpath_expr_text), xpath_len);
+		xpath_len += 2;		/* "/x" lengthens the expression, not the document */
+	}
+
+	xml_init();
+
+	PG_TRY();
+	{
+		/* redundant XML parsing (two parsings for the same value in the same session are possible) */
+		ctxt = xmlNewParserCtxt();
+		if (ctxt == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not allocate parser context");
+		doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+		if (doc == NULL)
+			xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+						"could not parse XML data");
+		xpathctx = xmlXPathNewContext(doc);
+		if (xpathctx == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not allocate XPath context");
+		xpathctx->node = xmlDocGetRootElement(doc);
+		if (xpathctx->node == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not find root XML element");
+
+		/* register namespaces, if any */
+		if ((ns_count > 0) && ns_names && ns_uris)
+			for (i = 0; i < ns_count; i++)
+				if (0 != xmlXPathRegisterNs(xpathctx, (xmlChar *) ns_names[i], (xmlChar *) ns_uris[i]))
+					ereport(ERROR,
+						(errmsg("could not register XML namespace with prefix=\"%s\" and href=\"%s\"", ns_names[i], ns_uris[i])));
+
+		xpathcomp = xmlXPathCompile(xpath_expr);
+		if (xpathcomp == NULL)
+			xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+						"invalid XPath expression"); /* TODO: show proper XPath error details */
+
+		xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+		xmlXPathFreeCompExpr(xpathcomp);
+		xpathcomp = NULL;		/* keep PG_CATCH from freeing it a second time */
+		if (xpathobj == NULL)
+			ereport(ERROR, (errmsg("could not create XPath object")));
+
+		if (xpathobj->nodesetval == NULL)
+			res_is_null = TRUE;
+
+		if (!res_is_null && xpathobj->nodesetval->nodeNr == 0)
+			/* TODO maybe empty array should be here, not NULL? (if so -- fix segfault) */
+			/*PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));*/
+			res_is_null = TRUE;
+
+		if (!res_is_null)
+			for (i = 0; i < xpathobj->nodesetval->nodeNr; i++)
+			{
+				Datum		elem;
+				bool		elemisnull = false;
+				elem = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
+				astate = accumArrayResult(astate, elem,
+										  elemisnull, XMLOID,
+										  CurrentMemoryContext);
+			}
+
+		xmlXPathFreeObject(xpathobj);
+		xmlXPathFreeContext(xpathctx);
+		xmlFreeParserCtxt(ctxt);
+		xmlFreeDoc(doc);
+		xmlCleanupParser();
+	}
+	PG_CATCH();
+	{
+		if (xpathcomp)
+			xmlXPathFreeCompExpr(xpathcomp);
+		if (xpathobj)
+			xmlXPathFreeObject(xpathobj);
+		if (xpathctx)
+			xmlXPathFreeContext(xpathctx);
+		if (doc)
+			xmlFreeDoc(doc);
+		if (ctxt)
+			xmlFreeParserCtxt(ctxt);
+		xmlCleanupParser();
+
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	if (res_is_null)
+		PG_RETURN_NULL();
+
+	PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.448
diff -u -r1.448 pg_proc.h
--- src/include/catalog/pg_proc.h	16 Mar 2007 17:57:36 -0000	1.448
+++ src/include/catalog/pg_proc.h	18 Mar 2007 13:32:21 -0000
@@ -4083,6 +4083,10 @@
 DATA(insert OID = 2930 (  query_to_xml_and_xmlschema  PGNSP PGUID 12 100 0 f f t f s 4 142 "25 16 16 25" _null_ _null_ "{query,nulls,tableforest,targetns}" query_to_xml_and_xmlschema - _null_ ));
 DESCR("map query result and structure to XML and XML Schema");
 
+DATA(insert OID = 2931 (  xmlpath      PGNSP PGUID 12 1 0 f f f f i 3 143 "25 142 1009" _null_ _null_ _null_ xmlpath - _null_ ));
+DESCR("evaluate XPath expression, with namespaces support");
+DATA(insert OID = 2932 (  xmlpath      PGNSP PGUID 14 1 0 f f f f i 2 143 "25 142" _null_ _null_ _null_ "select pg_catalog.xmlpath($1, $2, NULL)" - _null_ ));
+DESCR("evaluate XPath expression");
 
 /* uuid */ 
 DATA(insert OID = 2952 (  uuid_in		   PGNSP PGUID 12 1 0 f f t f i 1 2950 "2275" _null_ _null_ _null_ uuid_in - _null_ ));
Index: src/test/regress/expected/xml_1.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/xml_1.out,v
retrieving revision 1.13
diff -u -r1.13 xml_1.out
--- src/test/regress/expected/xml_1.out	15 Feb 2007 05:05:03 -0000	1.13
+++ src/test/regress/expected/xml_1.out	18 Mar 2007 13:32:21 -0000
@@ -197,3 +197,20 @@
  xmlview5   | SELECT XMLPARSE(CONTENT '<abc>x</abc>'::text STRIP WHITESPACE) AS "xmlparse";
 (2 rows)
 
+-- Text XPath expressions evaluation
+SELECT xmlpath('/value', data) FROM xmltest;
+ xmlpath 
+---------
+(0 rows)
+
+SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
+ERROR:  no XML support in this installation
+CONTEXT:  SQL function "xmlpath" statement 1
+SELECT xmlpath('', '<!-- error -->');
+ERROR:  no XML support in this installation
+SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ERROR:  no XML support in this installation
+SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
+ERROR:  no XML support in this installation
+SELECT xmlpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+ERROR:  no XML support in this installation
Index: src/test/regress/expected/xml.out
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/expected/xml.out,v
retrieving revision 1.15
diff -u -r1.15 xml.out
--- src/test/regress/expected/xml.out	15 Feb 2007 05:05:03 -0000	1.15
+++ src/test/regress/expected/xml.out	18 Mar 2007 13:32:21 -0000
@@ -401,3 +401,39 @@
  xmlview9   | SELECT XMLSERIALIZE(CONTENT 'good'::"xml" AS text) AS "xmlserialize";
 (9 rows)
 
+-- Text XPath expressions evaluation
+SELECT xmlpath('/value', data) FROM xmltest;
+       xmlpath        
+----------------------
+ {<value>one</value>}
+ {<value>two</value>}
+(2 rows)
+
+SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlpath('', '<!-- error -->');
+ERROR:  empty XPath expression
+CONTEXT:  SQL function "xmlpath" statement 1
+SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+    xmlpath     
+----------------
+ {"number one"}
+(1 row)
+
+SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
+ xmlpath 
+---------
+ {1,2}
+(1 row)
+
+SELECT xmlpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
+         xmlpath         
+-------------------------
+ {<b>two</b>,<b>etc</b>}
+(1 row)
+
Index: src/include/utils/xml.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/utils/xml.h,v
retrieving revision 1.16
diff -u -r1.16 xml.h
--- src/include/utils/xml.h	16 Feb 2007 07:46:55 -0000	1.16
+++ src/include/utils/xml.h	18 Mar 2007 13:32:21 -0000
@@ -36,6 +36,7 @@
 extern Datum texttoxml(PG_FUNCTION_ARGS);
 extern Datum xmltotext(PG_FUNCTION_ARGS);
 extern Datum xmlvalidate(PG_FUNCTION_ARGS);
+extern Datum xmlpath(PG_FUNCTION_ARGS);
 
 extern Datum table_to_xml(PG_FUNCTION_ARGS);
 extern Datum query_to_xml(PG_FUNCTION_ARGS);
Index: src/test/regress/sql/xml.sql
===================================================================
RCS file: /projects/cvsroot/pgsql/src/test/regress/sql/xml.sql,v
retrieving revision 1.12
diff -u -r1.12 xml.sql
--- src/test/regress/sql/xml.sql	15 Feb 2007 05:05:03 -0000	1.12
+++ src/test/regress/sql/xml.sql	18 Mar 2007 13:32:21 -0000
@@ -144,3 +144,12 @@
 
 SELECT table_name, view_definition FROM information_schema.views
   WHERE table_name LIKE 'xmlview%' ORDER BY 1;
+
+-- Text XPath expressions evaluation
+
+SELECT xmlpath('/value', data) FROM xmltest;
+SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
+SELECT xmlpath('', '<!-- error -->');
+SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
+SELECT xmlpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings

Reply via email to