2014-11-05 21:50 GMT+07:00 Ali Akbar <[email protected]>:
> 2014-11-04 22:16 GMT+07:00 Peter Eisentraut <[email protected]>:
>
>> I think the problem this patch is addressing is real, and your approach
>>> is sound, but I'd ask you to go back to the xmlCopyNode() version, and
>>> try to add a test case for why the second argument = 1 is necessary. I
>>> don't see any other problems.
>>>
>>
>> OK. Because upstream code is fixed in current version, i'll revert to the
>> previous version. Test case added to regression test. With =1 argument, the
>> result is correct:
>> <local:piece xmlns:local=\"http://127.0.0.1\" xmlns=\"http://127.0.0.2\"
>> id=\"1\">
>> <internal>number one</internal>
>> <internal2/>
>> </local:piece>
>>
>> without the argument, the result is not correct, all children will be
>> lost. Because of that, the other regression test will fail too because the
>> children is not copied:
>> *** 584,593 ****
>>
>> -- Text XPath expressions evaluation
>> SELECT xpath('/value', data) FROM xmltest;
>> ! xpath
>> ! ----------------------
>> ! {<value>one</value>}
>> ! {<value>two</value>}
>> (2 rows)
>>
>> SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
>> --- 584,593 ----
>>
>> -- Text XPath expressions evaluation
>> SELECT xpath('/value', data) FROM xmltest;
>> ! xpath
>> ! ------------
>> ! {<value/>}
>> ! {<value/>}
>> (2 rows)
>>
>> SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
>> ***************
>> ... <cut>
>>
>> updated patch attached.
>>
>
> I noticed somewhat big performance regression if the xml is large (i use
> PRODML Product Volume sample from energistics.org):
> * Without patch (tested 3 times):
> select unnest(xpath('//a:flow', x, ARRAY[['a','
> http://www.prodml.org/schemas/1series']])) from u;
>
> unnest
>
> -----------------------------------------------------------------------------------------------
> <flow>
> +
> <kind>gas
> lift</kind> +
> ...
> Time: 84,012 ms
> Time: 85,683 ms
> Time: 88,766 ms
>
>
> * With latest v6 patch (notice the correct result with namespace
> definition):
>
> select unnest(xpath('//a:flow', x, ARRAY[['a','
> http://www.prodml.org/schemas/1series']])) from u;
>
> unnest
>
> -----------------------------------------------------------------------------------------------
> <flow xmlns="http://www.prodml.org/schemas/1series">
> +
> ...
> Time: 108,912 ms
> Time: 108,267 ms
> Time: 114,848 ms
>
>
> It's 23% performance regression.
>
> * Just curious, i'm also testing v5 patch performance (notice the
> namespace in the result):
> select unnest(xpath('//a:flow', x, ARRAY[['a','
> http://www.prodml.org/schemas/1series']])) from u;
>
> unnest
>
> -----------------------------------------------------------------------------------------------
> <flow xmlns="http://www.prodml.org/schemas/1series">
> +
> <kind>gas
> lift</kind> +
> Time: 92,552 ms
> Time: 97,440 ms
> Time: 99,309 ms
>
> The regression is only 13%. I know the xmlCopyNode() version (v6 patch) is
> much more cleaner than v5patch, should we consider the performance benefit?
>
> Anyway, thanks for the review! :)
>
commit bac2739 in master by Tom Lane changes *astate definition
in xml_xpathobjtoxmlarray, this attached v7 patch rebases v6 patch with
latest master.
For performance comparison, i also rebased the v5 patch attached with name
v5-141126.patch
--
Ali Akbar
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
--- 141,151 ----
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
! PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3594,3620 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
#ifdef USE_LIBXML
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
buf = xmlBufferCreate();
PG_TRY();
{
xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
xmlBufferFree(buf);
PG_RE_THROW();
}
--- 3596,3706 ----
#ifdef USE_LIBXML
+ /* check ns definition of node and its childrens. If any one of ns is
+ * not defined in node and it's children, but in the node's parent,
+ * copy the definition to node.
+ */
+ static void
+ xml_checkandcopyns(xmlNodePtr root,
+ PgXmlErrorContext *xmlerrcxt,
+ xmlNodePtr node,
+ xmlNsPtr lastns_before)
+ {
+ xmlNsPtr ns = NULL;
+ xmlNsPtr cur_ns;
+ xmlNodePtr cur;
+
+ if (node->ns != NULL)
+ {
+ /* check in new nses */
+ cur_ns = lastns_before == NULL ? node->nsDef : lastns_before->next;
+ while (cur_ns != NULL)
+ {
+ if (cur_ns->href != NULL)
+ {
+ if (((cur_ns->prefix == NULL) && (node->ns->prefix == NULL)) ||
+ ((cur_ns->prefix != NULL) && (node->ns->prefix != NULL) &&
+ xmlStrEqual(cur_ns->prefix, node->ns->prefix)))
+ {
+ ns = cur_ns;
+ break;
+ }
+ }
+ cur_ns = cur_ns->next;
+ }
+ if (ns == NULL) /* not in new nses */
+ {
+ ns = xmlSearchNs(NULL, node->parent, node->ns->prefix);
+
+ if (ns != NULL)
+ {
+ ns = xmlNewNs(root, ns->href, ns->prefix);
+
+ if (ns == NULL && xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlNs");
+ }
+ }
+ }
+ /* check and copy ns for children recursively */
+ cur = node->children;
+ while (cur != NULL)
+ {
+ xml_checkandcopyns(root, xmlerrcxt, cur, lastns_before);
+ cur = cur->next;
+ }
+ }
+
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
+ xmlNsPtr lastns_before;
+ xmlNsPtr ns;
+ xmlNsPtr next;
buf = xmlBufferCreate();
+
PG_TRY();
{
+ lastns_before = cur->nsDef;
+ if (lastns_before != NULL)
+ {
+ while (lastns_before->next != NULL)
+ lastns_before = lastns_before->next;
+ }
+ xml_checkandcopyns(cur, xmlerrcxt, cur, lastns_before);
+
xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
+
+ /* delete and free new nses */
+ ns = lastns_before == NULL ? cur->nsDef : lastns_before->next;
+ while (ns != NULL)
+ {
+ next = ns->next;
+ xmlFree(ns);
+ ns = next;
+ }
+
+ if (lastns_before == NULL)
+ cur->nsDef = NULL;
+ else
+ lastns_before->next = NULL;
}
PG_CATCH();
{
+ /* new namespaces will be freed while free-ing the node, so we
+ * won't free it here
+ */
xmlBufferFree(buf);
PG_RE_THROW();
}
***************
*** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur)
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate)
{
int result = 0;
Datum datum;
--- 3746,3753 ----
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt)
{
int result = 0;
Datum datum;
***************
*** 3679,3685 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
--- 3766,3773 ----
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
! xmlerrcxt));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
***************
*** 3881,3889 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate);
}
PG_CATCH();
{
--- 3969,3977 ----
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
}
PG_CATCH();
{
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc
--- 612,632 ----
{1,2}
(1 row)
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ ------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:data xmlns:local=\"http://127.0.0.1\"> +
+ <local:piece id=\"1\">number one</local:piece> +
+ <local:piece id=\"2\"/> +
+ </local:data>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
xpath
-------------------------
*** a/src/test/regress/expected/xml_1.out
--- b/src/test/regress/expected/xml_1.out
***************
*** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht...
--- 514,531 ----
^
DETAIL: This functionality requires the server to be built with libxml support.
HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:*', '<local:data xmlns:local="http://127...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
ERROR: unsupported XML feature
LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'...
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 178,185 ----
SELECT xpath('', '<!-- error -->');
SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:*', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
SELECT xpath('//text()', '<root><</root>');
SELECT xpath('//@value', '<root value="<"/>');
*** a/src/backend/utils/adt/xml.c
--- b/src/backend/utils/adt/xml.c
***************
*** 141,149 **** static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
--- 141,151 ----
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
! static text *xml_xmlnodetoxmltype(xmlNodePtr cur,
! PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
***************
*** 3599,3624 **** SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
buf = xmlBufferCreate();
PG_TRY();
{
! xmlNodeDump(buf, NULL, cur, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
xmlBufferFree(buf);
PG_RE_THROW();
}
PG_END_TRY();
xmlBufferFree(buf);
}
else
--- 3601,3640 ----
* return value otherwise)
*/
static text *
! xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
{
xmltype *result;
if (cur->type == XML_ELEMENT_NODE)
{
xmlBufferPtr buf;
+ xmlNodePtr cur_copy;
buf = xmlBufferCreate();
+
+ /* the result of xmlNodeDump won't contain namespace definitions
+ * from parent nodes, but xmlCopyNode duplicates a node along
+ * with its required namespace definitions.
+ */
+ cur_copy = xmlCopyNode(cur, 1);
+
+ if (cur_copy == NULL)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not serialize xml");
+
PG_TRY();
{
! xmlNodeDump(buf, NULL, cur_copy, 0, 1);
result = xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
+ xmlFreeNode(cur_copy);
xmlBufferFree(buf);
PG_RE_THROW();
}
PG_END_TRY();
+ xmlFreeNode(cur_copy);
xmlBufferFree(buf);
}
else
***************
*** 3660,3666 **** xml_xmlnodetoxmltype(xmlNodePtr cur)
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate)
{
int result = 0;
Datum datum;
--- 3676,3683 ----
*/
static int
xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
! ArrayBuildState *astate,
! PgXmlErrorContext *xmlerrcxt)
{
int result = 0;
Datum datum;
***************
*** 3679,3685 **** xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
--- 3696,3703 ----
for (i = 0; i < result; i++)
{
! datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
! xmlerrcxt));
(void) accumArrayResult(astate, datum, false,
XMLOID, CurrentMemoryContext);
}
***************
*** 3881,3889 **** xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate);
}
PG_CATCH();
{
--- 3899,3907 ----
* Extract the results as requested.
*/
if (res_nitems != NULL)
! *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
else
! (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
}
PG_CATCH();
{
*** a/src/test/regress/expected/xml.out
--- b/src/test/regress/expected/xml.out
***************
*** 612,617 **** SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><loc
--- 612,623 ----
{1,2}
(1 row)
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ xpath
+ ------------------------------------------------------------------------------------------------------------------------------------------------
+ {"<local:piece xmlns:local=\"http://127.0.0.1\" id=\"1\">number one</local:piece>","<local:piece xmlns:local=\"http://127.0.0.1\" id=\"2\"/>"}
+ (1 row)
+
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
xpath
-------------------------
*** a/src/test/regress/expected/xml_1.out
--- b/src/test/regress/expected/xml_1.out
***************
*** 514,519 **** LINE 1: SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="ht...
--- 514,531 ----
^
DETAIL: This functionality requires the server to be built with libxml support.
HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ ERROR: unsupported XML feature
+ LINE 1: SELECT xpath('//loc:piece', '<local:data xmlns:local="http:/...
+ ^
+ DETAIL: This functionality requires the server to be built with libxml support.
+ HINT: You need to rebuild PostgreSQL using --with-libxml.
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
ERROR: unsupported XML feature
LINE 1: SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'...
*** a/src/test/regress/sql/xml.sql
--- b/src/test/regress/sql/xml.sql
***************
*** 178,183 **** SELECT xpath(NULL, NULL) IS NULL FROM xmltest;
--- 178,185 ----
SELECT xpath('', '<!-- error -->');
SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
+ SELECT xpath('//loc:piece', '<local:data xmlns:local="http://127.0.0.1" xmlns="http://127.0.0.2"><local:piece id="1"><internal>number one</internal><internal2/></local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]);
SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>');
SELECT xpath('//text()', '<root><</root>');
SELECT xpath('//@value', '<root value="<"/>');
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers