On Tue, Oct 17, 2017 at 06:06:40AM +0200, Pavel Stehule wrote:
> Please, if you can, try it write. I am little bit lost :)

I'm attaching the patch I had in mind.  Please review.  This will probably miss
this week's minor releases.  If there's significant support, I could instead
push it before the wrap.
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 24229c2..3e3aed8 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -3845,6 +3845,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
        int32           xpath_len;
        xmlChar    *string;
        xmlChar    *xpath_expr;
+       size_t          xmldecl_len = 0;
        int                     i;
        int                     ndim;
        Datum      *ns_names_uris;
@@ -3900,6 +3901,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
        string = pg_xmlCharStrndup(datastr, len);
        xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
 
+       /*
+        * In a UTF8 database, skip any xml declaration, which might assert
+        * another encoding.  Ignore parse_xml_decl() failure, letting
+        * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
+        * xpath() support for non-ASCII data in non-UTF8 databases, so leave
+        * those scenarios bug-compatible with historical behavior.
+        */
+       if (GetDatabaseEncoding() == PG_UTF8)
+               parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
+
        xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
        PG_TRY();
@@ -3914,7 +3925,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
                if (ctxt == NULL || xmlerrcxt->err_occurred)
                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                "could not allocate parser context");
-               doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+               doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
+                                                               len - xmldecl_len, NULL, NULL, 0);
                if (doc == NULL || xmlerrcxt->err_occurred)
                        xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
                                                "could not parse XML document");
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index bcc585d..f7a8c38 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -670,6 +670,37 @@ SELECT xpath('/nosuchtag', '<root/>');
  {}
 (1 row)
 
+-- Round-trip non-ASCII data through xpath().
+DO $$
+DECLARE
+  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
+  degree_symbol text;
+  res xml[];
+BEGIN
+  -- Per the documentation, xpath() doesn't work on non-ASCII data when
+  -- the server encoding is not UTF8.  The EXCEPTION block below,
+  -- currently dead code, will be relevant if we remove this limitation.
+  IF current_setting('server_encoding') <> 'UTF8' THEN
+    RAISE LOG 'skip: encoding % unsupported for xml',
+      current_setting('server_encoding');
+    RETURN;
+  END IF;
+
+  degree_symbol := convert_from('\xc2b0', 'UTF8');
+  res := xpath('text()', (xml_declaration ||
+    '<x>' || degree_symbol || '</x>')::xml);
+  IF degree_symbol <> res[1]::text THEN
+    RAISE 'expected % (%), got % (%)',
+      degree_symbol, convert_to(degree_symbol, 'UTF8'),
+      res[1], convert_to(res[1]::text, 'UTF8');
+  END IF;
+EXCEPTION
+  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
+  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
+  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
+  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
+END
+$$;
 -- Test xmlexists and xpath_exists
 SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
  xmlexists 
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index d3bd8c9..1a9efa2 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -576,6 +576,41 @@ LINE 1: SELECT xpath('/nosuchtag', '<root/>');
                                    ^
 DETAIL:  This functionality requires the server to be built with libxml support.
 HINT:  You need to rebuild PostgreSQL using --with-libxml.
+-- Round-trip non-ASCII data through xpath().
+DO $$
+DECLARE
+  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
+  degree_symbol text;
+  res xml[];
+BEGIN
+  -- Per the documentation, xpath() doesn't work on non-ASCII data when
+  -- the server encoding is not UTF8.  The EXCEPTION block below,
+  -- currently dead code, will be relevant if we remove this limitation.
+  IF current_setting('server_encoding') <> 'UTF8' THEN
+    RAISE LOG 'skip: encoding % unsupported for xml',
+      current_setting('server_encoding');
+    RETURN;
+  END IF;
+
+  degree_symbol := convert_from('\xc2b0', 'UTF8');
+  res := xpath('text()', (xml_declaration ||
+    '<x>' || degree_symbol || '</x>')::xml);
+  IF degree_symbol <> res[1]::text THEN
+    RAISE 'expected % (%), got % (%)',
+      degree_symbol, convert_to(degree_symbol, 'UTF8'),
+      res[1], convert_to(res[1]::text, 'UTF8');
+  END IF;
+EXCEPTION
+  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
+  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
+  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
+  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
+END
+$$;
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+CONTEXT:  PL/pgSQL function inline_code_block line 17 at assignment
 -- Test xmlexists and xpath_exists
 SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 ERROR:  unsupported XML feature
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index ff77132..3868da1 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -650,6 +650,37 @@ SELECT xpath('/nosuchtag', '<root/>');
  {}
 (1 row)
 
+-- Round-trip non-ASCII data through xpath().
+DO $$
+DECLARE
+  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
+  degree_symbol text;
+  res xml[];
+BEGIN
+  -- Per the documentation, xpath() doesn't work on non-ASCII data when
+  -- the server encoding is not UTF8.  The EXCEPTION block below,
+  -- currently dead code, will be relevant if we remove this limitation.
+  IF current_setting('server_encoding') <> 'UTF8' THEN
+    RAISE LOG 'skip: encoding % unsupported for xml',
+      current_setting('server_encoding');
+    RETURN;
+  END IF;
+
+  degree_symbol := convert_from('\xc2b0', 'UTF8');
+  res := xpath('text()', (xml_declaration ||
+    '<x>' || degree_symbol || '</x>')::xml);
+  IF degree_symbol <> res[1]::text THEN
+    RAISE 'expected % (%), got % (%)',
+      degree_symbol, convert_to(degree_symbol, 'UTF8'),
+      res[1], convert_to(res[1]::text, 'UTF8');
+  END IF;
+EXCEPTION
+  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
+  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
+  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
+  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
+END
+$$;
 -- Test xmlexists and xpath_exists
 SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
  xmlexists 
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index eb4687f..fdb51c9 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -189,6 +189,38 @@ SELECT xpath('count(//*)=3', '<root><sub/><sub/></root>');
 SELECT xpath('name(/*)', '<root><sub/><sub/></root>');
 SELECT xpath('/nosuchtag', '<root/>');
 
+-- Round-trip non-ASCII data through xpath().
+DO $$
+DECLARE
+  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
+  degree_symbol text;
+  res xml[];
+BEGIN
+  -- Per the documentation, xpath() doesn't work on non-ASCII data when
+  -- the server encoding is not UTF8.  The EXCEPTION block below,
+  -- currently dead code, will be relevant if we remove this limitation.
+  IF current_setting('server_encoding') <> 'UTF8' THEN
+    RAISE LOG 'skip: encoding % unsupported for xml',
+      current_setting('server_encoding');
+    RETURN;
+  END IF;
+
+  degree_symbol := convert_from('\xc2b0', 'UTF8');
+  res := xpath('text()', (xml_declaration ||
+    '<x>' || degree_symbol || '</x>')::xml);
+  IF degree_symbol <> res[1]::text THEN
+    RAISE 'expected % (%), got % (%)',
+      degree_symbol, convert_to(degree_symbol, 'UTF8'),
+      res[1], convert_to(res[1]::text, 'UTF8');
+  END IF;
+EXCEPTION
+  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
+  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
+  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
+  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
+END
+$$;
+
 -- Test xmlexists and xpath_exists
 SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 SELECT xmlexists('//town[text() = ''Cwmbran'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to