Heikki Linnakangas wrote: > Heikki Linnakangas wrote: >> BTW, the encoding of the XML datatype looks pretty funky. xml_recv first >> reads the xml string with pq_getmsgtext, which applies a client->server >> conversion. Then the xml declaration is parsed, extracting the encoding >> attribute. Then the string is converted again from that encoding (or >> UTF-8 if none was specified) to server encoding. I don't understand how >> it's supposed to work, but ISTM there's one conversion too much, > > And it's got an unfortunate typo in it as well: it calls "free(result)" > instead of pfree. I think we need regression tests for the more complex > send/recv functions...
According to the docs, xml_send is supposed to output the XML document in client encoding, with that encoding specified in the xml declaration, eg. ?<xml encoding="LATIN1"?>. xml_recv is supposed to ignore client encoding, and parse the document according to the encoding specified in the declaration. Here's a patch that fixes the send/recv functions to work like the manual says. It fixes the free/pfree typo as well. Included is a new regression test for the send/recv functions as well (unpatched CVS HEAD fails it BTW). It can be merged with the existing xml test, but I didn't want to do it in the patch because it would've involved moving the current xml test from sql/ to input/, and made the patch longer to review. -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com
Index: src/backend/utils/adt/xml.c =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/xml.c,v retrieving revision 1.46 diff -c -r1.46 xml.c *** src/backend/utils/adt/xml.c 13 Jul 2007 03:43:23 -0000 1.46 --- src/backend/utils/adt/xml.c 14 Sep 2007 11:23:48 -0000 *************** *** 232,238 **** xmlDocPtr doc; xmlChar *encoding = NULL; ! str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); result = palloc(nbytes + VARHDRSZ); SET_VARSIZE(result, nbytes + VARHDRSZ); --- 232,244 ---- xmlDocPtr doc; xmlChar *encoding = NULL; ! /* ! * Read the data in raw format. We don't know yet what the encoding ! * is, that information is embedded in the xml declaration, so we ! * have to parse that before converting to server encoding. ! */ ! nbytes = buf->len - buf->cursor; ! str = (char *) pq_getmsgbytes(buf, nbytes); result = palloc(nbytes + VARHDRSZ); SET_VARSIZE(result, nbytes + VARHDRSZ); *************** *** 247,262 **** doc = xml_parse(result, xmloption, true, encoding); xmlFreeDoc(doc); newstr = (char *) pg_do_encoding_conversion((unsigned char *) str, nbytes, encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8, GetDatabaseEncoding()); - pfree(str); - if (newstr != str) { ! free(result); nbytes = strlen(newstr); --- 253,267 ---- doc = xml_parse(result, xmloption, true, encoding); xmlFreeDoc(doc); + /* Now that we know what we're dealing with, convert to server encoding */ newstr = (char *) pg_do_encoding_conversion((unsigned char *) str, nbytes, encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8, GetDatabaseEncoding()); if (newstr != str) { ! pfree(result); nbytes = strlen(newstr); *************** *** 277,287 **** xml_send(PG_FUNCTION_ARGS) { xmltype *x = PG_GETARG_XML_P(0); ! char *outval = xml_out_internal(x, pg_get_client_encoding()); StringInfoData buf; pq_begintypsend(&buf); ! pq_sendstring(&buf, outval); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } --- 282,299 ---- xml_send(PG_FUNCTION_ARGS) { xmltype *x = PG_GETARG_XML_P(0); ! char *outval; StringInfoData buf; + + /* + * xml_out_internal doesn't convert the encoding, it just prints + * the right declaration. pq_sendtext will do the conversion. + */ + outval = xml_out_internal(x, pg_get_client_encoding()); pq_begintypsend(&buf); ! pq_sendtext(&buf, outval, strlen(outval)); ! pfree(outval); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } Index: src/test/regress/parallel_schedule =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/parallel_schedule,v retrieving revision 1.44 diff -c -r1.44 parallel_schedule *** src/test/regress/parallel_schedule 11 Sep 2007 11:54:42 -0000 1.44 --- src/test/regress/parallel_schedule 14 Sep 2007 11:29:10 -0000 *************** *** 85,90 **** --- 85,93 ---- # "plpgsql" cannot run concurrently with "rules", nor can "plancache" test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid conversion truncate alter_table sequence polymorphism rowtypes returning largeobject xml + # "xmlio" needs to run after xml + test: xmlio + # run stats by itself because its delay may be insufficient under heavy load test: stats Index: src/test/regress/serial_schedule =================================================================== RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/serial_schedule,v retrieving revision 1.41 diff -c -r1.41 serial_schedule *** src/test/regress/serial_schedule 11 Sep 2007 11:54:42 -0000 1.41 --- src/test/regress/serial_schedule 14 Sep 2007 11:28:19 -0000 *************** *** 111,115 **** --- 111,116 ---- test: returning test: largeobject test: xml + test: xmlio test: stats test: tablespace Index: src/test/regress/expected/xmlio.out =================================================================== RCS file: src/test/regress/expected/xmlio.out diff -N src/test/regress/expected/xmlio.out *** /dev/null 1 Jan 1970 00:00:00 -0000 --- src/test/regress/expected/xmlio.out 14 Sep 2007 11:36:47 -0000 *************** *** 0 **** --- 1,22 ---- + -- Test binary I/O functions of XML datatype + -- + -- This needs to be run after 'xml' test, because this depends + -- on the 'xmltest' table it creates + SELECT * FROM xmltest; + id | data + ----+-------------------- + 1 | <value>one</value> + 2 | <value>two</value> + (2 rows) + + -- Basic test + COPY xmltest TO '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY; + TRUNCATE xmltest; + COPY xmltest FROM '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY; + SELECT * FROM xmltest; + id | data + ----+-------------------- + 1 | <value>one</value> + 2 | <value>two</value> + (2 rows) + Index: src/test/regress/expected/xmlio_1.out =================================================================== RCS file: src/test/regress/expected/xmlio_1.out diff -N src/test/regress/expected/xmlio_1.out *** /dev/null 1 Jan 1970 00:00:00 -0000 --- src/test/regress/expected/xmlio_1.out 14 Sep 2007 11:44:13 -0000 *************** *** 0 **** --- 1,18 ---- + -- Test binary I/O functions of XML datatype + -- + -- This needs to be run after 'xml' test, because this depends + -- on the 'xmltest' table it creates + SELECT * FROM xmltest; + id | data + ----+------ + (0 rows) + + -- Basic test + COPY xmltest TO '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY; + TRUNCATE xmltest; + COPY xmltest FROM '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY; + SELECT * FROM xmltest; + id | data + ----+------ + (0 rows) + Index: src/test/regress/input/xmlio.source =================================================================== RCS file: src/test/regress/input/xmlio.source diff -N src/test/regress/input/xmlio.source *** /dev/null 1 Jan 1970 00:00:00 -0000 --- src/test/regress/input/xmlio.source 14 Sep 2007 11:34:37 -0000 *************** *** 0 **** --- 1,12 ---- + -- Test binary I/O functions of XML datatype + -- + -- This needs to be run after 'xml' test, because this depends + -- on the 'xmltest' table it creates + + SELECT * FROM xmltest; + + -- Basic test + COPY xmltest TO '@abs_builddir@/results/xmltest.data' WITH BINARY; + TRUNCATE xmltest; + COPY xmltest FROM '@abs_builddir@/results/xmltest.data' WITH BINARY; + SELECT * FROM xmltest;
---------------------------(end of broadcast)--------------------------- TIP 3: Have you checked our extensive FAQ? http://www.postgresql.org/docs/faq