Heikki Linnakangas wrote:
> Heikki Linnakangas wrote:
>> BTW, the encoding of the XML datatype looks pretty funky. xml_recv first
>> reads the xml string with pq_getmsgtext, which applies a client->server
>> conversion. Then the xml declaration is parsed, extracting the encoding
>> attribute. Then the string is converted again from that encoding (or
>> UTF-8 if none was specified) to server encoding. I don't understand how
>> it's supposed to work, but ISTM there's one conversion too much,
> 
> And it's got an unfortunate typo in it as well: it calls "free(result)"
> instead of pfree. I think we need regression tests for the more complex
> send/recv functions...

According to the docs, xml_send is supposed to output the XML document
in client encoding, with that encoding specified in the xml declaration,
eg. ?<xml encoding="LATIN1"?>. xml_recv is supposed to ignore client
encoding, and parse the document according to the encoding specified in
the declaration.

Here's a patch that fixes the send/recv functions to work like the
manual says. It fixes the free/pfree typo as well.

Included is a new regression test for the send/recv functions as well
(unpatched CVS HEAD fails it BTW). It can be merged with the existing
xml test, but I didn't want to do it in the patch because it would've
involved moving the current xml test from sql/ to input/, and made the
patch longer to review.

-- 
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.46
diff -c -r1.46 xml.c
*** src/backend/utils/adt/xml.c	13 Jul 2007 03:43:23 -0000	1.46
--- src/backend/utils/adt/xml.c	14 Sep 2007 11:23:48 -0000
***************
*** 232,238 ****
  	xmlDocPtr	doc;
  	xmlChar	   *encoding = NULL;
  
! 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
  
  	result = palloc(nbytes + VARHDRSZ);
  	SET_VARSIZE(result, nbytes + VARHDRSZ);
--- 232,244 ----
  	xmlDocPtr	doc;
  	xmlChar	   *encoding = NULL;
  
! 	/*
! 	 * Read the data in raw format. We don't know yet what the encoding
! 	 * is, that information is embedded in the xml declaration, so we
! 	 * have to parse that before converting to server encoding.
! 	 */
! 	nbytes = buf->len - buf->cursor;
! 	str = (char *) pq_getmsgbytes(buf, nbytes);
  
  	result = palloc(nbytes + VARHDRSZ);
  	SET_VARSIZE(result, nbytes + VARHDRSZ);
***************
*** 247,262 ****
  	doc = xml_parse(result, xmloption, true, encoding);
  	xmlFreeDoc(doc);
  
  	newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
  												nbytes,
  												encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
  												GetDatabaseEncoding());
  
- 	pfree(str);
- 
  	if (newstr != str)
  	{
! 		free(result);
  
  		nbytes = strlen(newstr);
  
--- 253,267 ----
  	doc = xml_parse(result, xmloption, true, encoding);
  	xmlFreeDoc(doc);
  
+ 	/* Now that we know what we're dealing with, convert to server encoding */
  	newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
  												nbytes,
  												encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
  												GetDatabaseEncoding());
  
  	if (newstr != str)
  	{
! 		pfree(result);
  
  		nbytes = strlen(newstr);
  
***************
*** 277,287 ****
  xml_send(PG_FUNCTION_ARGS)
  {
  	xmltype	   *x = PG_GETARG_XML_P(0);
! 	char	   *outval = xml_out_internal(x, pg_get_client_encoding());
  	StringInfoData buf;
  
  	pq_begintypsend(&buf);
! 	pq_sendstring(&buf, outval);
  	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }
  
--- 282,299 ----
  xml_send(PG_FUNCTION_ARGS)
  {
  	xmltype	   *x = PG_GETARG_XML_P(0);
! 	char	   *outval;
  	StringInfoData buf;
+ 	
+ 	/*
+ 	 * xml_out_internal doesn't convert the encoding, it just prints
+ 	 * the right declaration. pq_sendtext will do the conversion.
+ 	 */
+ 	outval = xml_out_internal(x, pg_get_client_encoding());
  
  	pq_begintypsend(&buf);
! 	pq_sendtext(&buf, outval, strlen(outval));
! 	pfree(outval);
  	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }
  
Index: src/test/regress/parallel_schedule
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/parallel_schedule,v
retrieving revision 1.44
diff -c -r1.44 parallel_schedule
*** src/test/regress/parallel_schedule	11 Sep 2007 11:54:42 -0000	1.44
--- src/test/regress/parallel_schedule	14 Sep 2007 11:29:10 -0000
***************
*** 85,90 ****
--- 85,93 ----
  # "plpgsql" cannot run concurrently with "rules", nor can "plancache"
  test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid conversion truncate alter_table sequence polymorphism rowtypes returning largeobject xml
  
+ # "xmlio" needs to run after xml
+ test: xmlio
+ 
  # run stats by itself because its delay may be insufficient under heavy load
  test: stats
  
Index: src/test/regress/serial_schedule
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/test/regress/serial_schedule,v
retrieving revision 1.41
diff -c -r1.41 serial_schedule
*** src/test/regress/serial_schedule	11 Sep 2007 11:54:42 -0000	1.41
--- src/test/regress/serial_schedule	14 Sep 2007 11:28:19 -0000
***************
*** 111,115 ****
--- 111,116 ----
  test: returning
  test: largeobject
  test: xml
+ test: xmlio
  test: stats
  test: tablespace
Index: src/test/regress/expected/xmlio.out
===================================================================
RCS file: src/test/regress/expected/xmlio.out
diff -N src/test/regress/expected/xmlio.out
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/test/regress/expected/xmlio.out	14 Sep 2007 11:36:47 -0000
***************
*** 0 ****
--- 1,22 ----
+ -- Test binary I/O functions of XML datatype
+ --
+ -- This needs to be run after 'xml' test, because this depends
+ -- on the 'xmltest' table it creates
+ SELECT * FROM xmltest;
+  id |        data        
+ ----+--------------------
+   1 | <value>one</value>
+   2 | <value>two</value>
+ (2 rows)
+ 
+ -- Basic test
+ COPY xmltest TO '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ TRUNCATE xmltest;
+ COPY xmltest FROM '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ SELECT * FROM xmltest;
+  id |        data        
+ ----+--------------------
+   1 | <value>one</value>
+   2 | <value>two</value>
+ (2 rows)
+ 
Index: src/test/regress/expected/xmlio_1.out
===================================================================
RCS file: src/test/regress/expected/xmlio_1.out
diff -N src/test/regress/expected/xmlio_1.out
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/test/regress/expected/xmlio_1.out	14 Sep 2007 11:44:13 -0000
***************
*** 0 ****
--- 1,18 ----
+ -- Test binary I/O functions of XML datatype
+ --
+ -- This needs to be run after 'xml' test, because this depends
+ -- on the 'xmltest' table it creates
+ SELECT * FROM xmltest;
+  id | data 
+ ----+------
+ (0 rows)
+ 
+ -- Basic test
+ COPY xmltest TO '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ TRUNCATE xmltest;
+ COPY xmltest FROM '/home/hlinnaka/pg_sandbox/pgsql.cvshead/src/test/regress/results/xmltest.data' WITH BINARY;
+ SELECT * FROM xmltest;
+  id | data 
+ ----+------
+ (0 rows)
+ 
Index: src/test/regress/input/xmlio.source
===================================================================
RCS file: src/test/regress/input/xmlio.source
diff -N src/test/regress/input/xmlio.source
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/test/regress/input/xmlio.source	14 Sep 2007 11:34:37 -0000
***************
*** 0 ****
--- 1,12 ----
+ -- Test binary I/O functions of XML datatype
+ --
+ -- This needs to be run after 'xml' test, because this depends
+ -- on the 'xmltest' table it creates
+ 
+ SELECT * FROM xmltest;
+ 
+ -- Basic test
+ COPY xmltest TO '@abs_builddir@/results/xmltest.data' WITH BINARY;
+ TRUNCATE xmltest;
+ COPY xmltest FROM '@abs_builddir@/results/xmltest.data' WITH BINARY;
+ SELECT * FROM xmltest;
---------------------------(end of broadcast)---------------------------
TIP 3: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faq

Reply via email to