And this time the patch is attached.


Andrew Dunstan wrote:


Tom Lane wrote:
What I think we'd need to have a complete solution is

convert(text, name) returns bytea
    -- convert from DB encoding to arbitrary encoding

convert(bytea, name, name) returns bytea
    -- convert between any two encodings

convert(bytea, name) returns text
    -- convert from arbitrary encoding to DB encoding

The second and third would need to do a verify step before
converting, of course.



Here's a patch that implements the above. It actually performs the verify step in all three cases; if that bothers people, I can remove it at the cost of a little extra code complexity.

It also fixes the "convert ... using ..." case in a similar way (makes it return a bytea).

On reflection, I think we also need to provide length(bytea, name), as has been suggested, so that we can check the length, in the foreign encoding, of a bytea value we have converted this way. That shouldn't be too difficult to add.

cheers

andrew

Index: src/backend/catalog/pg_conversion.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/catalog/pg_conversion.c,v
retrieving revision 1.36
diff -c -r1.36 pg_conversion.c
*** src/backend/catalog/pg_conversion.c	27 Feb 2007 23:48:07 -0000	1.36
--- src/backend/catalog/pg_conversion.c	16 Sep 2007 01:43:24 -0000
***************
*** 282,288 ****
   * CONVERT <left paren> <character value expression>
   * USING <form-of-use conversion name> <right paren>
   *
!  * TEXT convert_using(TEXT string, TEXT conversion_name)
   */
  Datum
  pg_convert_using(PG_FUNCTION_ARGS)
--- 282,291 ----
   * CONVERT <left paren> <character value expression>
   * USING <form-of-use conversion name> <right paren>
   *
!  * BYTEA convert_using(TEXT string, TEXT conversion_name)
!  *
!  * bytea is returned so we don't give a value that is
!  * not valid in the database encoding.
   */
  Datum
  pg_convert_using(PG_FUNCTION_ARGS)
***************
*** 344,348 ****
  	pfree(result);
  	pfree(str);
  
! 	PG_RETURN_TEXT_P(retval);
  }
--- 347,351 ----
  	pfree(result);
  	pfree(str);
  
! 	PG_RETURN_BYTEA_P(retval);
  }
Index: src/backend/utils/mb/mbutils.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.63
diff -c -r1.63 mbutils.c
*** src/backend/utils/mb/mbutils.c	28 May 2007 16:43:24 -0000	1.63
--- src/backend/utils/mb/mbutils.c	16 Sep 2007 01:43:25 -0000
***************
*** 292,303 ****
  }
  
  /*
!  * Convert string using encoding_nanme. We assume that string's
!  * encoding is same as DB encoding.
   *
!  * TEXT convert(TEXT string, NAME encoding_name) */
  Datum
! pg_convert(PG_FUNCTION_ARGS)
  {
  	Datum		string = PG_GETARG_DATUM(0);
  	Datum		dest_encoding_name = PG_GETARG_DATUM(1);
--- 292,303 ----
  }
  
  /*
!  * Convert string using encoding_name. The source
!  * encoding is the DB encoding.
   *
!  * BYTEA convert(TEXT string, NAME encoding_name) */
  Datum
! pg_convert_from_db(PG_FUNCTION_ARGS)
  {
  	Datum		string = PG_GETARG_DATUM(0);
  	Datum		dest_encoding_name = PG_GETARG_DATUM(1);
***************
*** 306,312 ****
  	Datum		result;
  
  	result = DirectFunctionCall3(
! 				 pg_convert2, string, src_encoding_name, dest_encoding_name);
  
  	/* free memory allocated by namein */
  	pfree((void *) src_encoding_name);
--- 306,335 ----
  	Datum		result;
  
  	result = DirectFunctionCall3(
! 				 pg_convert, string, src_encoding_name, dest_encoding_name);
! 
! 	/* free memory allocated by namein */
! 	pfree((void *) src_encoding_name);
! 
! 	PG_RETURN_BYTEA_P(result);
! }
! 
! /*
!  * Convert string using encoding_name. The destination
!  * encoding is the DB encoding.
!  *
!  * TEXT convert(BYTEA string, NAME encoding_name) */
! Datum
! pg_convert_to_db(PG_FUNCTION_ARGS)
! {
! 	Datum		string = PG_GETARG_DATUM(0);
! 	Datum		src_encoding_name = PG_GETARG_DATUM(1);
! 	Datum		dest_encoding_name = DirectFunctionCall1(
! 							namein, CStringGetDatum(DatabaseEncoding->name));
! 	Datum		result;
! 
! 	result = DirectFunctionCall3(
! 				 pg_convert, string, src_encoding_name, dest_encoding_name);
  
  	/* free memory allocated by namein */
  	pfree((void *) src_encoding_name);
***************
*** 315,334 ****
  }
  
  /*
!  * Convert string using encoding_name.
   *
!  * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
   */
  Datum
! pg_convert2(PG_FUNCTION_ARGS)
  {
! 	text	   *string = PG_GETARG_TEXT_P(0);
  	char	   *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
  	int			src_encoding = pg_char_to_encoding(src_encoding_name);
  	char	   *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
  	int			dest_encoding = pg_char_to_encoding(dest_encoding_name);
  	unsigned char *result;
! 	text	   *retval;
  	unsigned char *str;
  	int			len;
  
--- 338,357 ----
  }
  
  /*
!  * Convert string using encoding_names.
   *
!  * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
   */
  Datum
! pg_convert(PG_FUNCTION_ARGS)
  {
! 	bytea	   *string = PG_GETARG_TEXT_P(0);
  	char	   *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
  	int			src_encoding = pg_char_to_encoding(src_encoding_name);
  	char	   *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
  	int			dest_encoding = pg_char_to_encoding(dest_encoding_name);
  	unsigned char *result;
! 	bytea	   *retval;
  	unsigned char *str;
  	int			len;
  
***************
*** 343,350 ****
  				 errmsg("invalid destination encoding name \"%s\"",
  						dest_encoding_name)));
  
! 	/* make sure that source string is null terminated */
  	len = VARSIZE(string) - VARHDRSZ;
  	str = palloc(len + 1);
  	memcpy(str, VARDATA(string), len);
  	*(str + len) = '\0';
--- 366,374 ----
  				 errmsg("invalid destination encoding name \"%s\"",
  						dest_encoding_name)));
  
! 	/* make sure that source string is valid and null terminated */
  	len = VARSIZE(string) - VARHDRSZ;
+ 	pg_verify_mbstr(src_encoding,VARDATA(string),len,false);
  	str = palloc(len + 1);
  	memcpy(str, VARDATA(string), len);
  	*(str + len) = '\0';
***************
*** 354,361 ****
  		elog(ERROR, "encoding conversion failed");
  
  	/*
! 	 * build text data type structure. we cannot use textin() here, since
! 	 * textin assumes that input string encoding is same as database encoding.
  	 */
  	len = strlen((char *) result) + VARHDRSZ;
  	retval = palloc(len);
--- 378,384 ----
  		elog(ERROR, "encoding conversion failed");
  
  	/*
! 	 * build bytea data type structure.
  	 */
  	len = strlen((char *) result) + VARHDRSZ;
  	retval = palloc(len);
***************
*** 369,375 ****
  	/* free memory if allocated by the toaster */
  	PG_FREE_IF_COPY(string, 0);
  
! 	PG_RETURN_TEXT_P(retval);
  }
  
  /*
--- 392,398 ----
  	/* free memory if allocated by the toaster */
  	PG_FREE_IF_COPY(string, 0);
  
! 	PG_RETURN_BYTEA_P(retval);
  }
  
  /*
Index: src/include/catalog/catversion.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v
retrieving revision 1.423
diff -c -r1.423 catversion.h
*** src/include/catalog/catversion.h	5 Sep 2007 18:10:48 -0000	1.423
--- src/include/catalog/catversion.h	16 Sep 2007 01:43:25 -0000
***************
*** 53,58 ****
   */
  
  /*							yyyymmddN */
! #define CATALOG_VERSION_NO	200709042
  
  #endif
--- 53,58 ----
   */
  
  /*							yyyymmddN */
! #define CATALOG_VERSION_NO	200709151
  
  #endif
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.468
diff -c -r1.468 pg_proc.h
*** src/include/catalog/pg_proc.h	4 Sep 2007 16:41:42 -0000	1.468
--- src/include/catalog/pg_proc.h	16 Sep 2007 01:43:25 -0000
***************
*** 2232,2244 ****
  DATA(insert OID = 810 (  pg_client_encoding    PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding - _null_ _null_ ));
  DESCR("encoding name of current database");
  
! DATA(insert OID = 1717 (  convert		   PGNSP PGUID 12 1 0 f f t f s 2 25 "25 19" _null_ _null_ _null_ pg_convert - _null_ _null_ ));
  DESCR("convert string with specified destination encoding name");
  
! DATA(insert OID = 1813 (  convert		   PGNSP PGUID 12 1 0 f f t f s 3 25 "25 19 19" _null_ _null_ _null_	pg_convert2 - _null_ _null_ ));
  DESCR("convert string with specified encoding names");
  
! DATA(insert OID = 1619 (  convert_using    PGNSP PGUID 12 1 0 f f t f s 2 25 "25 25" _null_ _null_ _null_  pg_convert_using - _null_ _null_ ));
  DESCR("convert string with specified conversion name");
  
  DATA(insert OID = 1264 (  pg_char_to_encoding	   PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_	PG_char_to_encoding - _null_ _null_ ));
--- 2232,2247 ----
  DATA(insert OID = 810 (  pg_client_encoding    PGNSP PGUID 12 1 0 f f t f s 0 19 "" _null_ _null_ _null_ pg_client_encoding - _null_ _null_ ));
  DESCR("encoding name of current database");
  
! DATA(insert OID = 1717 (  convert		   PGNSP PGUID 12 1 0 f f t f s 2 17 "25 19" _null_ _null_ _null_ pg_convert_from_db - _null_ _null_ ));
  DESCR("convert string with specified destination encoding name");
  
! DATA(insert OID = 1713 (  convert		   PGNSP PGUID 12 1 0 f f t f s 2 25 "17 19" _null_ _null_ _null_ pg_convert_to_db - _null_ _null_ ));
! DESCR("convert string with specified source encoding name");
! 
! DATA(insert OID = 1813 (  convert		   PGNSP PGUID 12 1 0 f f t f s 3 17 "17 19 19" _null_ _null_ _null_ pg_convert - _null_ _null_ ));
  DESCR("convert string with specified encoding names");
  
! DATA(insert OID = 1619 (  convert_using    PGNSP PGUID 12 1 0 f f t f s 2 17 "25 25" _null_ _null_ _null_ pg_convert_using - _null_ _null_ ));
  DESCR("convert string with specified conversion name");
  
  DATA(insert OID = 1264 (  pg_char_to_encoding	   PGNSP PGUID 12 1 0 f f t f s 1 23 "19" _null_ _null_ _null_	PG_char_to_encoding - _null_ _null_ ));
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.302
diff -c -r1.302 builtins.h
*** src/include/utils/builtins.h	4 Sep 2007 16:41:43 -0000	1.302
--- src/include/utils/builtins.h	16 Sep 2007 01:43:26 -0000
***************
*** 902,908 ****
  extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
  extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
  extern Datum pg_convert(PG_FUNCTION_ARGS);
! extern Datum pg_convert2(PG_FUNCTION_ARGS);
  
  /* format_type.c */
  extern Datum format_type(PG_FUNCTION_ARGS);
--- 902,909 ----
  extern Datum PG_character_set_name(PG_FUNCTION_ARGS);
  extern Datum PG_character_set_id(PG_FUNCTION_ARGS);
  extern Datum pg_convert(PG_FUNCTION_ARGS);
! extern Datum pg_convert_to_db(PG_FUNCTION_ARGS);
! extern Datum pg_convert_from_db(PG_FUNCTION_ARGS);
  
  /* format_type.c */
  extern Datum format_type(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)---------------------------
TIP 7: You can help support the PostgreSQL project by donating at

                http://www.postgresql.org/about/donate

Reply via email to