And this time the patch is attached.
Andrew Dunstan wrote:
Tom Lane wrote:
What I think we'd need to have a complete solution is:
convert(text, name) returns bytea
-- convert from DB encoding to arbitrary encoding
convert(bytea, name, name) returns bytea
-- convert between any two encodings
convert(bytea, name) returns text
-- convert from arbitrary encoding to DB encoding
The second and third would need to do a verify step before
converting, of course.
Here's a patch that implements the above. It actually does the verify
step for all three cases - if that bothers people I can remove it at
the cost of a little code complexity.
It also fixes the "convert ... using ..." case in a similar way (makes
it return a bytea).
On reflection I think we also need to provide length(bytea, name) as
has been suggested, so we can check the length in the foreign encoding
of a bytea we have converted this way. That shouldn't be too difficult
to add.
cheers
andrew
Index: src/backend/catalog/pg_conversion.c
===
RCS file: /cvsroot/pgsql/src/backend/catalog/pg_conversion.c,v
retrieving revision 1.36
diff -c -r1.36 pg_conversion.c
*** src/backend/catalog/pg_conversion.c 27 Feb 2007 23:48:07 - 1.36
--- src/backend/catalog/pg_conversion.c 16 Sep 2007 01:43:24 -
***
*** 282,288
* CONVERT
* USING
*
! * TEXT convert_using(TEXT string, TEXT conversion_name)
*/
Datum
pg_convert_using(PG_FUNCTION_ARGS)
--- 282,291
* CONVERT
* USING
*
! * BYTEA convert_using(TEXT string, TEXT conversion_name)
! *
! * bytea is returned so we don't give a value that is
! * not valid in the database encoding.
*/
Datum
pg_convert_using(PG_FUNCTION_ARGS)
***
*** 344,348
pfree(result);
pfree(str);
! PG_RETURN_TEXT_P(retval);
}
--- 347,351
pfree(result);
pfree(str);
! PG_RETURN_BYTEA_P(retval);
}
Index: src/backend/utils/mb/mbutils.c
===
RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.63
diff -c -r1.63 mbutils.c
*** src/backend/utils/mb/mbutils.c 28 May 2007 16:43:24 - 1.63
--- src/backend/utils/mb/mbutils.c 16 Sep 2007 01:43:25 -
***
*** 292,303
}
/*
! * Convert string using encoding_nanme. We assume that string's
! * encoding is same as DB encoding.
*
! * TEXT convert(TEXT string, NAME encoding_name) */
Datum
! pg_convert(PG_FUNCTION_ARGS)
{
Datum string = PG_GETARG_DATUM(0);
Datum dest_encoding_name = PG_GETARG_DATUM(1);
--- 292,303
}
/*
! * Convert string using encoding_name. The source
! * encoding is the DB encoding.
*
! * BYTEA convert(TEXT string, NAME encoding_name) */
Datum
! pg_convert_from_db(PG_FUNCTION_ARGS)
{
Datum string = PG_GETARG_DATUM(0);
Datum dest_encoding_name = PG_GETARG_DATUM(1);
***
*** 306,312
Datum result;
result = DirectFunctionCall3(
! pg_convert2, string, src_encoding_name, dest_encoding_name);
/* free memory allocated by namein */
pfree((void *) src_encoding_name);
--- 306,335
Datum result;
result = DirectFunctionCall3(
! pg_convert, string, src_encoding_name, dest_encoding_name);
!
! /* free memory allocated by namein */
! pfree((void *) src_encoding_name);
!
! PG_RETURN_BYTEA_P(result);
! }
!
! /*
! * Convert string using encoding_name. The destination
! * encoding is the DB encoding.
! *
! * TEXT convert(BYTEA string, NAME encoding_name) */
! Datum
! pg_convert_to_db(PG_FUNCTION_ARGS)
! {
! Datum string = PG_GETARG_DATUM(0);
! Datum src_encoding_name = PG_GETARG_DATUM(1);
! Datum dest_encoding_name = DirectFunctionCall1(
! namein, CStringGetDatum(DatabaseEncoding->name));
! Datum result;
!
! result = DirectFunctionCall3(
! pg_convert, string, src_encoding_name, dest_encoding_name);
/* free memory allocated by namein */
pfree((void *) src_encoding_name);
***
*** 315,334
}
/*
! * Convert string using encoding_name.
*
! * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
*/
Datum
! pg_convert2(PG_FUNCTION_ARGS)
{
! text *string = PG_GETARG_TEXT_P(0);
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
int src_encoding = pg_char_to_encoding(src_encoding_name);
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
unsigned char *result;
! text *retval;
unsigned char *str;
int len;
--- 338,357
}
/*
! * Convert string using encoding_names.
*
! * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
*/
Datum
! pg_convert(PG_FUNCTION_ARGS)
{
! bytea *string = PG_GETARG_TEXT_P(0);
char *src_encoding_name = NameStr(