Today someone posted (or tried to post, I didn't get the attachment) an implementation of strxfrm using setlocale again. I think this is the second or third time someone has tried their hand at this. Clearly there's a demand for it and I fear some of the users trying to do this aren't aware of all the problems that were identified a couple years ago on this list.
I think the best implementation so far is Joe Conway's, which uses sigsetjmp/siglongjmp to catch errors safely. I would strongly suggest putting this implementation in contrib to save all those people from repeating the same mistakes that I and others have made, especially since the consequences of those mistakes include potentially corrupting the database. I would suggest putting it in the core system, except that, as has been pointed out, the performance isn't satisfactory on every platform. The only platforms on which we have confirmation that the performance is reasonable are glibc-based systems.
/* * Joe Conway <[EMAIL PROTECTED]> * * Copyright (c) 2004, Joseph E. Conway * ALL RIGHTS RESERVED * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without a written agreement * is hereby granted, provided that the above copyright notice and this * paragraph and the following two paragraphs appear in all copies. * * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. * */ /* * * If your libc strxfrm() overflows the buffer provided ignoring the length * argument then add this define. It causes this function to do an extra * strxfrm() call on every execution to get the actual size of buffer needed. * * #define DONT_TRUST_STRXFRM */ /* * * CREATE OR REPLACE function pg_strxfrm(text,text) RETURNS bytea * AS 'strxfrm2.so', 'pg_strxfrm' LANGUAGE c IMMUTABLE STRICT; * * Usage: pg_strxfrm(string, locale) */ #include <setjmp.h> #include <string.h> #include "postgres.h" #include "fmgr.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" #define GET_STR(textp) \ DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) #define GET_BYTEA(str_) \ DatumGetTextP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) #define MAX_BYTEA_LEN 0x3fffffff /* * pg_strxfrm - Function to convert string similar to the strxfrm C * function using a specified locale. 
*/ extern Datum pg_strxfrm(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(pg_strxfrm); Datum pg_strxfrm(PG_FUNCTION_ARGS) { char *str = GET_STR(PG_GETARG_TEXT_P(0)); char *localestr = GET_STR(PG_GETARG_TEXT_P(1)); size_t approx_trans_len; char *trans; size_t actual_trans_len; char *oldlocale; char *newlocale; sigjmp_buf save_restart; #ifndef DONT_TRUST_STRXFRM size_t str_len = strlen(str); static unsigned guess = 1; approx_trans_len = guess + guess * str_len + 1; if (approx_trans_len > MAX_BYTEA_LEN) elog(ERROR, "source string too long to transform"); trans = (char *) palloc(approx_trans_len); #else approx_trans_len = 0; trans = NULL; #endif oldlocale = setlocale(LC_COLLATE, NULL); if (!oldlocale) elog(ERROR, "setlocale failed to return a locale"); oldlocale = pstrdup(oldlocale); /* catch elog while locale is set other than the default */ memcpy(&save_restart, &Warn_restart, sizeof(save_restart)); if (sigsetjmp(Warn_restart, 1) != 0) { memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart)); newlocale = setlocale(LC_COLLATE, oldlocale); if (!newlocale) elog(PANIC, "setlocale failed to reset locale: %s", localestr); siglongjmp(Warn_restart, 1); } newlocale = setlocale(LC_COLLATE, localestr); if (!newlocale) elog(ERROR, "setlocale failed to set a locale: %s", localestr); actual_trans_len = strxfrm(trans, str, approx_trans_len); /* if the buffer was not large enough, resize it and try again */ if (actual_trans_len >= approx_trans_len) { approx_trans_len = actual_trans_len + 1; if (approx_trans_len > MAX_BYTEA_LEN) elog(ERROR, "source string too long to transform"); if (trans) trans = (char *) repalloc(trans, approx_trans_len); else trans = (char *) palloc(approx_trans_len); actual_trans_len = strxfrm(trans, str, approx_trans_len); #ifndef DONT_TRUST_STRXFRM while(actual_trans_len >= guess + guess * str_len) guess++; elog(INFO, "strxfrm seems to need %d*n+%d sized buffer", guess, guess + 1); #endif /* if the buffer still not large enough, punt */ if (actual_trans_len >= 
approx_trans_len) elog(ERROR, "strxfrm failed, buffer insufficient"); } newlocale = setlocale(LC_COLLATE, oldlocale); if (!newlocale) elog(PANIC, "setlocale failed to reset locale: %s", localestr); /* restore normal error handling */ memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart)); PG_RETURN_BYTEA_P(GET_BYTEA(trans)); }
-- greg
---------------------------(end of broadcast)--------------------------- TIP 5: Have you checked our extensive FAQ? http://www.postgresql.org/docs/faq