Today someone posted (or tried to post, I didn't get the attachment) an
implementation of strxfrm using setlocale again. I think this is the second or
third time someone has tried their hand at this. Clearly there's a demand for
it, and I fear some of the users trying to do this aren't aware of all the
problems that were identified a couple of years ago on this list.

I think the best implementation so far is Joe Conway's that used
sigsetjmp/siglongjmp to catch errors safely. I would strongly suggest putting
this implementation in contrib to save all those people from repeating the
same mistakes I made and others have made. Especially since the consequences
of those mistakes include potentially corrupting the database.

I would suggest it be put in the core system, except that, as has been pointed
out, the performance isn't satisfactory on every platform. The only platforms
where we have confirmation that the performance is reasonable are glibc-based
systems.


/*
  * Joe Conway <[EMAIL PROTECTED]>
  *
  * Copyright (c) 2004, Joseph E. Conway
  * ALL RIGHTS RESERVED
  *
  * Permission to use, copy, modify, and distribute this software and its
  * documentation for any purpose, without fee, and without a written agreement
  * is hereby granted, provided that the above copyright notice and this
  * paragraph and the following two paragraphs appear in all copies.
  *
  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
  * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  *
  */

/*
 * 
 * If your libc strxfrm() overflows the buffer provided ignoring the length
 * argument then add this define. It causes this function to do an extra
 * strxfrm() call on every execution to get the actual size of buffer needed.
 * 
 * #define DONT_TRUST_STRXFRM
*/

/*
 *
 * CREATE OR REPLACE function pg_strxfrm(text,text) RETURNS bytea 
 *     AS 'strxfrm2.so', 'pg_strxfrm' LANGUAGE c IMMUTABLE STRICT;
 * 
 * Usage: pg_strxfrm(string, locale)
 */

#include "postgres.h"

#include <locale.h>
#include <setjmp.h>
#include <string.h>

#include "fmgr.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"

/*
 * GET_STR: run a text value through textout and yield the result as a
 * C string (char *).
 */
#define GET_STR(textp) \
  DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
/*
 * GET_BYTEA: run a C string through byteain and fetch the resulting datum
 * as a pointer.
 * NOTE(review): byteain is the bytea *input* function, so presumably it
 * interprets the string as bytea text syntax (escapes, NUL-terminated)
 * rather than copying raw bytes -- confirm before using on binary data.
 */
#define GET_BYTEA(str_) \
  DatumGetTextP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
/*
 * Largest buffer size (in bytes) pg_strxfrm is willing to allocate for a
 * transformed string; larger requests are rejected with an ERROR.
 */
#define MAX_BYTEA_LEN    0x3fffffff

/*
 * pg_strxfrm - Function to convert string similar to the strxfrm C
 * function using a specified locale.
 */

/* Prototype for the SQL-callable entry point defined below. */
extern Datum pg_strxfrm(PG_FUNCTION_ARGS);

/*
 * Function-manager info record for pg_strxfrm.
 * NOTE(review): presumably this marks the function as using the version-1
 * calling convention so it can be loaded with LANGUAGE c -- see the
 * CREATE FUNCTION statement in the usage comment above.
 */
PG_FUNCTION_INFO_V1(pg_strxfrm);

Datum
pg_strxfrm(PG_FUNCTION_ARGS)
{
	char       *str = GET_STR(PG_GETARG_TEXT_P(0));
	char       *localestr = GET_STR(PG_GETARG_TEXT_P(1));
	size_t      approx_trans_len;
	
	char       *trans;
	
	size_t      actual_trans_len;
	char       *oldlocale;
	char       *newlocale;
	sigjmp_buf  save_restart;

#ifndef DONT_TRUST_STRXFRM
	size_t      str_len = strlen(str);
	static unsigned guess = 1;

	approx_trans_len = guess + guess * str_len + 1;
	if (approx_trans_len > MAX_BYTEA_LEN)
		elog(ERROR, "source string too long to transform");
	trans = (char *) palloc(approx_trans_len);
#else
	approx_trans_len = 0;
	trans = NULL;
#endif

	oldlocale = setlocale(LC_COLLATE, NULL);
	if (!oldlocale)
		elog(ERROR, "setlocale failed to return a locale");
	oldlocale = pstrdup(oldlocale);

	/* catch elog while locale is set other than the default */
	memcpy(&save_restart, &Warn_restart, sizeof(save_restart));
	if (sigsetjmp(Warn_restart, 1) != 0)
	{
		memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart));
		newlocale = setlocale(LC_COLLATE, oldlocale);
		if (!newlocale)
			elog(PANIC, "setlocale failed to reset locale: %s", localestr);
		siglongjmp(Warn_restart, 1);
	}

	newlocale = setlocale(LC_COLLATE, localestr);
	if (!newlocale)
		elog(ERROR, "setlocale failed to set a locale: %s", localestr);

	actual_trans_len = strxfrm(trans, str, approx_trans_len);
	/* if the buffer was not large enough, resize it and try again */
	if (actual_trans_len >= approx_trans_len)
	{
		approx_trans_len = actual_trans_len + 1;
		if (approx_trans_len > MAX_BYTEA_LEN)
			elog(ERROR, "source string too long to transform");

		if (trans)
			trans = (char *) repalloc(trans, approx_trans_len);
		else
			trans = (char *) palloc(approx_trans_len);

		actual_trans_len = strxfrm(trans, str, approx_trans_len);

#ifndef DONT_TRUST_STRXFRM
		while(actual_trans_len >= guess + guess * str_len)
			guess++;
		elog(INFO, "strxfrm seems to need %d*n+%d sized buffer", guess, guess + 1);
#endif

		/* if the buffer still not large enough, punt */
		if (actual_trans_len >= approx_trans_len)
			elog(ERROR, "strxfrm failed, buffer insufficient");
	}
	newlocale = setlocale(LC_COLLATE, oldlocale);
	if (!newlocale)
		elog(PANIC, "setlocale failed to reset locale: %s", localestr);

	/* restore normal error handling */
	memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart));

	PG_RETURN_BYTEA_P(GET_BYTEA(trans));
}

-- 
greg
---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faq

Reply via email to