"Filip Rembiaâkowski" <[EMAIL PROTECTED]> writes:
> let's assume that we keep Unicode text data in the column.
> sometimes we want to sort it according to specific collation order.
> how can we force collation when running a query?
> ideal solution would be having SQL92 standard COLLATE clauses in
> SELECT statements. I heard it's work in progress, what's the status?
>
> are there any "partial" solutions to the problem?
I don't know the status of the full COLLATE support.
But the closest partial solution suggested so far is the pg_xfrm function that
has been implemented and posted at least three times by three different
posters to the postgres mailing lists. In the interest of avoiding a fourth
independent implementation I'll attach the one I use below, it's not big.
/*
* Joe Conway <[EMAIL PROTECTED]>
*
* Copyright (c) 2004, Joseph E. Conway
* ALL RIGHTS RESERVED
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this
* paragraph and the following two paragraphs appear in all copies.
*
* IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
* LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
* DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
*/
/*
*
* If your libc strxfrm() overflows the buffer provided ignoring the length
* argument then add this define. It causes this function to do an extra
* strxfrm() call on every execution to get the actual size of buffer needed.
*
* #define DONT_TRUST_STRXFRM
*/
/*
*
* CREATE OR REPLACE function pg_strxfrm(text,text) RETURNS bytea
* AS 'strxfrm2.so', 'pg_strxfrm' LANGUAGE c IMMUTABLE STRICT;
*
* Usage: pg_strxfrm(string, locale)
*/
#include <setjmp.h>
#include <string.h>
#include "postgres.h"
#include "fmgr.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#define GET_STR(textp) \
DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_BYTEA(str_) \
DatumGetTextP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
#define MAX_BYTEA_LEN 0x3fffffff
/*
* pg_strxfrm - Function to convert string similar to the strxfrm C
* function using a specified locale.
*/
extern Datum pg_strxfrm(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(pg_strxfrm);
Datum
pg_strxfrm(PG_FUNCTION_ARGS)
{
char *str = GET_STR(PG_GETARG_TEXT_P(0));
char *localestr = GET_STR(PG_GETARG_TEXT_P(1));
size_t approx_trans_len;
char *trans;
size_t actual_trans_len;
char *oldlocale;
char *newlocale;
sigjmp_buf save_restart;
#ifndef DONT_TRUST_STRXFRM
size_t str_len = strlen(str);
static unsigned guess = 1;
approx_trans_len = guess + guess * str_len + 1;
if (approx_trans_len > MAX_BYTEA_LEN)
elog(ERROR, "source string too long to transform");
trans = (char *) palloc(approx_trans_len);
#else
approx_trans_len = 0;
trans = NULL;
#endif
oldlocale = setlocale(LC_COLLATE, NULL);
if (!oldlocale)
elog(ERROR, "setlocale failed to return a locale");
oldlocale = pstrdup(oldlocale);
/* catch elog while locale is set other than the default */
memcpy(&save_restart, &Warn_restart, sizeof(save_restart));
if (sigsetjmp(Warn_restart, 1) != 0)
{
memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart));
newlocale = setlocale(LC_COLLATE, oldlocale);
if (!newlocale)
elog(PANIC, "setlocale failed to reset locale: %s", localestr);
siglongjmp(Warn_restart, 1);
}
newlocale = setlocale(LC_COLLATE, localestr);
if (!newlocale)
elog(ERROR, "setlocale failed to set a locale: %s", localestr);
actual_trans_len = strxfrm(trans, str, approx_trans_len);
/* if the buffer was not large enough, resize it and try again */
if (actual_trans_len >= approx_trans_len)
{
approx_trans_len = actual_trans_len + 1;
if (approx_trans_len > MAX_BYTEA_LEN)
elog(ERROR, "source string too long to transform");
if (trans)
trans = (char *) repalloc(trans, approx_trans_len);
else
trans = (char *) palloc(approx_trans_len);
actual_trans_len = strxfrm(trans, str, approx_trans_len);
#ifndef DONT_TRUST_STRXFRM
while(actual_trans_len >= guess + guess * str_len)
guess++;
elog(INFO, "strxfrm seems to need %d*n+%d sized buffer", guess, guess + 1);
#endif
/* if the buffer still not large enough, punt */
if (actual_trans_len >= approx_trans_len)
elog(ERROR, "strxfrm failed, buffer insufficient");
}
newlocale = setlocale(LC_COLLATE, oldlocale);
if (!newlocale)
elog(PANIC, "setlocale failed to reset locale: %s", localestr);
/* restore normal error handling */
memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart));
PG_RETURN_BYTEA_P(GET_BYTEA(trans));
}
SET search_path = public;
SET autocommit TO 'on';
CREATE OR REPLACE FUNCTION pg_strxfrm(text, text) RETURNS bytea
AS 'pg_strxfrm.so', 'pg_strxfrm'
LANGUAGE 'C' STRICT IMMUTABLE ;
--
greg
---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings