So, I needed a way to sort using collation rules other than the one the database was built with. So I wrote up the following function exposing strxfrm with an extra parameter to specify the LC_COLLATE value to use.
This is my first C function so I'm really unsure that I've done the right thing. For the most part I pattern-matched off the string_io code in the contrib directory. In particular I'm unsure about the postgres-interfacing code in c_varcharxfrm, which makes an extra copy of both parameters that are passed in and an extra copy of the result value. Are varchars guaranteed to be nul-terminated? If so I can dispose of two of the copies. And I can probably eliminate the copying of the result by allotting extra space when I allocate it initially. More generally, would it make more sense to use text or bytea or something else to store these opaque binary strings? At least with glibc they tend to be unreadable anyway. Other caveats: it's condemned to be permanently non-threadsafe because the whole locale system is a non-thread-safe API. Also I fear some systems will leak memory like a sieve when you call setlocale a few thousand times instead of the one time at initialization that they foresaw. At least glibc doesn't seem to leak in my brief testing. If it's deemed a reasonable approach and nobody finds any fatal flaws, then I expect it would be useful to put in the contrib directory?
/*
 * This software is distributed under the GNU General Public License
 * either version 2, or (at your option) any later version.
 */

#include "postgres.h"

#include <locale.h>

#include "utils/builtins.h"

static unsigned char *xfrm(unsigned char *data, int size,
						   const unsigned char *locale, int localesize);
unsigned char *c_varcharxfrm(unsigned char *s, const unsigned char *locale);

/*
 * xfrm
 *		Run strxfrm() on a NUL-terminated string under a temporarily
 *		installed LC_COLLATE locale.
 *
 * data			NUL-terminated input string.
 * size			length of data in bytes; only used to size the first buffer.
 * locale		NUL-terminated locale name handed to setlocale().
 * localesize	length of locale; currently unused.
 *
 * Returns a palloc'd, NUL-terminated transformed string.  Reports
 * elog(ERROR) if the locale cannot be installed or restored.
 */
static unsigned char *
xfrm(unsigned char *data, int size,
	 const unsigned char *locale, int localesize)
{
	/* First guess at a buffer that is perhaps big enough. */
	size_t		length = (size_t) size * 3 + 4;
	size_t		transformed_length;
	char	   *transformed;
	char	   *oldlocale;
	const char *current;
	const char *restored;

	(void) localesize;

	current = setlocale(LC_COLLATE, NULL);
	if (!current)
	{
		elog(ERROR, "setlocale(LC_COLLATE,NULL) failed to return a locale");
		return NULL;
	}

	/*
	 * The string returned by setlocale() may be overwritten by a later
	 * setlocale() call, so keep a private copy to restore from.
	 */
	oldlocale = pstrdup(current);

	transformed = palloc(length);

	for (;;)
	{
		/* A failed setlocale() leaves the locale unchanged: safe to bail. */
		if (!setlocale(LC_COLLATE, (const char *) locale))
		{
			elog(ERROR, "setlocale(LC_COLLATE,%s) failed to return a locale",
				 locale);
			return NULL;
		}

		transformed_length = strxfrm(transformed, (const char *) data, length);

		/*
		 * Put the original locale back before doing anything that could
		 * raise an error (palloc, elog), so a foreign collation is never
		 * left installed.
		 */
		restored = setlocale(LC_COLLATE, oldlocale);
		if (!restored || strcmp(restored, oldlocale) != 0)
		{
			elog(ERROR, "Failed to reset locale (trying to reset locale to %s from %s instead got %s)",
				 oldlocale, locale, restored ? restored : "(null)");
			return NULL;
		}

		/* strxfrm() reports the length it needs, excluding the NUL. */
		if (transformed_length < length)
			break;

		/* The buffer was not large enough: resize it and try again. */
		elog(INFO, "Calling strxfrm again because result didn't fit (%lu>%lu)",
			 (unsigned long) transformed_length, (unsigned long) length);
		pfree(transformed);
		length = transformed_length + 1;
		transformed = palloc(length);
	}

	pfree(oldlocale);

	return (unsigned char *) transformed;
}

/*
 * c_varcharxfrm
 *		SQL-callable entry point: xfrm(varchar, varchar) returns varchar.
 *
 * s	varchar value to transform.
 * l	varchar value naming the LC_COLLATE locale to use.
 *
 * Both arguments are read as a 4-byte length word (which counts the header
 * itself) followed by the payload, without a NUL terminator.  The result is
 * a freshly palloc'd value in the same layout.
 *
 * Returns NULL if either argument pointer is NULL.
 */
unsigned char *
c_varcharxfrm(unsigned char *s, const unsigned char *l)
{
	int			lens;
	int			lenl;
	size_t		lenr;
	unsigned char *str;
	unsigned char *locale;
	unsigned char *retval;
	unsigned char *retval2;

	/* NOTE(review): the SQL wrapper is expected to be STRICT; guard anyway. */
	if (!s || !l)
		return NULL;

	lens = *(int32 *) s - VARHDRSZ;
	lenl = *(const int32 *) l - VARHDRSZ;
	if (lens < 0 || lenl < 0)
	{
		elog(ERROR, "xfrm: invalid varchar length word");
		return NULL;
	}

	/* strxfrm() and setlocale() need NUL-terminated copies. */
	str = palloc(lens + 1);
	memcpy(str, s + VARHDRSZ, lens);
	str[lens] = '\0';

	locale = palloc(lenl + 1);
	memcpy(locale, l + VARHDRSZ, lenl);
	locale[lenl] = '\0';

	retval = xfrm(str, lens, locale, lenl);
	lenr = strlen((char *) retval);

	/* One spare byte keeps the payload NUL-terminated as well. */
	retval2 = palloc(lenr + VARHDRSZ + 1);
	memcpy(retval2 + VARHDRSZ, retval, lenr + 1);

	/* The length word counts the header too, mirroring how inputs are read. */
	*(int32 *) retval2 = (int32) (lenr + VARHDRSZ);

	pfree(retval);
	pfree(locale);
	pfree(str);

	return retval2;
}

/*
 * Local Variables:
 *	tab-width: 4
 *	c-indent-level: 4
 *	c-basic-offset: 4
 * End:
 */
-- Install script for the xfrm() function.
-- Session settings used while creating the function.
SET search_path = public;
SET autocommit TO 'on';

-- xfrm(string, locale): SQL-level wrapper around the C symbol c_varcharxfrm
-- in the shared library strxfrm.so.  Both arguments and the result are
-- varchar.  Declared STRICT and IMMUTABLE.
CREATE OR REPLACE FUNCTION xfrm(varchar, varchar) RETURNS varchar
    AS 'strxfrm.so', 'c_varcharxfrm'
    LANGUAGE 'C' STRICT IMMUTABLE ;
-- greg
---------------------------(end of broadcast)--------------------------- TIP 6: Have you searched our list archives? http://archives.postgresql.org