So, I needed a way to sort using collation rules other than the one the
database was built with. So I wrote up the following function exposing strxfrm
with an extra parameter to specify the LC_COLLATE value to use.

This is my first C function so I'm really unsure that I've done the right
thing. For the most part I pattern-matched off the string_io code in the
contrib directory. 

In particular I'm unsure about the postgres-interfacing code in
c_varcharxfrm, which makes an extra copy of both parameters that are passed in
and an extra copy of the result value. Are varchars guaranteed to be
nul-terminated? If so I can dispose of two of the copies. And I can probably
eliminate the copying of the result by allotting extra space when I allocate it
initially.

But more generally, would it make more sense to use text or bytea or something
else to store these opaque binary strings? At least with glibc they tend to be
unreadable anyway.

Other caveats: It's condemned to be permanently non-threadsafe because the
whole locale system is a non thread-safe API. Also I fear some systems will
leak memory like a sieve when you call setlocale a few thousand times instead
of the 1 time at initialization that they foresaw. At least glibc doesn't seem
to leak in my brief testing.

If it's deemed a reasonable approach and nobody finds any fatal flaws, then I
expect it would be useful to put it in the contrib directory?


/*
 * This software is distributed under the GNU General Public License
 * either version 2, or (at your option) any later version.
 */

#include "postgres.h"

#include <locale.h>

#include "utils/builtins.h"

/*
 * Forward declaration of the file-local helper that runs strxfrm() on a
 * NUL-terminated string while LC_COLLATE is temporarily switched to the
 * requested locale (defined below).
 */
static 
unsigned char * xfrm(unsigned char *data, int size, const unsigned char *locale, int localesize);

/*
 * Forward declaration of the externally visible entry point.  Both arguments
 * and the result are length-prefixed buffers (4-byte length word followed by
 * the character data); see the definition below.
 */
unsigned char * c_varcharxfrm(unsigned char *s, const unsigned char *locale);


/*
 * xfrm
 *
 * Return the strxfrm() transformation of `data` computed under the
 * LC_COLLATE setting named by `locale`, restoring the previous LC_COLLATE
 * setting before returning.
 *
 * data        NUL-terminated input string.
 * size        length of `data` in bytes; only used to guess the initial
 *             size of the output buffer.
 * locale      NUL-terminated locale name handed to setlocale().
 * localesize  length of `locale`; currently unused.
 *
 * Returns a palloc'd, NUL-terminated buffer holding the transformed string.
 * Reports an error through elog(ERROR) if the locale cannot be queried,
 * switched, or restored.
 *
 * The process locale is only ever changed around the strxfrm() call itself:
 * every allocation and log call happens while the original locale is in
 * effect, so a failure in one of them cannot leave the process running with
 * the caller's collation.
 */
static unsigned char *
xfrm(unsigned char *data, int size, const unsigned char *locale, int localesize)
{
  /* Do the arithmetic in size_t so a large `size` cannot overflow an int. */
  size_t length = (size_t) size * 3 + 4;
  size_t needed;
  size_t savedlen;
  char *transformed;
  char *current;
  char *saved;

  (void) localesize;

  current = setlocale(LC_COLLATE, NULL);
  if (!current) {
    elog(ERROR, "setlocale(LC_COLLATE,NULL) failed to return a locale");
    return NULL;
  }

  /*
   * The string returned by setlocale() may be overwritten by a later
   * setlocale() call, so keep a private copy of the name to restore.
   */
  savedlen = strlen(current) + 1;
  saved = palloc(savedlen);
  memcpy(saved, current, savedlen);

  /* First try a buffer perhaps big enough.  */
  transformed = palloc(length);

  for (;;) {
    char *restored;

    if (!setlocale(LC_COLLATE, (const char *) locale)) {
      elog(ERROR, "setlocale(LC_COLLATE,%s) failed to return a locale", locale);
      return NULL;
    }

    needed = strxfrm(transformed, (const char *) data, length);

    /* Put the original locale back before doing anything that can fail. */
    restored = setlocale(LC_COLLATE, saved);
    if (!restored || strcmp(restored, saved) != 0) {
      elog(ERROR, "Failed to reset locale (trying to reset locale to %s from %s instead got %s)",
           saved, locale, restored ? restored : "(null)");
      return NULL;
    }

    /* strxfrm() reports the length it needs, excluding the terminator. */
    if (needed < length)
      break;

    /*
     * If the buffer was not large enough, resize it and try again.  The
     * undersized buffer is simply abandoned here; NOTE(review): this relies
     * on palloc'd memory being reclaimed with its memory context.
     */
    elog(INFO, "Calling strxfrm again because result didn't fit (%lu>%lu)",
         (unsigned long) needed, (unsigned long) length);
    length = needed + 1;
    transformed = palloc(length);
  }

  return (unsigned char *) transformed;
}


unsigned char *
c_varcharxfrm(unsigned char *s, const unsigned char *l)
{
  int lens = 0, lenl = 0, lenr = 0;
  unsigned char *str, *locale, *retval, *retval2;

  if (s) {
    lens = *(int32 *) s - 4;
    str = palloc(lens+1);
    memcpy(str, s+4, lens);
    str[lens]='\0';
  }

  if (l) {
    lenl = *(int32 *) l - 4;
    locale = palloc(lenl+1);
    memcpy(locale, l+4, lenl);
    locale[lenl]='\0';
  }

  retval = xfrm(str, lens, locale, lenl);
  
  lenr = strlen(retval);
  retval2 = palloc(lenr+5);
  memcpy(retval2+4, retval, lenr+1);
  *(int32 *)retval2 = lenr;
  
  return retval2;
}





/*
 * Local Variables:
 *	tab-width: 4
 *	c-indent-level: 4
 *	c-basic-offset: 4
 * End:
 */
-- Create the function in the public schema.
SET search_path = public;

-- Run each statement in its own transaction.
SET autocommit TO 'on';

-- Expose the C function c_varcharxfrm from the shared library strxfrm.so as
-- the SQL function xfrm(string, locale), returning the transformed string.
-- It is declared STRICT and IMMUTABLE.
CREATE OR REPLACE FUNCTION xfrm(varchar, varchar)
RETURNS varchar
AS 'strxfrm.so', 'c_varcharxfrm'
LANGUAGE 'C' STRICT IMMUTABLE ;


-- 
greg
---------------------------(end of broadcast)---------------------------
TIP 6: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to