Bruce Momjian wrote:
> > I am starting to think that the simplest case is to keep the single-copy
> > version in there for single-byte encodings and not worry about the
> > overhead of the multi-byte case.
>
> My new idea is if we pass the length to str_initcap, we can eliminate
> the string copy from text to char *. That leaves us with just one extra
> string copy from char * to text, which seems acceptable. We still have
> the wide char copy but I don't see any easy way to eliminate that
> because the multi-byte code is complex and not something we want to
> duplicate.
I ended up going in this direction for initcap, and did the same for
upper and lower: all three now take the input length in bytes. Patch
attached and applied. I don't see any other cleanups needed in this area.
--
Bruce Momjian <[EMAIL PROTECTED]> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c 17 Jun 2008 16:09:06 -0000 1.142
--- src/backend/utils/adt/formatting.c 23 Jun 2008 19:24:35 -0000
***************
*** 925,933 ****
static char *str_numth(char *dest, char *num, int type);
static int strspace_len(char *str);
static int strdigits_len(char *str);
- static char *str_toupper(char *buff);
- static char *str_tolower(char *buff);
- static char *str_initcap(char *buff);
static int seq_search(char *name, char **array, int type, int max, int *len);
static void do_to_timestamp(text *date_txt, text *fmt,
--- 925,930 ----
***************
*** 1424,1435 ****
return dest;
}
/* ----------
! * Convert string to upper case. It is designed to be multibyte-aware.
* ----------
*/
! static char *
! str_toupper(char *buff)
{
char *result;
--- 1421,1444 ----
return dest;
}
+ /*
+ * If the system provides the needed functions for wide-character manipulation
+ * (which are all standardized by C99), then we implement upper/lower/initcap
+ * using wide-character functions, if necessary. Otherwise we use the
+ * traditional <ctype.h> functions, which of course will not work as desired
+ * in multibyte character sets. Note that in either case we are effectively
+ * assuming that the database character encoding matches the encoding implied
+ * by LC_CTYPE.
+ */
+
/* ----------
! * wide-character-aware lower function
! * We pass the number of bytes so we can pass varlena and char*
! * to this function.
* ----------
*/
! char *
! str_tolower(char *buff, size_t nbytes)
{
char *result;
***************
*** 1438,1464 ****
#ifdef USE_WIDE_UPPER_LOWER
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! result = wstring_upper(buff);
else
#endif /* USE_WIDE_UPPER_LOWER */
{
char *p;
! result = pstrdup(buff);
for (p = result; *p; p++)
! *p = pg_toupper((unsigned char) *p);
}
return result;
}
/* ----------
! * Convert string to lower case. It is designed to be multibyte-aware.
* ----------
*/
! static char *
! str_tolower(char *buff)
{
char *result;
--- 1447,1492 ----
#ifdef USE_WIDE_UPPER_LOWER
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! {
! wchar_t *workspace;
! int curr_char = 0;
!
! /* Output workspace cannot have more codes than input bytes */
! workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
!
! char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
!
! for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
! workspace[curr_char] = towlower(workspace[curr_char]);
!
! /* Make result large enough; case change might change number of bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);
!
! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
! pfree(workspace);
! }
else
#endif /* USE_WIDE_UPPER_LOWER */
{
char *p;
! result = pnstrdup(buff, nbytes);
for (p = result; *p; p++)
! *p = pg_tolower((unsigned char) *p);
}
return result;
}
/* ----------
! * wide-character-aware upper function
! * We pass the number of bytes so we can pass varlena and char*
! * to this function.
* ----------
*/
! char *
! str_toupper(char *buff, size_t nbytes)
{
char *result;
***************
*** 1467,1493 ****
#ifdef USE_WIDE_UPPER_LOWER
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! result = wstring_lower(buff);
else
#endif /* USE_WIDE_UPPER_LOWER */
{
char *p;
! result = pstrdup(buff);
for (p = result; *p; p++)
! *p = pg_tolower((unsigned char) *p);
}
return result;
}
!
/* ----------
* wide-character-aware initcap function
* ----------
*/
! static char *
! str_initcap(char *buff)
{
char *result;
bool wasalnum = false;
--- 1495,1540 ----
#ifdef USE_WIDE_UPPER_LOWER
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! {
! wchar_t *workspace;
! int curr_char = 0;
!
! /* Output workspace cannot have more codes than input bytes */
! workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
!
! char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
!
! for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
! workspace[curr_char] = towupper(workspace[curr_char]);
!
! /* Make result large enough; case change might change number of bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);
!
! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
! pfree(workspace);
! }
else
#endif /* USE_WIDE_UPPER_LOWER */
{
char *p;
! result = pnstrdup(buff, nbytes);
for (p = result; *p; p++)
! *p = pg_toupper((unsigned char) *p);
}
return result;
}
!
/* ----------
* wide-character-aware initcap function
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function.
* ----------
*/
! char *
! str_initcap(char *buff, size_t nbytes)
{
char *result;
bool wasalnum = false;
***************
*** 1499,1533 ****
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
wchar_t *workspace;
! text *in_text;
! text *out_text;
! int i;
! in_text = cstring_to_text(buff);
! workspace = texttowcs(in_text);
! for (i = 0; workspace[i] != 0; i++)
{
if (wasalnum)
! workspace[i] = towlower(workspace[i]);
else
! workspace[i] = towupper(workspace[i]);
! wasalnum = iswalnum(workspace[i]);
}
! out_text = wcstotext(workspace, i);
! result = text_to_cstring(out_text);
pfree(workspace);
- pfree(in_text);
- pfree(out_text);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
{
char *p;
! result = pstrdup(buff);
for (p = result; *p; p++)
{
--- 1546,1579 ----
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
wchar_t *workspace;
! int curr_char = 0;
!
! /* Output workspace cannot have more codes than input bytes */
! workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
! char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
! for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
if (wasalnum)
! workspace[curr_char] = towlower(workspace[curr_char]);
else
! workspace[curr_char] = towupper(workspace[curr_char]);
! wasalnum = iswalnum(workspace[curr_char]);
}
! /* Make result large enough; case change might change number of bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);
+ wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
pfree(workspace);
}
else
#endif /* USE_WIDE_UPPER_LOWER */
{
char *p;
! result = pnstrdup(buff, nbytes);
for (p = result; *p; p++)
{
***************
*** 1851,1857 ****
{
char *p = pstrdup(tmtcTzn(in));
! strcpy(s, str_tolower(p));
pfree(p);
s += strlen(s);
}
--- 1897,1903 ----
{
char *p = pstrdup(tmtcTzn(in));
! strcpy(s, str_tolower(p, strlen(p)));
pfree(p);
s += strlen(s);
}
***************
*** 1893,1903 ****
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
else
{
strcpy(workbuff, months_full[tm->tm_mon - 1]);
! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
}
s += strlen(s);
break;
--- 1939,1951 ----
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1],
! strlen(localized_full_months[tm->tm_mon - 1])));
else
{
strcpy(workbuff, months_full[tm->tm_mon - 1]);
! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
! str_toupper(workbuff, strlen(workbuff)));
}
s += strlen(s);
break;
***************
*** 1906,1912 ****
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
s += strlen(s);
--- 1954,1961 ----
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1],
! strlen(localized_full_months[tm->tm_mon - 1])));
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
s += strlen(s);
***************
*** 1916,1922 ****
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
else
{
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
--- 1965,1972 ----
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1],
! strlen(localized_full_months[tm->tm_mon - 1])));
else
{
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
***************
*** 1929,1937 ****
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
else
! strcpy(s, str_toupper(months[tm->tm_mon - 1]));
s += strlen(s);
break;
case DCH_Mon:
--- 1979,1989 ----
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1],
! strlen(localized_abbrev_months[tm->tm_mon - 1])));
else
! strcpy(s, str_toupper(months[tm->tm_mon - 1],
! strlen(months[tm->tm_mon - 1])));
s += strlen(s);
break;
case DCH_Mon:
***************
*** 1939,1945 ****
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
else
strcpy(s, months[tm->tm_mon - 1]);
s += strlen(s);
--- 1991,1998 ----
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1],
! strlen(localized_abbrev_months[tm->tm_mon - 1])));
else
strcpy(s, months[tm->tm_mon - 1]);
s += strlen(s);
***************
*** 1949,1955 ****
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
else
{
strcpy(s, months[tm->tm_mon - 1]);
--- 2002,2009 ----
if (!tm->tm_mon)
break;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1],
! strlen(localized_abbrev_months[tm->tm_mon - 1])));
else
{
strcpy(s, months[tm->tm_mon - 1]);
***************
*** 1966,1983 ****
case DCH_DAY:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
else
{
strcpy(workbuff, days[tm->tm_wday]);
! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
}
s += strlen(s);
break;
case DCH_Day:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
s += strlen(s);
--- 2020,2040 ----
case DCH_DAY:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_full_days[tm->tm_wday],
! strlen(localized_full_days[tm->tm_wday])));
else
{
strcpy(workbuff, days[tm->tm_wday]);
! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
! str_toupper(workbuff, strlen(workbuff)));
}
s += strlen(s);
break;
case DCH_Day:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_full_days[tm->tm_wday],
! strlen(localized_full_days[tm->tm_wday])));
else
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
s += strlen(s);
***************
*** 1985,1991 ****
case DCH_day:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
else
{
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
--- 2042,2049 ----
case DCH_day:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_full_days[tm->tm_wday],
! strlen(localized_full_days[tm->tm_wday])));
else
{
sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
***************
*** 1996,2010 ****
case DCH_DY:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
else
! strcpy(s, str_toupper(days_short[tm->tm_wday]));
s += strlen(s);
break;
case DCH_Dy:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
else
strcpy(s, days_short[tm->tm_wday]);
s += strlen(s);
--- 2054,2071 ----
case DCH_DY:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday],
! strlen(localized_abbrev_days[tm->tm_wday])));
else
! strcpy(s, str_toupper(days_short[tm->tm_wday],
! strlen(days_short[tm->tm_wday])));
s += strlen(s);
break;
case DCH_Dy:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday],
! strlen(localized_abbrev_days[tm->tm_wday])));
else
strcpy(s, days_short[tm->tm_wday]);
s += strlen(s);
***************
*** 2012,2018 ****
case DCH_dy:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
else
{
strcpy(s, days_short[tm->tm_wday]);
--- 2073,2080 ----
case DCH_dy:
INVALID_FOR_INTERVAL;
if (S_TM(n->suffix))
! strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday],
! strlen(localized_abbrev_days[tm->tm_wday])));
else
{
strcpy(s, days_short[tm->tm_wday]);
***************
*** 4277,4288 ****
case NUM_rn:
if (IS_FILLMODE(Np->Num))
{
! strcpy(Np->inout_p, str_tolower(Np->number_p));
Np->inout_p += strlen(Np->inout_p) - 1;
}
else
{
! sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p));
Np->inout_p += strlen(Np->inout_p) - 1;
}
break;
--- 4339,4352 ----
case NUM_rn:
if (IS_FILLMODE(Np->Num))
{
! strcpy(Np->inout_p, str_tolower(Np->number_p,
! strlen(Np->number_p)));
Np->inout_p += strlen(Np->inout_p) - 1;
}
else
{
! sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p,
! strlen(Np->number_p)));
Np->inout_p += strlen(Np->inout_p) - 1;
}
break;
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c 17 Jun 2008 16:09:06 -0000 1.80
--- src/backend/utils/adt/oracle_compat.c 23 Jun 2008 19:24:35 -0000
***************
*** 29,320 ****
#endif
#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"
- /*
- * If the system provides the needed functions for wide-character manipulation
- * (which are all standardized by C99), then we implement upper/lower/initcap
- * using wide-character functions. Otherwise we use the traditional <ctype.h>
- * functions, which of course will not work as desired in multibyte character
- * sets. Note that in either case we are effectively assuming that the
- * database character encoding matches the encoding implied by LC_CTYPE.
- */
- #ifdef USE_WIDE_UPPER_LOWER
- char *wstring_lower(char *str);
- char *wstring_upper(char *str);
- wchar_t *texttowcs(const text *txt);
- text *wcstotext(const wchar_t *str, int ncodes);
- #endif
-
static text *dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim);
- #ifdef USE_WIDE_UPPER_LOWER
-
- /*
- * Convert a TEXT value into a palloc'd wchar string.
- */
- wchar_t *
- texttowcs(const text *txt)
- {
- int nbytes = VARSIZE_ANY_EXHDR(txt);
- char *workstr;
- wchar_t *result;
- size_t ncodes;
-
- /* Overflow paranoia */
- if (nbytes < 0 ||
- nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Need a null-terminated version of the input */
- workstr = text_to_cstring(txt);
-
- /* Output workspace cannot have more codes than input bytes */
- result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- /* Do the conversion */
- ncodes = mbstowcs(result, workstr, nbytes + 1);
-
- if (ncodes == (size_t) -1)
- {
- /*
- * Invalid multibyte character encountered. We try to give a useful
- * error message by letting pg_verifymbstr check the string. But it's
- * possible that the string is OK to us, and not OK to mbstowcs ---
- * this suggests that the LC_CTYPE locale is different from the
- * database encoding. Give a generic error message if verifymbstr
- * can't find anything wrong.
- */
- pg_verifymbstr(workstr, nbytes, false);
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("invalid multibyte character for locale"),
- errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
- }
-
- Assert(ncodes <= (size_t) nbytes);
-
- return result;
- }
-
-
- /*
- * Convert a wchar string into a palloc'd TEXT value. The wchar string
- * must be zero-terminated, but we also require the caller to pass the string
- * length, since it will know it anyway in current uses.
- */
- text *
- wcstotext(const wchar_t *str, int ncodes)
- {
- text *result;
- size_t nbytes;
-
- /* Overflow paranoia */
- if (ncodes < 0 ||
- ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Make workspace certainly large enough for result */
- result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
-
- /* Do the conversion */
- nbytes = wcstombs((char *) VARDATA(result), str,
- (ncodes + 1) * MB_CUR_MAX);
-
- if (nbytes == (size_t) -1)
- {
- /* Invalid multibyte character encountered ... shouldn't happen */
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("invalid multibyte character for locale")));
- }
-
- Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
-
- SET_VARSIZE(result, nbytes + VARHDRSZ);
-
- return result;
- }
- #endif /* USE_WIDE_UPPER_LOWER */
-
-
- /*
- * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
- * To make use of the upper/lower functionality, we need to map UTF8 to
- * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
- * This conversion layer takes care of it.
- */
-
- #ifdef WIN32
-
- /* texttowcs for the case of UTF8 to UTF16 */
- static wchar_t *
- win32_utf8_texttowcs(const text *txt)
- {
- int nbytes = VARSIZE_ANY_EXHDR(txt);
- wchar_t *result;
- int r;
-
- /* Overflow paranoia */
- if (nbytes < 0 ||
- nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
-
- /* Output workspace cannot have more codes than input bytes */
- result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
- /* stupid Microsloth API does not work for zero-length input */
- if (nbytes == 0)
- r = 0;
- else
- {
- /* Do the conversion */
- r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
- result, nbytes);
-
- if (r <= 0) /* assume it's NO_UNICODE_TRANSLATION */
- {
- /* see notes above about error reporting */
- pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("invalid multibyte character for locale"),
- errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
- }
- }
-
- /* Append trailing null wchar (MultiByteToWideChar won't have) */
- Assert(r <= nbytes);
- result[r] = 0;
-
- return result;
- }
-
- /* wcstotext for the case of UTF16 to UTF8 */
- static text *
- win32_utf8_wcstotext(const wchar_t *str)
- {
- text *result;
- int nbytes;
- int r;
-
- /* Compute size of output string (this *will* include trailing null) */
- nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
- if (nbytes <= 0) /* shouldn't happen */
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("UTF-16 to UTF-8 translation failed: %lu",
- GetLastError())));
-
- result = palloc(nbytes + VARHDRSZ);
-
- r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
- NULL, NULL);
- if (r != nbytes) /* shouldn't happen */
- ereport(ERROR,
- (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- errmsg("UTF-16 to UTF-8 translation failed: %lu",
- GetLastError())));
-
- SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
-
- return result;
- }
-
- /* interface layer to check which encoding is in use */
-
- static wchar_t *
- win32_texttowcs(const text *txt)
- {
- if (GetDatabaseEncoding() == PG_UTF8)
- return win32_utf8_texttowcs(txt);
- else
- return texttowcs(txt);
- }
-
- static text *
- win32_wcstotext(const wchar_t *str, int ncodes)
- {
- if (GetDatabaseEncoding() == PG_UTF8)
- return win32_utf8_wcstotext(str);
- else
- return wcstotext(str, ncodes);
- }
-
- /* use macros to cause routines below to call interface layer */
-
- #define texttowcs win32_texttowcs
- #define wcstotext win32_wcstotext
- #endif /* WIN32 */
-
- #ifdef USE_WIDE_UPPER_LOWER
- /*
- * string_upper and string_lower are used for correct multibyte upper/lower
- * transformations localized strings. Returns pointers to transformated
- * string.
- */
- char *
- wstring_upper(char *str)
- {
- wchar_t *workspace;
- text *in_text;
- text *out_text;
- char *result;
- int i;
-
- in_text = cstring_to_text(str);
- workspace = texttowcs(in_text);
-
- for (i = 0; workspace[i] != 0; i++)
- workspace[i] = towupper(workspace[i]);
-
- out_text = wcstotext(workspace, i);
- result = text_to_cstring(out_text);
-
- pfree(workspace);
- pfree(in_text);
- pfree(out_text);
-
- return result;
- }
-
- char *
- wstring_lower(char *str)
- {
- wchar_t *workspace;
- text *in_text;
- text *out_text;
- char *result;
- int i;
-
- in_text = cstring_to_text(str);
- workspace = texttowcs(in_text);
-
- for (i = 0; workspace[i] != 0; i++)
- workspace[i] = towlower(workspace[i]);
-
- out_text = wcstotext(workspace, i);
- result = text_to_cstring(out_text);
-
- pfree(workspace);
- pfree(in_text);
- pfree(out_text);
-
- return result;
- }
- #endif /* USE_WIDE_UPPER_LOWER */
-
/********************************************************************
*
* lower
--- 29,44 ----
#endif
#include "utils/builtins.h"
+ #include "utils/formatting.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"
static text *dotrim(const char *string, int stringlen,
const char *set, int setlen,
bool doltrim, bool dortrim);
/********************************************************************
*
* lower
***************
*** 332,383 ****
Datum
lower(PG_FUNCTION_ARGS)
{
! #ifdef USE_WIDE_UPPER_LOWER
!
! /*
! * Use wide char code only when max encoding length > 1 and ctype != C.
! * Some operating systems fail with multi-byte encodings and a C locale.
! * Also, for a C locale there is no need to process as multibyte.
! */
! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! {
! text *string = PG_GETARG_TEXT_PP(0);
! text *result;
! wchar_t *workspace;
! int i;
!
! workspace = texttowcs(string);
!
! for (i = 0; workspace[i] != 0; i++)
! workspace[i] = towlower(workspace[i]);
! result = wcstotext(workspace, i);
!
! pfree(workspace);
!
! PG_RETURN_TEXT_P(result);
! }
! else
! #endif /* USE_WIDE_UPPER_LOWER */
! {
! text *string = PG_GETARG_TEXT_P_COPY(0);
! char *ptr;
! int m;
!
! /*
! * Since we copied the string, we can scribble directly on the value
! */
! ptr = VARDATA(string);
! m = VARSIZE(string) - VARHDRSZ;
!
! while (m-- > 0)
! {
! *ptr = tolower((unsigned char) *ptr);
! ptr++;
! }
!
! PG_RETURN_TEXT_P(string);
! }
}
--- 56,70 ----
Datum
lower(PG_FUNCTION_ARGS)
{
! text *in_string = PG_GETARG_TEXT_PP(0);
! char *out_string;
! text *result;
!
! out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
! result = cstring_to_text(out_string);
! pfree(out_string);
! PG_RETURN_TEXT_P(result);
}
***************
*** 398,449 ****
Datum
upper(PG_FUNCTION_ARGS)
{
! #ifdef USE_WIDE_UPPER_LOWER
! /*
! * Use wide char code only when max encoding length > 1 and ctype != C.
! * Some operating systems fail with multi-byte encodings and a C locale.
! * Also, for a C locale there is no need to process as multibyte.
! */
! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! {
! text *string = PG_GETARG_TEXT_PP(0);
! text *result;
! wchar_t *workspace;
! int i;
!
! workspace = texttowcs(string);
!
! for (i = 0; workspace[i] != 0; i++)
! workspace[i] = towupper(workspace[i]);
!
! result = wcstotext(workspace, i);
!
! pfree(workspace);
!
! PG_RETURN_TEXT_P(result);
! }
! else
! #endif /* USE_WIDE_UPPER_LOWER */
! {
! text *string = PG_GETARG_TEXT_P_COPY(0);
! char *ptr;
! int m;
!
! /*
! * Since we copied the string, we can scribble directly on the value
! */
! ptr = VARDATA(string);
! m = VARSIZE(string) - VARHDRSZ;
!
! while (m-- > 0)
! {
! *ptr = toupper((unsigned char) *ptr);
! ptr++;
! }
!
! PG_RETURN_TEXT_P(string);
! }
}
--- 85,99 ----
Datum
upper(PG_FUNCTION_ARGS)
{
! text *in_string = PG_GETARG_TEXT_PP(0);
! char *out_string;
! text *result;
!
! out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
! result = cstring_to_text(out_string);
! pfree(out_string);
! PG_RETURN_TEXT_P(result);
}
***************
*** 467,530 ****
Datum
initcap(PG_FUNCTION_ARGS)
{
! #ifdef USE_WIDE_UPPER_LOWER
! /*
! * Use wide char code only when max encoding length > 1 and ctype != C.
! * Some operating systems fail with multi-byte encodings and a C locale.
! * Also, for a C locale there is no need to process as multibyte.
! */
! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! {
! text *string = PG_GETARG_TEXT_PP(0);
! text *result;
! wchar_t *workspace;
! int wasalnum = 0;
! int i;
!
! workspace = texttowcs(string);
!
! for (i = 0; workspace[i] != 0; i++)
! {
! if (wasalnum)
! workspace[i] = towlower(workspace[i]);
! else
! workspace[i] = towupper(workspace[i]);
! wasalnum = iswalnum(workspace[i]);
! }
!
! result = wcstotext(workspace, i);
!
! pfree(workspace);
!
! PG_RETURN_TEXT_P(result);
! }
! else
! #endif /* USE_WIDE_UPPER_LOWER */
! {
! text *string = PG_GETARG_TEXT_P_COPY(0);
! int wasalnum = 0;
! char *ptr;
! int m;
!
! /*
! * Since we copied the string, we can scribble directly on the value
! */
! ptr = VARDATA(string);
! m = VARSIZE(string) - VARHDRSZ;
!
! while (m-- > 0)
! {
! if (wasalnum)
! *ptr = tolower((unsigned char) *ptr);
! else
! *ptr = toupper((unsigned char) *ptr);
! wasalnum = isalnum((unsigned char) *ptr);
! ptr++;
! }
!
! PG_RETURN_TEXT_P(string);
! }
}
--- 117,131 ----
Datum
initcap(PG_FUNCTION_ARGS)
{
! text *in_string = PG_GETARG_TEXT_PP(0);
! char *out_string;
! text *result;
!
! out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
! result = cstring_to_text(out_string);
! pfree(out_string);
! PG_RETURN_TEXT_P(result);
}
Index: src/include/utils/formatting.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/formatting.h,v
retrieving revision 1.18
diff -c -c -r1.18 formatting.h
*** src/include/utils/formatting.h 1 Jan 2008 19:45:59 -0000 1.18
--- src/include/utils/formatting.h 23 Jun 2008 19:24:36 -0000
***************
*** 21,26 ****
--- 21,30 ----
#include "fmgr.h"
+ extern char *str_tolower(char *buff, size_t nbytes);
+ extern char *str_toupper(char *buff, size_t nbytes);
+ extern char *str_initcap(char *buff, size_t nbytes);
+
extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
extern Datum interval_to_char(PG_FUNCTION_ARGS);
--
Sent via pgsql-patches mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-patches