Bruce Momjian wrote: > > I am starting to think that the simplest approach is to keep the single-copy > > version in there for single-byte encodings and not worry about the > > overhead of the multi-byte case. > > My new idea is that if we pass the length to str_initcap, we can eliminate > the string copy from text to char *. That leaves us with just one extra > string copy from char * to text, which seems acceptable. We still have > the wide-char copy, but I don't see any easy way to eliminate that > because the multi-byte code is complex and not something we want to > duplicate.
I ended up going in this direction, and did the same for str_toupper and str_tolower. Patch attached and applied. I don't see any other cleanups needed in this area. -- Bruce Momjian <[EMAIL PROTECTED]> http://momjian.us EnterpriseDB http://enterprisedb.com + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v retrieving revision 1.142 diff -c -c -r1.142 formatting.c *** src/backend/utils/adt/formatting.c 17 Jun 2008 16:09:06 -0000 1.142 --- src/backend/utils/adt/formatting.c 23 Jun 2008 19:24:35 -0000 *************** *** 925,933 **** static char *str_numth(char *dest, char *num, int type); static int strspace_len(char *str); static int strdigits_len(char *str); - static char *str_toupper(char *buff); - static char *str_tolower(char *buff); - static char *str_initcap(char *buff); static int seq_search(char *name, char **array, int type, int max, int *len); static void do_to_timestamp(text *date_txt, text *fmt, --- 925,930 ---- *************** *** 1424,1435 **** return dest; } /* ---------- ! * Convert string to upper case. It is designed to be multibyte-aware. * ---------- */ ! static char * ! str_toupper(char *buff) { char *result; --- 1421,1444 ---- return dest; } + /* + * If the system provides the needed functions for wide-character manipulation + * (which are all standardized by C99), then we implement upper/lower/initcap + * using wide-character functions, if necessary. Otherwise we use the + * traditional <ctype.h> functions, which of course will not work as desired + * in multibyte character sets. Note that in either case we are effectively + * assuming that the database character encoding matches the encoding implied + * by LC_CTYPE. + */ + /* ---------- ! * wide-character-aware lower function ! * We pass the number of bytes so we can pass varlena and char* ! * to this function. * ---------- */ ! char * ! str_tolower(char *buff, size_t nbytes) { char *result; *************** *** 1438,1464 **** #ifdef USE_WIDE_UPPER_LOWER if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! result = wstring_upper(buff); else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; ! 
result = pstrdup(buff); for (p = result; *p; p++) ! *p = pg_toupper((unsigned char) *p); } return result; } /* ---------- ! * Convert string to lower case. It is designed to be multibyte-aware. * ---------- */ ! static char * ! str_tolower(char *buff) { char *result; --- 1447,1492 ---- #ifdef USE_WIDE_UPPER_LOWER if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! { ! wchar_t *workspace; ! int curr_char = 0; ! ! /* Output workspace cannot have more codes than input bytes */ ! workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); ! ! char2wchar(workspace, nbytes + 1, buff, nbytes + 1); ! ! for (curr_char = 0; workspace[curr_char] != 0; curr_char++) ! workspace[curr_char] = towlower(workspace[curr_char]); ! ! /* Make result large enough; case change might change number of bytes */ ! result = palloc(curr_char * MB_CUR_MAX + 1); ! ! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); ! pfree(workspace); ! } else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; ! result = pnstrdup(buff, nbytes); for (p = result; *p; p++) ! *p = pg_tolower((unsigned char) *p); } return result; } /* ---------- ! * wide-character-aware upper function ! * We pass the number of bytes so we can pass varlena and char* ! * to this function. * ---------- */ ! char * ! str_toupper(char *buff, size_t nbytes) { char *result; *************** *** 1467,1493 **** #ifdef USE_WIDE_UPPER_LOWER if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! result = wstring_lower(buff); else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; ! result = pstrdup(buff); for (p = result; *p; p++) ! *p = pg_tolower((unsigned char) *p); } return result; } ! /* ---------- * wide-character-aware initcap function * ---------- */ ! static char * ! str_initcap(char *buff) { char *result; bool wasalnum = false; --- 1495,1540 ---- #ifdef USE_WIDE_UPPER_LOWER if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! { ! wchar_t *workspace; ! int curr_char = 0; ! ! 
/* Output workspace cannot have more codes than input bytes */ ! workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); ! ! char2wchar(workspace, nbytes + 1, buff, nbytes + 1); ! ! for (curr_char = 0; workspace[curr_char] != 0; curr_char++) ! workspace[curr_char] = towupper(workspace[curr_char]); ! ! /* Make result large enough; case change might change number of bytes */ ! result = palloc(curr_char * MB_CUR_MAX + 1); ! ! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); ! pfree(workspace); ! } else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; ! result = pnstrdup(buff, nbytes); for (p = result; *p; p++) ! *p = pg_toupper((unsigned char) *p); } return result; } ! /* ---------- * wide-character-aware initcap function + * We pass the number of bytes so we can pass varlena and char* + * to this function. * ---------- */ ! char * ! str_initcap(char *buff, size_t nbytes) { char *result; bool wasalnum = false; *************** *** 1499,1533 **** if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { wchar_t *workspace; ! text *in_text; ! text *out_text; ! int i; ! in_text = cstring_to_text(buff); ! workspace = texttowcs(in_text); ! for (i = 0; workspace[i] != 0; i++) { if (wasalnum) ! workspace[i] = towlower(workspace[i]); else ! workspace[i] = towupper(workspace[i]); ! wasalnum = iswalnum(workspace[i]); } ! out_text = wcstotext(workspace, i); ! result = text_to_cstring(out_text); pfree(workspace); - pfree(in_text); - pfree(out_text); } else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; ! result = pstrdup(buff); for (p = result; *p; p++) { --- 1546,1579 ---- if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { wchar_t *workspace; ! int curr_char = 0; ! ! /* Output workspace cannot have more codes than input bytes */ ! workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); ! char2wchar(workspace, nbytes + 1, buff, nbytes + 1); ! for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (wasalnum) ! 
workspace[curr_char] = towlower(workspace[curr_char]); else ! workspace[curr_char] = towupper(workspace[curr_char]); ! wasalnum = iswalnum(workspace[curr_char]); } ! /* Make result large enough; case change might change number of bytes */ ! result = palloc(curr_char * MB_CUR_MAX + 1); + wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); pfree(workspace); } else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; ! result = pnstrdup(buff, nbytes); for (p = result; *p; p++) { *************** *** 1851,1857 **** { char *p = pstrdup(tmtcTzn(in)); ! strcpy(s, str_tolower(p)); pfree(p); s += strlen(s); } --- 1897,1903 ---- { char *p = pstrdup(tmtcTzn(in)); ! strcpy(s, str_tolower(p, strlen(p))); pfree(p); s += strlen(s); } *************** *** 1893,1903 **** if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1])); else { strcpy(workbuff, months_full[tm->tm_mon - 1]); ! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff)); } s += strlen(s); break; --- 1939,1951 ---- if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1], ! strlen(localized_full_months[tm->tm_mon - 1]))); else { strcpy(workbuff, months_full[tm->tm_mon - 1]); ! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, ! str_toupper(workbuff, strlen(workbuff))); } s += strlen(s); break; *************** *** 1906,1912 **** if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1])); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); s += strlen(s); --- 1954,1961 ---- if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1], ! strlen(localized_full_months[tm->tm_mon - 1]))); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); s += strlen(s); *************** *** 1916,1922 **** if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! 
strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1])); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); --- 1965,1972 ---- if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1], ! strlen(localized_full_months[tm->tm_mon - 1]))); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); *************** *** 1929,1937 **** if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1])); else ! strcpy(s, str_toupper(months[tm->tm_mon - 1])); s += strlen(s); break; case DCH_Mon: --- 1979,1989 ---- if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1], ! strlen(localized_abbrev_months[tm->tm_mon - 1]))); else ! strcpy(s, str_toupper(months[tm->tm_mon - 1], ! strlen(months[tm->tm_mon - 1]))); s += strlen(s); break; case DCH_Mon: *************** *** 1939,1945 **** if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1])); else strcpy(s, months[tm->tm_mon - 1]); s += strlen(s); --- 1991,1998 ---- if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1], ! strlen(localized_abbrev_months[tm->tm_mon - 1]))); else strcpy(s, months[tm->tm_mon - 1]); s += strlen(s); *************** *** 1949,1955 **** if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1])); else { strcpy(s, months[tm->tm_mon - 1]); --- 2002,2009 ---- if (!tm->tm_mon) break; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1], ! strlen(localized_abbrev_months[tm->tm_mon - 1]))); else { strcpy(s, months[tm->tm_mon - 1]); *************** *** 1966,1983 **** case DCH_DAY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! 
strcpy(s, str_toupper(localized_full_days[tm->tm_wday])); else { strcpy(workbuff, days[tm->tm_wday]); ! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff)); } s += strlen(s); break; case DCH_Day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_full_days[tm->tm_wday])); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); s += strlen(s); --- 2020,2040 ---- case DCH_DAY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_toupper(localized_full_days[tm->tm_wday], ! strlen(localized_full_days[tm->tm_wday]))); else { strcpy(workbuff, days[tm->tm_wday]); ! sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, ! str_toupper(workbuff, strlen(workbuff))); } s += strlen(s); break; case DCH_Day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_full_days[tm->tm_wday], ! strlen(localized_full_days[tm->tm_wday]))); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); s += strlen(s); *************** *** 1985,1991 **** case DCH_day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_full_days[tm->tm_wday])); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); --- 2042,2049 ---- case DCH_day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_full_days[tm->tm_wday], ! strlen(localized_full_days[tm->tm_wday]))); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); *************** *** 1996,2010 **** case DCH_DY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday])); else ! strcpy(s, str_toupper(days_short[tm->tm_wday])); s += strlen(s); break; case DCH_Dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday])); else strcpy(s, days_short[tm->tm_wday]); s += strlen(s); --- 2054,2071 ---- case DCH_DY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! 
strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday], ! strlen(localized_abbrev_days[tm->tm_wday]))); else ! strcpy(s, str_toupper(days_short[tm->tm_wday], ! strlen(days_short[tm->tm_wday]))); s += strlen(s); break; case DCH_Dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday], ! strlen(localized_abbrev_days[tm->tm_wday]))); else strcpy(s, days_short[tm->tm_wday]); s += strlen(s); *************** *** 2012,2018 **** case DCH_dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday])); else { strcpy(s, days_short[tm->tm_wday]); --- 2073,2080 ---- case DCH_dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) ! strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday], ! strlen(localized_abbrev_days[tm->tm_wday]))); else { strcpy(s, days_short[tm->tm_wday]); *************** *** 4277,4288 **** case NUM_rn: if (IS_FILLMODE(Np->Num)) { ! strcpy(Np->inout_p, str_tolower(Np->number_p)); Np->inout_p += strlen(Np->inout_p) - 1; } else { ! sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p)); Np->inout_p += strlen(Np->inout_p) - 1; } break; --- 4339,4352 ---- case NUM_rn: if (IS_FILLMODE(Np->Num)) { ! strcpy(Np->inout_p, str_tolower(Np->number_p, ! strlen(Np->number_p))); Np->inout_p += strlen(Np->inout_p) - 1; } else { ! sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p, ! 
strlen(Np->number_p))); Np->inout_p += strlen(Np->inout_p) - 1; } break; Index: src/backend/utils/adt/oracle_compat.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v retrieving revision 1.80 diff -c -c -r1.80 oracle_compat.c *** src/backend/utils/adt/oracle_compat.c 17 Jun 2008 16:09:06 -0000 1.80 --- src/backend/utils/adt/oracle_compat.c 23 Jun 2008 19:24:35 -0000 *************** *** 29,320 **** #endif #include "utils/builtins.h" #include "utils/pg_locale.h" #include "mb/pg_wchar.h" - /* - * If the system provides the needed functions for wide-character manipulation - * (which are all standardized by C99), then we implement upper/lower/initcap - * using wide-character functions. Otherwise we use the traditional <ctype.h> - * functions, which of course will not work as desired in multibyte character - * sets. Note that in either case we are effectively assuming that the - * database character encoding matches the encoding implied by LC_CTYPE. - */ - #ifdef USE_WIDE_UPPER_LOWER - char *wstring_lower(char *str); - char *wstring_upper(char *str); - wchar_t *texttowcs(const text *txt); - text *wcstotext(const wchar_t *str, int ncodes); - #endif - static text *dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim); - #ifdef USE_WIDE_UPPER_LOWER - - /* - * Convert a TEXT value into a palloc'd wchar string. 
- */ - wchar_t * - texttowcs(const text *txt) - { - int nbytes = VARSIZE_ANY_EXHDR(txt); - char *workstr; - wchar_t *result; - size_t ncodes; - - /* Overflow paranoia */ - if (nbytes < 0 || - nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Need a null-terminated version of the input */ - workstr = text_to_cstring(txt); - - /* Output workspace cannot have more codes than input bytes */ - result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - /* Do the conversion */ - ncodes = mbstowcs(result, workstr, nbytes + 1); - - if (ncodes == (size_t) -1) - { - /* - * Invalid multibyte character encountered. We try to give a useful - * error message by letting pg_verifymbstr check the string. But it's - * possible that the string is OK to us, and not OK to mbstowcs --- - * this suggests that the LC_CTYPE locale is different from the - * database encoding. Give a generic error message if verifymbstr - * can't find anything wrong. - */ - pg_verifymbstr(workstr, nbytes, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - - Assert(ncodes <= (size_t) nbytes); - - return result; - } - - - /* - * Convert a wchar string into a palloc'd TEXT value. The wchar string - * must be zero-terminated, but we also require the caller to pass the string - * length, since it will know it anyway in current uses. 
- */ - text * - wcstotext(const wchar_t *str, int ncodes) - { - text *result; - size_t nbytes; - - /* Overflow paranoia */ - if (ncodes < 0 || - ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Make workspace certainly large enough for result */ - result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ); - - /* Do the conversion */ - nbytes = wcstombs((char *) VARDATA(result), str, - (ncodes + 1) * MB_CUR_MAX); - - if (nbytes == (size_t) -1) - { - /* Invalid multibyte character encountered ... shouldn't happen */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"))); - } - - Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX)); - - SET_VARSIZE(result, nbytes + VARHDRSZ); - - return result; - } - #endif /* USE_WIDE_UPPER_LOWER */ - - - /* - * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding. - * To make use of the upper/lower functionality, we need to map UTF8 to - * UTF16, which for some reason mbstowcs and wcstombs won't do for us. - * This conversion layer takes care of it. 
- */ - - #ifdef WIN32 - - /* texttowcs for the case of UTF8 to UTF16 */ - static wchar_t * - win32_utf8_texttowcs(const text *txt) - { - int nbytes = VARSIZE_ANY_EXHDR(txt); - wchar_t *result; - int r; - - /* Overflow paranoia */ - if (nbytes < 0 || - nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Output workspace cannot have more codes than input bytes */ - result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - /* stupid Microsloth API does not work for zero-length input */ - if (nbytes == 0) - r = 0; - else - { - /* Do the conversion */ - r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes, - result, nbytes); - - if (r <= 0) /* assume it's NO_UNICODE_TRANSLATION */ - { - /* see notes above about error reporting */ - pg_verifymbstr(VARDATA_ANY(txt), nbytes, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - } - - /* Append trailing null wchar (MultiByteToWideChar won't have) */ - Assert(r <= nbytes); - result[r] = 0; - - return result; - } - - /* wcstotext for the case of UTF16 to UTF8 */ - static text * - win32_utf8_wcstotext(const wchar_t *str) - { - text *result; - int nbytes; - int r; - - /* Compute size of output string (this *will* include trailing null) */ - nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); - if (nbytes <= 0) /* shouldn't happen */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("UTF-16 to UTF-8 translation failed: %lu", - GetLastError()))); - - result = palloc(nbytes + VARHDRSZ); - - r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes, - NULL, NULL); - if (r != nbytes) /* shouldn't happen */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("UTF-16 to UTF-8 translation 
failed: %lu", - GetLastError()))); - - SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */ - - return result; - } - - /* interface layer to check which encoding is in use */ - - static wchar_t * - win32_texttowcs(const text *txt) - { - if (GetDatabaseEncoding() == PG_UTF8) - return win32_utf8_texttowcs(txt); - else - return texttowcs(txt); - } - - static text * - win32_wcstotext(const wchar_t *str, int ncodes) - { - if (GetDatabaseEncoding() == PG_UTF8) - return win32_utf8_wcstotext(str); - else - return wcstotext(str, ncodes); - } - - /* use macros to cause routines below to call interface layer */ - - #define texttowcs win32_texttowcs - #define wcstotext win32_wcstotext - #endif /* WIN32 */ - - #ifdef USE_WIDE_UPPER_LOWER - /* - * string_upper and string_lower are used for correct multibyte upper/lower - * transformations localized strings. Returns pointers to transformated - * string. - */ - char * - wstring_upper(char *str) - { - wchar_t *workspace; - text *in_text; - text *out_text; - char *result; - int i; - - in_text = cstring_to_text(str); - workspace = texttowcs(in_text); - - for (i = 0; workspace[i] != 0; i++) - workspace[i] = towupper(workspace[i]); - - out_text = wcstotext(workspace, i); - result = text_to_cstring(out_text); - - pfree(workspace); - pfree(in_text); - pfree(out_text); - - return result; - } - - char * - wstring_lower(char *str) - { - wchar_t *workspace; - text *in_text; - text *out_text; - char *result; - int i; - - in_text = cstring_to_text(str); - workspace = texttowcs(in_text); - - for (i = 0; workspace[i] != 0; i++) - workspace[i] = towlower(workspace[i]); - - out_text = wcstotext(workspace, i); - result = text_to_cstring(out_text); - - pfree(workspace); - pfree(in_text); - pfree(out_text); - - return result; - } - #endif /* USE_WIDE_UPPER_LOWER */ - /******************************************************************** * * lower --- 29,44 ---- #endif #include "utils/builtins.h" + #include "utils/formatting.h" #include 
"utils/pg_locale.h" #include "mb/pg_wchar.h" static text *dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim); /******************************************************************** * * lower *************** *** 332,383 **** Datum lower(PG_FUNCTION_ARGS) { ! #ifdef USE_WIDE_UPPER_LOWER ! ! /* ! * Use wide char code only when max encoding length > 1 and ctype != C. ! * Some operating systems fail with multi-byte encodings and a C locale. ! * Also, for a C locale there is no need to process as multibyte. ! */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! { ! text *string = PG_GETARG_TEXT_PP(0); ! text *result; ! wchar_t *workspace; ! int i; ! ! workspace = texttowcs(string); ! ! for (i = 0; workspace[i] != 0; i++) ! workspace[i] = towlower(workspace[i]); ! result = wcstotext(workspace, i); ! ! pfree(workspace); ! ! PG_RETURN_TEXT_P(result); ! } ! else ! #endif /* USE_WIDE_UPPER_LOWER */ ! { ! text *string = PG_GETARG_TEXT_P_COPY(0); ! char *ptr; ! int m; ! ! /* ! * Since we copied the string, we can scribble directly on the value ! */ ! ptr = VARDATA(string); ! m = VARSIZE(string) - VARHDRSZ; ! ! while (m-- > 0) ! { ! *ptr = tolower((unsigned char) *ptr); ! ptr++; ! } ! ! PG_RETURN_TEXT_P(string); ! } } --- 56,70 ---- Datum lower(PG_FUNCTION_ARGS) { ! text *in_string = PG_GETARG_TEXT_PP(0); ! char *out_string; ! text *result; ! ! out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string)); ! result = cstring_to_text(out_string); ! pfree(out_string); ! PG_RETURN_TEXT_P(result); } *************** *** 398,449 **** Datum upper(PG_FUNCTION_ARGS) { ! #ifdef USE_WIDE_UPPER_LOWER ! /* ! * Use wide char code only when max encoding length > 1 and ctype != C. ! * Some operating systems fail with multi-byte encodings and a C locale. ! * Also, for a C locale there is no need to process as multibyte. ! */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! { ! 
text *string = PG_GETARG_TEXT_PP(0); ! text *result; ! wchar_t *workspace; ! int i; ! ! workspace = texttowcs(string); ! ! for (i = 0; workspace[i] != 0; i++) ! workspace[i] = towupper(workspace[i]); ! ! result = wcstotext(workspace, i); ! ! pfree(workspace); ! ! PG_RETURN_TEXT_P(result); ! } ! else ! #endif /* USE_WIDE_UPPER_LOWER */ ! { ! text *string = PG_GETARG_TEXT_P_COPY(0); ! char *ptr; ! int m; ! ! /* ! * Since we copied the string, we can scribble directly on the value ! */ ! ptr = VARDATA(string); ! m = VARSIZE(string) - VARHDRSZ; ! ! while (m-- > 0) ! { ! *ptr = toupper((unsigned char) *ptr); ! ptr++; ! } ! ! PG_RETURN_TEXT_P(string); ! } } --- 85,99 ---- Datum upper(PG_FUNCTION_ARGS) { ! text *in_string = PG_GETARG_TEXT_PP(0); ! char *out_string; ! text *result; ! ! out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string)); ! result = cstring_to_text(out_string); ! pfree(out_string); ! PG_RETURN_TEXT_P(result); } *************** *** 467,530 **** Datum initcap(PG_FUNCTION_ARGS) { ! #ifdef USE_WIDE_UPPER_LOWER ! /* ! * Use wide char code only when max encoding length > 1 and ctype != C. ! * Some operating systems fail with multi-byte encodings and a C locale. ! * Also, for a C locale there is no need to process as multibyte. ! */ ! if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) ! { ! text *string = PG_GETARG_TEXT_PP(0); ! text *result; ! wchar_t *workspace; ! int wasalnum = 0; ! int i; ! ! workspace = texttowcs(string); ! ! for (i = 0; workspace[i] != 0; i++) ! { ! if (wasalnum) ! workspace[i] = towlower(workspace[i]); ! else ! workspace[i] = towupper(workspace[i]); ! wasalnum = iswalnum(workspace[i]); ! } ! ! result = wcstotext(workspace, i); ! ! pfree(workspace); ! ! PG_RETURN_TEXT_P(result); ! } ! else ! #endif /* USE_WIDE_UPPER_LOWER */ ! { ! text *string = PG_GETARG_TEXT_P_COPY(0); ! int wasalnum = 0; ! char *ptr; ! int m; ! ! /* ! * Since we copied the string, we can scribble directly on the value ! */ ! 
ptr = VARDATA(string); ! m = VARSIZE(string) - VARHDRSZ; ! ! while (m-- > 0) ! { ! if (wasalnum) ! *ptr = tolower((unsigned char) *ptr); ! else ! *ptr = toupper((unsigned char) *ptr); ! wasalnum = isalnum((unsigned char) *ptr); ! ptr++; ! } ! ! PG_RETURN_TEXT_P(string); ! } } --- 117,131 ---- Datum initcap(PG_FUNCTION_ARGS) { ! text *in_string = PG_GETARG_TEXT_PP(0); ! char *out_string; ! text *result; ! ! out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string)); ! result = cstring_to_text(out_string); ! pfree(out_string); ! PG_RETURN_TEXT_P(result); } Index: src/include/utils/formatting.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/formatting.h,v retrieving revision 1.18 diff -c -c -r1.18 formatting.h *** src/include/utils/formatting.h 1 Jan 2008 19:45:59 -0000 1.18 --- src/include/utils/formatting.h 23 Jun 2008 19:24:36 -0000 *************** *** 21,26 **** --- 21,30 ---- #include "fmgr.h" + extern char *str_tolower(char *buff, size_t nbytes); + extern char *str_toupper(char *buff, size_t nbytes); + extern char *str_initcap(char *buff, size_t nbytes); + extern Datum timestamp_to_char(PG_FUNCTION_ARGS); extern Datum timestamptz_to_char(PG_FUNCTION_ARGS); extern Datum interval_to_char(PG_FUNCTION_ARGS);
-- Sent via pgsql-patches mailing list (pgsql-patches@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches