> Sorry for the delay, I was on holiday :) > > Did you test the patch on the Windows platform?
No. I myself do not use the Windows platform. Do you have any concerns about Windows regarding my patches? -- Tatsuo Ishii SRA OSS, Inc. Japan > Tatsuo Ishii wrote: > > I have tested with locale-enabled environment and found a bug. Included > > is the new version of patches. > > > > Teodor, Oleg, what do you think about these patches? > > If ok, shall I commit to CVS head? > > -- > > Tatsuo Ishii > > SRA OSS, Inc. Japan > > > >> Hi, > >> > >> Here are patches against tsearch2 with CVS head. Currently tsearch2 > >> does not work with multibyte encoding which uses C locale. These > >> patches are intended to solve the problem by using PostgreSQL in-house > >> multibyte function instead of mbstowcs which does not work with C > >> locale. Also iswalpha etc. will not be called in case of C locale > >> since they are not working with it. Tested with the EUC_JP encoding > >> (should be working with any multibyte encodings). Existing single byte > >> encodings should not be broken by the patches, I did not test though. > >> -- > >> Tatsuo Ishii > >> SRA OSS, Inc. 
Japan > >> > >> ------------------------------------------------------------------------ > >> > >> Index: ts_locale.c > >> =================================================================== > >> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v > >> retrieving revision 1.7 > >> diff -c -r1.7 ts_locale.c > >> *** ts_locale.c 20 Nov 2006 14:03:30 -0000 1.7 > >> --- ts_locale.c 4 Jan 2007 12:16:00 -0000 > >> *************** > >> *** 63,68 **** > >> --- 63,101 ---- > >> > >> return mbstowcs(to, from, len); > >> } > >> + > >> + #else /* WIN32 */ > >> + > >> + size_t > >> + char2wchar(wchar_t *to, const char *from, size_t len) > >> + { > >> + wchar_t *result; > >> + size_t n; > >> + > >> + if (to == NULL) > >> + return 0; > >> + > >> + if (lc_ctype_is_c()) > >> + { > >> + /* allocate neccesary memory for "to" including NULL terminate > >> */ > >> + result = (wchar_t *)palloc((len+1)*sizeof(wchar_t)); > >> + > >> + /* do the conversion */ > >> + n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len); > >> + if (n > 0) > >> + { > >> + /* store the result */ > >> + if (n > len) > >> + n = len; > >> + memcpy(to, result, n*sizeof(wchar_t)); > >> + pfree(result); > >> + *(to + n) = '\0'; > >> + } > >> + return n; > >> + } > >> + return mbstowcs(to, from, len); > >> + } > >> + > >> #endif /* WIN32 */ > >> > >> int > >> *************** > >> *** 70,75 **** > >> --- 103,113 ---- > >> { > >> wchar_t character; > >> > >> + if (lc_ctype_is_c()) > >> + { > >> + return isalpha(TOUCHAR(ptr)); > >> + } > >> + > >> char2wchar(&character, ptr, 1); > >> > >> return iswalpha((wint_t) character); > >> *************** > >> *** 80,85 **** > >> --- 118,128 ---- > >> { > >> wchar_t character; > >> > >> + if (lc_ctype_is_c()) > >> + { > >> + return isprint(TOUCHAR(ptr)); > >> + } > >> + > >> char2wchar(&character, ptr, 1); > >> > >> return iswprint((wint_t) character); > >> *************** > >> *** 126,132 **** > >> if ( wlen < 0 ) > >> ereport(ERROR, > >> > >> 
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), > >> ! errmsg("transalation failed from > >> server encoding to wchar_t"))); > >> > >> Assert(wlen<=len); > >> wstr[wlen] = 0; > >> --- 169,175 ---- > >> if ( wlen < 0 ) > >> ereport(ERROR, > >> > >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), > >> ! errmsg("translation failed from server > >> encoding to wchar_t"))); > >> > >> Assert(wlen<=len); > >> wstr[wlen] = 0; > >> *************** > >> *** 152,158 **** > >> if ( wlen < 0 ) > >> ereport(ERROR, > >> > >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), > >> ! errmsg("transalation failed from > >> wchar_t to server encoding %d", errno))); > >> Assert(wlen<=len); > >> out[wlen]='\0'; > >> } > >> --- 195,201 ---- > >> if ( wlen < 0 ) > >> ereport(ERROR, > >> > >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), > >> ! errmsg("translation failed from > >> wchar_t to server encoding %d", errno))); > >> Assert(wlen<=len); > >> out[wlen]='\0'; > >> } > >> Index: ts_locale.h > >> =================================================================== > >> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v > >> retrieving revision 1.7 > >> diff -c -r1.7 ts_locale.h > >> *** ts_locale.h 4 Oct 2006 00:29:47 -0000 1.7 > >> --- ts_locale.h 4 Jan 2007 12:16:00 -0000 > >> *************** > >> *** 38,45 **** > >> #else /* WIN32 */ > >> > >> /* correct mbstowcs */ > >> - #define char2wchar mbstowcs > >> #define wchar2char wcstombs > >> #endif /* WIN32 */ > >> > >> #define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) > >> --- 38,46 ---- > >> #else /* WIN32 */ > >> > >> /* correct mbstowcs */ > >> #define wchar2char wcstombs > >> + size_t char2wchar(wchar_t *to, const char *from, size_t len); > >> + > >> #endif /* WIN32 */ > >> > >> #define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) > >> *************** > >> *** 54,59 **** > >> --- 55,61 ---- > >> * t_iseq() should be called only for ASCII symbols > >> */ > >> #define t_iseq(x,c) ( (pg_mblen(x)==1) ? 
( TOUCHAR(x) == ((unsigned > >> char)(c)) ) : false ) > >> + /*#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)))*/ > >> > >> #define COPYCHAR(d,s) do { \ > >> int lll = pg_mblen( s ); \ > >> Index: wordparser/parser.c > >> =================================================================== > >> RCS file: /cvsroot/pgsql/contrib/tsearch2/wordparser/parser.c,v > >> retrieving revision 1.11 > >> diff -c -r1.11 parser.c > >> *** wordparser/parser.c 4 Oct 2006 00:29:47 -0000 1.11 > >> --- wordparser/parser.c 4 Jan 2007 12:16:01 -0000 > >> *************** > >> *** 44,52 **** > >> * Some operating systems fail with multi-byte encodings and a C locale. > >> * Also, for a C locale there is no need to process as multibyte. From > >> * backend/utils/adt/oracle_compat.c Teodor > >> */ > >> > >> ! if (prs->charmaxlen > 1 && !lc_ctype_is_c()) > >> { > >> prs->usewide = true; > >> prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr); > >> --- 44,54 ---- > >> * Some operating systems fail with multi-byte encodings and a C locale. > >> * Also, for a C locale there is no need to process as multibyte. From > >> * backend/utils/adt/oracle_compat.c Teodor > >> + * > >> + * This is wrong assumption. even if locale is C, multibyte is necceary. > >> */ > >> > >> ! if (prs->charmaxlen > 1) > >> { > >> prs->usewide = true; > >> prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr); > >> *************** > >> *** 92,98 **** > >> static int > >> \ > >> p_is##type(TParser *prs) { > >> \ > >> Assert( prs->state ); > >> \ > >> ! return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + > >> prs->state->poschar ) ) : \ > >> is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) > >> ); \ > >> } \ > >> > >> \ > >> --- 94,102 ---- > >> static int > >> \ > >> p_is##type(TParser *prs) { > >> \ > >> Assert( prs->state ); > >> \ > >> ! return ( ( prs->usewide ) ? \ > >> ! (lc_ctype_is_c()? is##type( 0xff & *( prs->wstr + > >> prs->state->poschar)): \ > >> ! 
isw##type( (wint_t)*( prs->wstr + > >> prs->state->poschar))): \ > >> is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) > >> ); \ > >> } \ > >> > >> \ > >> *************** > >> *** 134,141 **** > >> } > >> #endif /* TS_USE_WIDE */ > >> > >> ! p_iswhat(alnum) > >> ! p_iswhat(alpha) > >> p_iswhat(digit) > >> p_iswhat(lower) > >> p_iswhat(print) > >> --- 138,197 ---- > >> } > >> #endif /* TS_USE_WIDE */ > >> > >> ! static int p_isalnum(TParser *prs) { > >> ! Assert( prs->state ); > >> ! > >> ! if (prs->usewide) > >> ! { > >> ! unsigned int c; > >> ! > >> ! c = *(prs->wstr + prs->state->poschar); > >> ! > >> ! if (lc_ctype_is_c()) > >> ! { > >> ! if (c > 0x7f) > >> ! return 1; > >> ! return isalnum(0xff & c); > >> ! } > >> ! else > >> ! return iswalnum( (wint_t)*( prs->wstr + > >> prs->state->poschar)); > >> ! } > >> ! else > >> ! return isalnum( (unsigned char)*( prs->str + > >> prs->state->posbyte )); > >> ! } > >> ! > >> ! static int p_isnotalnum(TParser *prs) > >> ! { > >> ! return !p_isalnum(prs); > >> ! } > >> ! > >> ! static int p_isalpha(TParser *prs) { > >> ! Assert( prs->state ); > >> ! > >> ! if (prs->usewide) > >> ! { > >> ! unsigned int c; > >> ! > >> ! c = *(prs->wstr + prs->state->poschar); > >> ! > >> ! if (lc_ctype_is_c()) > >> ! { > >> ! if (c > 0x7f) > >> ! return 1; > >> ! return isalpha(0xff & c); > >> ! } > >> ! else > >> ! return iswalpha( (wint_t)*( prs->wstr + > >> prs->state->poschar)); > >> ! } > >> ! else > >> ! return isalpha( (unsigned char)*( prs->str + > >> prs->state->posbyte )); > >> ! } > >> ! > >> ! static int p_isnotalpha(TParser *prs) > >> ! { > >> ! return !p_isalpha(prs); > >> ! } > >> ! 
> >> p_iswhat(digit) > >> p_iswhat(lower) > >> p_iswhat(print) > >> > >> ------------------------------------------------------------------------ > >> > >> > >> ---------------------------(end of broadcast)--------------------------- > >> TIP 9: In versions below 8.0, the planner will ignore your desire to > >> choose an index scan if your joining column's datatypes do not > >> match > > -- > Teodor Sigaev E-mail: [EMAIL PROTECTED] > WWW: http://www.sigaev.ru/ > ---------------------------(end of broadcast)--------------------------- TIP 4: Have you searched our list archives? http://archives.postgresql.org