Re: [HACKERS] Request for review: tsearch2 patch

2007-01-12 Thread Teodor Sigaev

Yeah, it's a workaround. Since there's no concept other than
alpha/numeric/latin in tsearch2, Asian characters have to fall into
one of them.


Ok, I see.

Please test the attached patch - if it is good, then I'll commit it on Monday to the HEAD 
and 8.2 branches.


PS. Magnus, may I ask you to test under Windows? Thank you.

--
Teodor Sigaev   E-mail: [EMAIL PROTECTED]
   WWW: http://www.sigaev.ru/
diff -c -r -N ../tsearch2.orig/ts_locale.c ./ts_locale.c
*** ../tsearch2.orig/ts_locale.cFri Jan 12 10:53:11 2007
--- ./ts_locale.c   Fri Jan 12 18:10:27 2007
***
*** 12,24 
  size_t
  wchar2char(char *to, const wchar_t *from, size_t len)
  {
if (GetDatabaseEncoding() == PG_UTF8)
{
int r;
  
-   if (len == 0)
-   return 0;
- 
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
NULL, NULL);
  
--- 12,24 
  size_t
  wchar2char(char *to, const wchar_t *from, size_t len)
  {
+   if (len == 0)
+   return 0;
+ 
if (GetDatabaseEncoding() == PG_UTF8)
{
int r;
  
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
NULL, NULL);
  
***
*** 34,50 
  
return wcstombs(to, from, len);
  }
  
  size_t
  char2wchar(wchar_t *to, const char *from, size_t len)
  {
if (GetDatabaseEncoding() == PG_UTF8)
{
int r;
  
-   if (len == 0)
-   return 0;
- 
r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);
  
if (!r)
--- 34,52 
  
return wcstombs(to, from, len);
  }
+ #endif   /* WIN32 */
  
  size_t
  char2wchar(wchar_t *to, const char *from, size_t len)
  {
+   if (len == 0)
+   return 0;
+ 
+ #ifdef WIN32
if (GetDatabaseEncoding() == PG_UTF8)
{
int r;
  
r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);
  
if (!r)
***
*** 60,88 
  
return r;
}
  
return mbstowcs(to, from, len);
  }
- #endif   /* WIN32 */
  
  int
  _t_isalpha(const char *ptr)
  {
!   wchar_t character;
  
!   char2wchar(character, ptr, 1);
  
!   return iswalpha((wint_t) character);
  }
  
  int
  _t_isprint(const char *ptr)
  {
!   wchar_t character;
  
!   char2wchar(character, ptr, 1);
  
!   return iswprint((wint_t) character);
  }
  #endif   /* TS_USE_WIDE */
  
--- 62,105 
  
return r;
}
+   else 
+ #endif /* WIN32 */
+   if ( lc_ctype_is_c() )
+   {
+   /*
+* pg_mb2wchar_with_len always adds trailing '\0', so 
+* 'to' should be allocated with sufficient space 
+*/
+   return pg_mb2wchar_with_len(from, (pg_wchar *)to, len);
+   }
  
return mbstowcs(to, from, len);
  }
  
  int
  _t_isalpha(const char *ptr)
  {
!   wchar_t character[2];
! 
!   if (lc_ctype_is_c())
!   return isalpha(TOUCHAR(ptr));
  
!   char2wchar(character, ptr, 1);
  
!   return iswalpha((wint_t) *character);
  }
  
  int
  _t_isprint(const char *ptr)
  {
!   wchar_t character[2];
! 
!   if (lc_ctype_is_c())
!   return isprint(TOUCHAR(ptr));
  
!   char2wchar(character, ptr, 1);
  
!   return iswprint((wint_t) *character);
  }
  #endif   /* TS_USE_WIDE */
  
***
*** 126,132 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from 
server encoding to wchar_t)));
  
Assert(wlen=len);
wstr[wlen] = 0;
--- 143,149 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from server 
encoding to wchar_t)));
  
Assert(wlen=len);
wstr[wlen] = 0;
***
*** 152,158 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from 
wchar_t to server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
--- 169,175 
if ( wlen  0 )
ereport(ERROR,
   

Re: [HACKERS] Request for review: tsearch2 patch

2007-01-12 Thread Tatsuo Ishii
  Yeah, it's a workaround. Since there's no concept other than
  alpha/numeric/latin in tsearch2, Asian characters have to fall into
  one of them.
 
 Ok, I see.
 
 Please test the attached patch - if it is good, then I'll commit it on Monday to 
 the HEAD 
 and 8.2 branches.

I have tested on a Linux box running PostgreSQL 8.2.1 (C locale,
EUC_JP encoding), and it worked great!

BTW, is your patch supposed to work with PostgreSQL 8.1?
--
Tatsuo Ishii
SRA OSS, Inc. Japan

 PS. Magnus, may I ask you to test under Windows? Thank you.
 
 -- 
 Teodor Sigaev   E-mail: [EMAIL PROTECTED]
 WWW: http://www.sigaev.ru/

---(end of broadcast)---
TIP 7: You can help support the PostgreSQL project by donating at

http://www.postgresql.org/about/donate


Re: [HACKERS] Request for review: tsearch2 patch

2007-01-10 Thread Teodor Sigaev

Sorry for the delay, I was on holiday :)

Did you test the patch on the Windows platform?

Tatsuo Ishii wrote:

I have tested with a locale-enabled environment and found a bug. Included
is the new version of the patches. 


Teodor, Oleg, what do you think about these patches?
If ok, shall I commit to CVS head?
--
Tatsuo Ishii
SRA OSS, Inc. Japan


Hi,

Here are patches against tsearch2 in CVS head.  Currently tsearch2
does not work with multibyte encodings when the C locale is used. These
patches are intended to solve the problem by using PostgreSQL's in-house
multibyte function instead of mbstowcs, which does not work with the C
locale. Also, iswalpha etc. will not be called in the case of the C locale
since they do not work with it. Tested with the EUC_JP encoding
(should work with any multibyte encoding). Existing single-byte
encodings should not be broken by the patches, though I did not test that.
--
Tatsuo Ishii
SRA OSS, Inc. Japan



Index: ts_locale.c
===
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.c
*** ts_locale.c 20 Nov 2006 14:03:30 -  1.7
--- ts_locale.c 4 Jan 2007 12:16:00 -
***
*** 63,68 
--- 63,101 
  
  	return mbstowcs(to, from, len);

  }
+ 
+ #else	/* WIN32 */
+ 
+ size_t

+ char2wchar(wchar_t *to, const char *from, size_t len)
+ {
+   wchar_t *result;
+   size_t n;
+ 
+ 	if (to == NULL)

+   return 0;
+ 
+ 	if (lc_ctype_is_c())

+   {
+   /* allocate neccesary memory for to including NULL terminate 
*/
+   result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
+ 
+ 		/* do the conversion */

+   n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
+   if (n  0)
+   {
+   /* store the result */
+   if (n  len)
+   n = len;
+   memcpy(to, result, n*sizeof(wchar_t));
+   pfree(result);
+   *(to + n) = '\0';
+   }
+   return n;
+   }
+   return mbstowcs(to, from, len);
+ }
+ 
  #endif   /* WIN32 */
  
  int

***
*** 70,75 
--- 103,113 
  {
wchar_t character;
  
+ 	if (lc_ctype_is_c())

+   {
+   return isalpha(TOUCHAR(ptr));
+   }
+ 
  	char2wchar(character, ptr, 1);
  
  	return iswalpha((wint_t) character);

***
*** 80,85 
--- 118,128 
  {
wchar_t character;
  
+ 	if (lc_ctype_is_c())

+   {
+   return isprint(TOUCHAR(ptr));
+   }
+ 
  	char2wchar(character, ptr, 1);
  
  	return iswprint((wint_t) character);

***
*** 126,132 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from server 
encoding to wchar_t)));
  
  		Assert(wlen=len);

wstr[wlen] = 0;
--- 169,175 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from server 
encoding to wchar_t)));
  
  		Assert(wlen=len);

wstr[wlen] = 0;
***
*** 152,158 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from wchar_t to 
server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
--- 195,201 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from wchar_t to 
server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
Index: ts_locale.h
===
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.h
*** ts_locale.h 4 Oct 2006 00:29:47 -   1.7
--- ts_locale.h 4 Jan 2007 12:16:00 -
***
*** 38,45 
  #else /* WIN32 */
  
  /* correct mbstowcs */

- #define char2wchar mbstowcs
  #define wchar2char wcstombs
  #endif   /* WIN32 */
  
  #define t_isdigit(x)	( pg_mblen(x)==1  isdigit( TOUCHAR(x) ) )

--- 38,46 
  #else /* WIN32 */
  
  /* correct mbstowcs */

  #define wchar2char wcstombs
+ size_t  

Re: [HACKERS] Request for review: tsearch2 patch

2007-01-10 Thread Tatsuo Ishii
 Sorry for the delay, I was on holiday :)
 
 Did you test the patch on the Windows platform?

No. I myself do not use the Windows platform.

Do you have any concerns about Windows regarding my patches?
--
Tatsuo Ishii
SRA OSS, Inc. Japan

 Tatsuo Ishii wrote:
  I have tested with a locale-enabled environment and found a bug. Included
  is the new version of the patches. 
  
  Teodor, Oleg, what do you think about these patches?
  If ok, shall I commit to CVS head?
  --
  Tatsuo Ishii
  SRA OSS, Inc. Japan
  
  Hi,
 
  Here are patches against tsearch2 in CVS head.  Currently tsearch2
  does not work with multibyte encodings when the C locale is used. These
  patches are intended to solve the problem by using PostgreSQL's in-house
  multibyte function instead of mbstowcs, which does not work with the C
  locale. Also, iswalpha etc. will not be called in the case of the C locale
  since they do not work with it. Tested with the EUC_JP encoding
  (should work with any multibyte encoding). Existing single-byte
  encodings should not be broken by the patches, though I did not test that.
  --
  Tatsuo Ishii
  SRA OSS, Inc. Japan
 
  
 
  Index: ts_locale.c
  ===
  RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
  retrieving revision 1.7
  diff -c -r1.7 ts_locale.c
  *** ts_locale.c20 Nov 2006 14:03:30 -  1.7
  --- ts_locale.c4 Jan 2007 12:16:00 -
  ***
  *** 63,68 
  --- 63,101 

 return mbstowcs(to, from, len);
}
  + 
  + #else/* WIN32 */
  + 
  + size_t
  + char2wchar(wchar_t *to, const char *from, size_t len)
  + {
  +  wchar_t *result;
  +  size_t n;
  + 
  +  if (to == NULL)
  +  return 0;
  + 
  +  if (lc_ctype_is_c())
  +  {
  +  /* allocate neccesary memory for to including NULL terminate 
  */
  +  result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
  + 
  +  /* do the conversion */
  +  n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
  +  if (n  0)
  +  {
  +  /* store the result */
  +  if (n  len)
  +  n = len;
  +  memcpy(to, result, n*sizeof(wchar_t));
  +  pfree(result);
  +  *(to + n) = '\0';
  +  }
  +  return n;
  +  }
  +  return mbstowcs(to, from, len);
  + }
  + 
#endif   /* WIN32 */

int
  ***
  *** 70,75 
  --- 103,113 
{
 wchar_t character;

  +  if (lc_ctype_is_c())
  +  {
  +  return isalpha(TOUCHAR(ptr));
  +  }
  + 
 char2wchar(character, ptr, 1);

 return iswalpha((wint_t) character);
  ***
  *** 80,85 
  --- 118,128 
{
 wchar_t character;

  +  if (lc_ctype_is_c())
  +  {
  +  return isprint(TOUCHAR(ptr));
  +  }
  + 
 char2wchar(character, ptr, 1);

 return iswprint((wint_t) character);
  ***
  *** 126,132 
 if ( wlen  0 )
 ereport(ERROR,
 
  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
  !   errmsg(transalation failed from 
  server encoding to wchar_t)));

 Assert(wlen=len);
 wstr[wlen] = 0;
  --- 169,175 
 if ( wlen  0 )
 ereport(ERROR,
 
  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
  !   errmsg(translation failed from server 
  encoding to wchar_t)));

 Assert(wlen=len);
 wstr[wlen] = 0;
  ***
  *** 152,158 
 if ( wlen  0 )
 ereport(ERROR,
 
  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
  !   errmsg(transalation failed from 
  wchar_t to server encoding %d, errno)));
 Assert(wlen=len);
 out[wlen]='\0';
 }
  --- 195,201 
 if ( wlen  0 )
 ereport(ERROR,
 
  (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
  !   errmsg(translation failed from 
  wchar_t to server encoding %d, errno)));
 Assert(wlen=len);
 out[wlen]='\0';
 }
  Index: ts_locale.h
  ===
  RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
  retrieving revision 1.7
  diff -c -r1.7 ts_locale.h
  *** ts_locale.h4 Oct 2006 00:29:47 -   1.7
  --- ts_locale.h4 Jan 2007 12:16:00 -
  ***
  *** 38,45 
#else/* WIN32 */

/* correct mbstowcs */
  - #define char2wchar mbstowcs
#define wchar2char wcstombs
#endif   /* WIN32 */

 

Re: [HACKERS] Request for review: tsearch2 patch

2007-01-10 Thread Tatsuo Ishii
From: Teodor Sigaev [EMAIL PROTECTED]
Subject: Re: [HACKERS] Request for review: tsearch2 patch
Date: Wed, 10 Jan 2007 18:50:44 +0300
Message-ID: [EMAIL PROTECTED]

  I have tested with a locale-enabled environment and found a bug. Included
  is the new version of the patches. 
 Your patch causes crash on tsearch2's installcheck with 'initdb -E UTF8 
 --locale 
 C', simple way to reproduce:
 # select to_tsquery('default', '''New York''');
 server closed the connection unexpectedly
  This probably means the server terminated abnormally
  before or while processing the request.
 The connection to the server was lost. Attempting reset: Failed.

It seems to be a bug in the original tsearch2. Here are the patches.

--
*** wordparser/parser.c~2007-01-07 09:54:39.0 +0900
--- wordparser/parser.c 2007-01-11 10:33:41.0 +0900
***
*** 51,57 
if (prs-charmaxlen  1)
{
prs-usewide = true;
!   prs-wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs-lenstr);
prs-lenwstr = char2wchar(prs-wstr, prs-str, prs-lenstr);
}
else
--- 51,57 
if (prs-charmaxlen  1)
{
prs-usewide = true;
!   prs-wstr = (wchar_t *) palloc(sizeof(wchar_t) * 
(prs-lenstr+1));
prs-lenwstr = char2wchar(prs-wstr, prs-str, prs-lenstr);
}
else
--

  ! static int p_isalnum(TParser *prs) {
 ...
  !  if (lc_ctype_is_c())
  !  {
  !  if (c  0x7f)
  !  return 1;
 
 I have some doubts that any character greater than 0x7f is an alpha 
 symbol. 
 Is it a simple assumption or a workaround?

Yeah, it's a workaround. Since there's no concept other than
alpha/numeric/latin in tsearch2, Asian characters have to fall into
one of them.
--
Tatsuo Ishii
SRA OSS, Inc. Japan

---(end of broadcast)---
TIP 3: Have you checked our extensive FAQ?

   http://www.postgresql.org/docs/faq


Re: [HACKERS] Request for review: tsearch2 patch

2007-01-10 Thread Tatsuo Ishii
  I have tested with a locale-enabled environment and found a bug. Included
  is the new version of the patches. 
 Your patch causes crash on tsearch2's installcheck with 'initdb -E UTF8 
 --locale 
 C', simple way to reproduce:
 # select to_tsquery('default', '''New York''');
 server closed the connection unexpectedly
  This probably means the server terminated abnormally
  before or while processing the request.
 The connection to the server was lost. Attempting reset: Failed.

It seems to be a bug in the original tsearch2. Here are the patches.

--
*** wordparser/parser.c~2007-01-07 09:54:39.0 +0900
--- wordparser/parser.c 2007-01-11 10:33:41.0 +0900
***
*** 51,57 
if (prs-charmaxlen  1)
{
prs-usewide = true;
!   prs-wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs-lenstr);
prs-lenwstr = char2wchar(prs-wstr, prs-str, prs-lenstr);
}
else
--- 51,57 
if (prs-charmaxlen  1)
{
prs-usewide = true;
!   prs-wstr = (wchar_t *) palloc(sizeof(wchar_t) * 
(prs-lenstr+1));
prs-lenwstr = char2wchar(prs-wstr, prs-str, prs-lenstr);
}
else
--

  ! static int p_isalnum(TParser *prs) {
 ...
  !  if (lc_ctype_is_c())
  !  {
  !  if (c  0x7f)
  !  return 1;
 
 I have some doubts that any character greater than 0x7f is an alpha 
 symbol. 
 Is it a simple assumption or a workaround?

Yeah, it's a workaround. Since there's no concept other than
alpha/numeric/latin in tsearch2, Asian characters have to fall into
one of them.
--
Tatsuo Ishii
SRA OSS, Inc. Japan

---(end of broadcast)---
TIP 6: explain analyze is your friend


Re: [HACKERS] Request for review: tsearch2 patch

2007-01-04 Thread Tatsuo Ishii
I have tested with a locale-enabled environment and found a bug. Included
is the new version of the patches. 

Teodor, Oleg, what do you think about these patches?
If ok, shall I commit to CVS head?
--
Tatsuo Ishii
SRA OSS, Inc. Japan

 Hi,
 
 Here are patches against tsearch2 in CVS head.  Currently tsearch2
 does not work with multibyte encodings when the C locale is used. These
 patches are intended to solve the problem by using PostgreSQL's in-house
 multibyte function instead of mbstowcs, which does not work with the C
 locale. Also, iswalpha etc. will not be called in the case of the C locale
 since they do not work with it. Tested with the EUC_JP encoding
 (should work with any multibyte encoding). Existing single-byte
 encodings should not be broken by the patches, though I did not test that.
 --
 Tatsuo Ishii
 SRA OSS, Inc. Japan
Index: ts_locale.c
===
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.c
*** ts_locale.c 20 Nov 2006 14:03:30 -  1.7
--- ts_locale.c 4 Jan 2007 12:16:00 -
***
*** 63,68 
--- 63,101 
  
return mbstowcs(to, from, len);
  }
+ 
+ #else /* WIN32 */
+ 
+ size_t
+ char2wchar(wchar_t *to, const char *from, size_t len)
+ {
+   wchar_t *result;
+   size_t n;
+ 
+   if (to == NULL)
+   return 0;
+ 
+   if (lc_ctype_is_c())
+   {
+   /* allocate neccesary memory for to including NULL terminate 
*/
+   result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
+ 
+   /* do the conversion */
+   n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
+   if (n  0)
+   {
+   /* store the result */
+   if (n  len)
+   n = len;
+   memcpy(to, result, n*sizeof(wchar_t));
+   pfree(result);
+   *(to + n) = '\0';
+   }
+   return n;
+   }
+   return mbstowcs(to, from, len);
+ }
+ 
  #endif   /* WIN32 */
  
  int
***
*** 70,75 
--- 103,113 
  {
wchar_t character;
  
+   if (lc_ctype_is_c())
+   {
+   return isalpha(TOUCHAR(ptr));
+   }
+ 
char2wchar(character, ptr, 1);
  
return iswalpha((wint_t) character);
***
*** 80,85 
--- 118,128 
  {
wchar_t character;
  
+   if (lc_ctype_is_c())
+   {
+   return isprint(TOUCHAR(ptr));
+   }
+ 
char2wchar(character, ptr, 1);
  
return iswprint((wint_t) character);
***
*** 126,132 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from 
server encoding to wchar_t)));
  
Assert(wlen=len);
wstr[wlen] = 0;
--- 169,175 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from server 
encoding to wchar_t)));
  
Assert(wlen=len);
wstr[wlen] = 0;
***
*** 152,158 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from 
wchar_t to server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
--- 195,201 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from 
wchar_t to server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
Index: ts_locale.h
===
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.h
*** ts_locale.h 4 Oct 2006 00:29:47 -   1.7
--- ts_locale.h 4 Jan 2007 12:16:00 -
***
*** 38,45 
  #else /* WIN32 */
  
  /* correct mbstowcs */
- #define char2wchar mbstowcs
  #define wchar2char wcstombs
  #endif   /* WIN32 */
  
  #define t_isdigit(x)  ( pg_mblen(x)==1  isdigit( TOUCHAR(x) ) )
--- 38,46 
  #else /* WIN32 */
  
  /* correct mbstowcs */
  #define wchar2char wcstombs
+ size_tchar2wchar(wchar_t *to, const char *from, size_t len);
+ 
  #endif   /* WIN32 */
  
  #define 

[HACKERS] Request for review: tsearch2 patch

2007-01-01 Thread Tatsuo Ishii
Hi,

Here are patches against tsearch2 in CVS head.  Currently tsearch2
does not work with multibyte encodings when the C locale is used. These
patches are intended to solve the problem by using PostgreSQL's in-house
multibyte function instead of mbstowcs, which does not work with the C
locale. Also, iswalpha etc. will not be called in the case of the C locale
since they do not work with it. Tested with the EUC_JP encoding
(should work with any multibyte encoding). Existing single-byte
encodings should not be broken by the patches, though I did not test that.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
Index: ts_locale.c
===
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.c
*** ts_locale.c 20 Nov 2006 14:03:30 -  1.7
--- ts_locale.c 1 Jan 2007 12:22:50 -
***
*** 63,68 
--- 63,101 
  
return mbstowcs(to, from, len);
  }
+ 
+ #else /* WIN32 */
+ 
+ size_t
+ char2wchar(wchar_t *to, const char *from, size_t len)
+ {
+   wchar_t *result;
+   size_t n;
+ 
+   if (to == NULL)
+   return 0;
+ 
+   if (lc_ctype_is_c)
+   {
+   /* allocate neccesary memory for to including NULL terminate 
*/
+   result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
+ 
+   /* do the conversion */
+   n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
+   if (n  0)
+   {
+   /* store the result */
+   if (n  len)
+   n = len;
+   memcpy(to, result, n*sizeof(wchar_t));
+   pfree(result);
+   *(to + n) = '\0';
+   }
+   return n;
+   }
+   return mbstowcs(to, from, len);
+ }
+ 
  #endif   /* WIN32 */
  
  int
***
*** 70,75 
--- 103,113 
  {
wchar_t character;
  
+   if (lc_ctype_is_c)
+   {
+   return isalpha(TOUCHAR(ptr));
+   }
+ 
char2wchar(character, ptr, 1);
  
return iswalpha((wint_t) character);
***
*** 80,85 
--- 118,128 
  {
wchar_t character;
  
+   if (lc_ctype_is_c)
+   {
+   return isprint(TOUCHAR(ptr));
+   }
+ 
char2wchar(character, ptr, 1);
  
return iswprint((wint_t) character);
***
*** 126,132 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from 
server encoding to wchar_t)));
  
Assert(wlen=len);
wstr[wlen] = 0;
--- 169,175 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from server 
encoding to wchar_t)));
  
Assert(wlen=len);
wstr[wlen] = 0;
***
*** 152,158 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(transalation failed from 
wchar_t to server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
--- 195,201 
if ( wlen  0 )
ereport(ERROR,

(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!errmsg(translation failed from 
wchar_t to server encoding %d, errno)));
Assert(wlen=len);
out[wlen]='\0';
}
Index: ts_locale.h
===
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.h
*** ts_locale.h 4 Oct 2006 00:29:47 -   1.7
--- ts_locale.h 1 Jan 2007 12:22:50 -
***
*** 38,45 
  #else /* WIN32 */
  
  /* correct mbstowcs */
- #define char2wchar mbstowcs
  #define wchar2char wcstombs
  #endif   /* WIN32 */
  
  #define t_isdigit(x)  ( pg_mblen(x)==1  isdigit( TOUCHAR(x) ) )
--- 38,46 
  #else /* WIN32 */
  
  /* correct mbstowcs */
  #define wchar2char wcstombs
+ size_tchar2wchar(wchar_t *to, const char *from, size_t len);
+ 
  #endif   /* WIN32 */
  
  #define t_isdigit(x)  ( pg_mblen(x)==1  isdigit( TOUCHAR(x) ) )
***
*** 54,59 
--- 55,61 
   * t_iseq() should be called only for ASCII symbols
   */
  #define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) 
) :