Hi,
The upper(), lower(), and initcap() functions truncate their results
on Japanese Windows when, for example, the server encoding is UTF-8
and the LC_CTYPE setting is Japanese_Japan.932.
Below is an example.
$ psql
psql (8.4devel)
Type "help" for help.
inoue=# \encoding sjis
inoue=# show server_encoding;
server_encoding
-
UTF8
(1 行)
inoue=# show LC_CTYPE;
lc_ctype
Japanese_Japan.932
(1 行)
inoue=# \set jpnstr '''カタカナ'''
inoue=# select char_length(:jpnstr);
char_length
-
4
(1 行)
inoue=# select upper(:jpnstr);
upper
カタカ
(1 行)
inoue=# select char_length(upper(:jpnstr));
char_length
-
3
(1 行)
The output of the last command should be 4, not 3.
Attached is a patch to fix the bug.
After applying the patch, the result is:
inoue=# select upper(:jpnstr);
upper
--
カタカナ
(1 行)
inoue=# select char_length(upper(:jpnstr));
char_length
-
4
(1 行)
regards,
Hiroshi Inoue
Index: formatting.c
===
RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.151
diff -c -c -r1.151 formatting.c
*** formatting.c1 Dec 2008 17:11:18 - 1.151
--- formatting.c14 Dec 2008 09:09:00 -
***
*** 1462,1467
--- 1462,1468
{
wchar_t *workspace;
int curr_char = 0;
+ size_t max_len, alloc_size;
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
***
*** 1472,1480
workspace[curr_char] = towlower(workspace[curr_char]);
/* Make result large enough; case change might change number of
bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);
! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
pfree(workspace);
}
else
--- 1473,1489
workspace[curr_char] = towlower(workspace[curr_char]);
/* Make result large enough; case change might change number of
bytes */
! #ifdef WIN32
! max_len = pg_database_encoding_max_length();
! if (MB_CUR_MAX > max_len)
! max_len = MB_CUR_MAX;
! #else
! max_len = MB_CUR_MAX;
! #endif
! alloc_size = curr_char * max_len + 1;
! result = palloc(alloc_size);
! wchar2char(result, workspace, alloc_size);
pfree(workspace);
}
else
***
*** 1510,1515
--- 1519,1525
{
wchar_t *workspace;
int curr_char = 0;
+ size_t max_len, alloc_size;
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
***
*** 1520,1528
workspace[curr_char] = towupper(workspace[curr_char]);
/* Make result large enough; case change might change number of
bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);
! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
pfree(workspace);
}
else
--- 1530,1546
workspace[curr_char] = towupper(workspace[curr_char]);
/* Make result large enough; case change might change number of
bytes */
! #ifdef WIN32
! max_len = pg_database_encoding_max_length();
! if (MB_CUR_MAX > max_len)
! max_len = MB_CUR_MAX;
! #else
! max_len = MB_CUR_MAX;
! #endif
! alloc_size = curr_char * max_len + 1;
! result = palloc(alloc_size);
! wchar2char(result, workspace, alloc_size);
pfree(workspace);
}
else
***
*** 1559,1564
--- 1577,1583
{
wchar_t *workspace;
int curr_char = 0;
+ size_t max_len, alloc_size;
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
***
*** 1575,1583
}
/* Make result large enough; case change might change number of
bytes */
! result = palloc(curr_char * MB_CUR_MAX + 1);
! wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
pfree(workspace);
}
else
--- 1594,1610
}
/* Make result large enough; case change might change number of