Guillaume Smet wrote:
app_hls
On 9/20/07, Andrew Dunstan <[EMAIL PROTECTED]> wrote:
Can you retry both sets of tests but this time in C locale? The lower()
code works differently in C locale, and it might be that we need to look
at tweaking just one case.
Please try the attached patch, which goes back to using a special case
for single-byte ILIKE. I want to make sure that, at the very least, we
don't cause a performance regression with the changes made in this
release. I can't see an obvious way around the problem for the
multi-byte case - lower() then requires converting to and from wchar,
and I don't see a way of avoiding the call to lower(). If this is a major
blocker, I would suggest you look at an alternative to using ILIKE for
your UTF8 data.
cheers
andrew
Index: src/backend/utils/adt/like.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/like.c,v
retrieving revision 1.69
diff -c -r1.69 like.c
*** src/backend/utils/adt/like.c 2 Jun 2007 02:03:42 -0000 1.69
--- src/backend/utils/adt/like.c 20 Sep 2007 13:12:39 -0000
***************
*** 36,41 ****
--- 36,43 ----
static int UTF8_MatchText(char *t, int tlen, char *p, int plen);
+ static int SB_IMatchText(char *t, int tlen, char *p, int plen);
+
static int GenericMatchText(char *s, int slen, char* p, int plen);
static int Generic_Text_IC_like(text *str, text *pat);
***************
*** 104,109 ****
--- 106,117 ----
#include "like_match.c"
+ /* setup to compile like_match.c for single byte case insensitive matches */
+ #define MATCH_LOWER
+ #define NextChar(p, plen) NextByte((p), (plen))
+ #define MatchText SB_IMatchText
+
+ #include "like_match.c"
/* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
***************
*** 132,146 ****
int slen,
plen;
! /* Force inputs to lower case to achieve case insensitivity */
! str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
! pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
! s = VARDATA(str);
! slen = (VARSIZE(str) - VARHDRSZ);
! p = VARDATA(pat);
! plen = (VARSIZE(pat) - VARHDRSZ);
! return GenericMatchText(s, slen, p, plen);
}
/*
--- 140,171 ----
int slen,
plen;
! /* For efficiency reasons, in the single byte case we don't call
! * lower() on the pattern and text, but instead call tolower() on each
! * character. In the multi-byte case we don't have much choice :-(
! */
! if (pg_database_encoding_max_length() > 1)
! {
! pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
! p = VARDATA(pat);
! plen = (VARSIZE(pat) - VARHDRSZ);
! str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
! s = VARDATA(str);
! slen = (VARSIZE(str) - VARHDRSZ);
! if (GetDatabaseEncoding() == PG_UTF8)
! return UTF8_MatchText(s, slen, p, plen);
! else
! return MB_MatchText(s, slen, p, plen);
! }
! else
! {
! p = VARDATA(pat);
! plen = (VARSIZE(pat) - VARHDRSZ);
! s = VARDATA(str);
! slen = (VARSIZE(str) - VARHDRSZ);
! return SB_IMatchText(s, slen, p, plen);
! }
}
/*
Index: src/backend/utils/adt/like_match.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/like_match.c,v
retrieving revision 1.16
diff -c -r1.16 like_match.c
*** src/backend/utils/adt/like_match.c 2 Jun 2007 02:03:42 -0000 1.16
--- src/backend/utils/adt/like_match.c 20 Sep 2007 13:12:39 -0000
***************
*** 13,18 ****
--- 13,19 ----
* NextChar
* MatchText - to name of function wanted
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
+ * MATCH_LOWER - define iff using tolower() on text and pattern chars
*
* Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
***************
*** 68,73 ****
--- 69,80 ----
*--------------------
*/
+ #ifdef MATCH_LOWER
+ #define TCHAR(t) tolower((t))
+ #else
+ #define TCHAR(t) (t)
+ #endif
+
static int
MatchText(char *t, int tlen, char *p, int plen)
{
***************
*** 143,155 ****
else
{
! char firstpat = *p ;
if (*p == '\\')
{
if (plen < 2)
return LIKE_FALSE;
! firstpat = p[1];
}
while (tlen > 0)
--- 150,162 ----
else
{
! char firstpat = TCHAR(*p) ;
if (*p == '\\')
{
if (plen < 2)
return LIKE_FALSE;
! firstpat = TCHAR(p[1]);
}
while (tlen > 0)
***************
*** 158,164 ****
* Optimization to prevent most recursion: don't recurse
* unless first pattern byte matches first text byte.
*/
! if (*t == firstpat)
{
int matched = MatchText(t, tlen, p, plen);
--- 165,171 ----
* Optimization to prevent most recursion: don't recurse
* unless first pattern byte matches first text byte.
*/
! if (TCHAR(*t) == firstpat)
{
int matched = MatchText(t, tlen, p, plen);
***************
*** 183,189 ****
NextByte(p, plen);
continue;
}
! else if (*t != *p)
{
/*
* Not the single-character wildcard and no explicit match? Then
--- 190,196 ----
NextByte(p, plen);
continue;
}
! else if (TCHAR(*t) != TCHAR(*p))
{
/*
* Not the single-character wildcard and no explicit match? Then
***************
*** 338,340 ****
--- 345,352 ----
#undef do_like_escape
#endif
+ #undef TCHAR
+
+ #ifdef MATCH_LOWER
+ #undef MATCH_LOWER
+ #endif
---------------------------(end of broadcast)---------------------------
TIP 3: Have you checked our extensive FAQ?
http://www.postgresql.org/docs/faq