Bruce Momjian wrote:
> Bruce Momjian wrote:
> > Alvaro Herrera wrote:
> > > Bruce Momjian wrote:
> > > 
> > > > I moved str_initcap() over into oracle_compat.c and then had initcap()
> > > > convert to/from TEXT to call it.  The code is a little weird because
> > > > str_initcap() needs to convert to text to use texttowcs(), so in
> > > > multibyte encodings initcap converts the string to text, then to char,
> > > > then to text to call texttowcs().  I didn't see a cleaner way to do
> > > > this.
> > > 
> > > Why not use wchar2char?  It seems there's room for extra cleanup here.
> > > 
> > > Also, the prototype of str_initcap in builtins.h looks out of place.
> > 
> > I talked to Alvaro on IM, and there is certainly much more cleanup to do
> > in this area. I will work from the bottom up.  First, is moving the
> > USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using
> > USE_WIDE_UPPER_LOWER instead.  Patch attached and applied.
> 
> The second step is to move wchar2char() and char2wchar() from tsearch
> into /mb to be easier to use for other modules;  also move pnstrdup(). 

The third step is for oracle_compat.c::initcap() to use
formatting.c::str_initcap().  You can see the result;  patch attached
(not applied).

This greatly reduces the size of initcap(), with the downside that we
are making two extra copies of the string to convert it to/from char*.

Is this acceptable?  If it is, I will do the same for upper()/lower(),
with a similar reduction in code size and gain in modularity.

If not, perhaps I should keep the non-multibyte code in initcap() and
have only the multibyte case use str_initcap().

-- 
  Bruce Momjian  <[EMAIL PROTECTED]>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c	17 Jun 2008 16:09:06 -0000	1.142
--- src/backend/utils/adt/formatting.c	21 Jun 2008 20:00:45 -0000
***************
*** 1499,1526 ****
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
  	{
  		wchar_t		*workspace;
! 		text		*in_text;
! 		text		*out_text;
! 		int			i;
  
! 		in_text = cstring_to_text(buff);
! 		workspace = texttowcs(in_text);
  
! 		for (i = 0; workspace[i] != 0; i++)
  		{
  			if (wasalnum)
! 				workspace[i] = towlower(workspace[i]);
  			else
! 				workspace[i] = towupper(workspace[i]);
! 			wasalnum = iswalnum(workspace[i]);
  		}
  
! 		out_text = wcstotext(workspace, i);
! 		result = text_to_cstring(out_text);
  
  		pfree(workspace);
- 		pfree(in_text);
- 		pfree(out_text);
  	}
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
--- 1499,1525 ----
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
  	{
  		wchar_t		*workspace;
! 		int			curr_char = 0;
  
! 		/* Output workspace cannot have more codes than input bytes */
! 		workspace = (wchar_t *) palloc((strlen(buff) + 1) * sizeof(wchar_t));
  
! 		char2wchar(workspace, strlen(buff) + 1, buff, strlen(buff) + 1);
! 
! 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
  		{
  			if (wasalnum)
! 				workspace[curr_char] = towlower(workspace[curr_char]);
  			else
! 				workspace[curr_char] = towupper(workspace[curr_char]);
! 			wasalnum = iswalnum(workspace[curr_char]);
  		}
  
! 		/* Make result large enough; case change might change number of bytes */
! 		result = palloc(curr_char * MB_CUR_MAX + 1);
  
+ 		wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
  		pfree(workspace);
  	}
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c	17 Jun 2008 16:09:06 -0000	1.80
--- src/backend/utils/adt/oracle_compat.c	21 Jun 2008 20:00:45 -0000
***************
*** 467,530 ****
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER
  
! 	/*
! 	 * Use wide char code only when max encoding length > 1 and ctype != C.
! 	 * Some operating systems fail with multi-byte encodings and a C locale.
! 	 * Also, for a C locale there is no need to process as multibyte.
! 	 */
! 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 	{
! 		text	   *string = PG_GETARG_TEXT_PP(0);
! 		text	   *result;
! 		wchar_t    *workspace;
! 		int			wasalnum = 0;
! 		int			i;
! 
! 		workspace = texttowcs(string);
! 
! 		for (i = 0; workspace[i] != 0; i++)
! 		{
! 			if (wasalnum)
! 				workspace[i] = towlower(workspace[i]);
! 			else
! 				workspace[i] = towupper(workspace[i]);
! 			wasalnum = iswalnum(workspace[i]);
! 		}
! 
! 		result = wcstotext(workspace, i);
! 
! 		pfree(workspace);
! 
! 		PG_RETURN_TEXT_P(result);
! 	}
! 	else
! #endif   /* USE_WIDE_UPPER_LOWER */
! 	{
! 		text	   *string = PG_GETARG_TEXT_P_COPY(0);
! 		int			wasalnum = 0;
! 		char	   *ptr;
! 		int			m;
! 
! 		/*
! 		 * Since we copied the string, we can scribble directly on the value
! 		 */
! 		ptr = VARDATA(string);
! 		m = VARSIZE(string) - VARHDRSZ;
  
! 		while (m-- > 0)
! 		{
! 			if (wasalnum)
! 				*ptr = tolower((unsigned char) *ptr);
! 			else
! 				*ptr = toupper((unsigned char) *ptr);
! 			wasalnum = isalnum((unsigned char) *ptr);
! 			ptr++;
! 		}
! 
! 		PG_RETURN_TEXT_P(string);
! 	}
  }
  
  
--- 467,482 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! 	char	*in_string, *out_string;
! 	text	*result;
  
! 	in_string = text_to_cstring(PG_GETARG_TEXT_PP(0));
! 	out_string = str_initcap(in_string);
! 	pfree(in_string);
! 	result = cstring_to_text(out_string);
! 	pfree(out_string);
  
! 	PG_RETURN_TEXT_P(result);
  }
  
  
-- 
Sent via pgsql-patches mailing list (pgsql-patches@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-patches

Reply via email to