*** varlena.c	2008-08-31 20:31:56.000000000 +0100
--- varlena.c	2008-09-05 19:41:04.000000000 +0100
***************
*** 40,45 ****
--- 40,48 ----
  	pg_wchar   *wstr2;			/* note: these are palloc'd */
  	int			len1;			/* string lengths in logical characters */
  	int			len2;
+ 	int	skiptable[256];			/* For boyer-moore searching */
+ 	int	skiptablesize;			/* How much of the skiptable do we utilize */
+ 
  } TextPositionState;
  
  #define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
***************
*** 804,816 ****
   * called multiple times with increasing values of start_pos, which is
   * the 1-based character position to start the search from.  The "state"
   * variable is normally just a local variable in the caller.
   */
- 
  static void
  text_position_setup(text *t1, text *t2, TextPositionState *state)
  {
  	int			len1 = VARSIZE_ANY_EXHDR(t1);
  	int			len2 = VARSIZE_ANY_EXHDR(t2);
  
  	if (pg_database_encoding_max_length() == 1)
  	{
--- 807,821 ----
   * called multiple times with increasing values of start_pos, which is
   * the 1-based character position to start the search from.  The "state"
   * variable is normally just a local variable in the caller.
+  *
+  * David Rowley: 2008-09-04 -- Updated for Boyer-Moore searching
   */
  static void
  text_position_setup(text *t1, text *t2, TextPositionState *state)
  {
  	int			len1 = VARSIZE_ANY_EXHDR(t1);
  	int			len2 = VARSIZE_ANY_EXHDR(t2);
+ 	int			ai;
  
  	if (pg_database_encoding_max_length() == 1)
  	{
***************
*** 820,825 ****
--- 825,880 ----
  		state->str2 = VARDATA_ANY(t2);
  		state->len1 = len1;
  		state->len2 = len2;
+ 
+ 		/* Here we must determine how much of the skip table we should use.
+ 		 * With haystack lengths of only a few characters we don't really want
+ 		 * to have to initialize the full 256 elements of the table. Such an
+ 		 * initialization would take too long in comparison to the rest of the
+ 		 * search time.
+ 		 * To get around this problem when searching for smaller strings we only
+ 		 * use part of the skiptable, the larger the search (or at least the longer
+ 		 * we estimate that it will take) the more we use of the skip table.
+ 		 *
+ 		 * After quite a few benchmarks a rule of thumb seems to show that the
+ 		 * best skip table size is around 10 times less than the length of the
+ 		 * haystack. Of course this is not an exact science. The needle length
+ 		 * counts and also the mix of characters in both the needle and the haystack.
+ 		 * It might look like a good idea to make the skip table around the same size
+ 		 * as the needle, however that likely means more collisions or possibly no 
+ 		 * empty positions in the table. That would mean we'd never be able to
+ 		 * skip the length of the needle.
+ 		 * 
+ 		 * The following code calculates the skip table's size. Notice that for small
+ 		 * searches we're barely using any of the skiptable. NOTE(review): the code
+ 		 * keys off len2, the needle length, not the haystack length -- confirm.
+ 		 */
+ 
+ 		if (state->len2 < 16)
+ 			state->skiptablesize = 3;
+ 		else if (state->len2 < 64)
+ 			state->skiptablesize = 7;
+ 		else if (state->len2 < 128)
+ 			state->skiptablesize = 15;
+ 		else if (state->len2 < 512)
+ 			state->skiptablesize = 31;
+ 		else if (state->len2 < 2048)
+ 			state->skiptablesize = 63;
+ 		else if (state->len2 < 4096)
+ 			state->skiptablesize = 127;
+ 		else
+ 			state->skiptablesize = 255;
+ 
+ 		/* Initialize the skip table. We set all elements in use to the needle length */
+ 		for (ai = 0; ai <= state->skiptablesize; ai++)
+ 			state->skiptable[ai] = state->len2;
+ 
+ 		/* Here we process the needle marking the last occurrence
+ 		 * of each character (ignoring the very last character)
+ 		 */
+ 		for (ai = 0; ai < state->len2 - 1; ai++)
+ 			state->skiptable[(unsigned char) state->str2[ai] & state->skiptablesize] = state->len2 - ai - 1;
+ 
+ 
  	}
  	else
  	{
***************
*** 837,903 ****
  		state->wstr2 = p2;
  		state->len1 = len1;
  		state->len2 = len2;
  	}
  }
  
! static int
  text_position_next(int start_pos, TextPositionState *state)
  {
! 	int			pos = 0,
! 				p,
! 				px;
  
  	Assert(start_pos > 0);		/* else caller error */
  
- 	if (state->len2 <= 0)
- 		return start_pos;		/* result for empty pattern */
  
! 	if (!state->use_wchar)
! 	{
! 		/* simple case - single byte encoding */
! 		char	   *p1 = state->str1;
! 		char	   *p2 = state->str2;
  
- 		/* no use in searching str past point where search_str will fit */
- 		px = (state->len1 - state->len2);
  
! 		p1 += start_pos - 1;
  
- 		for (p = start_pos - 1; p <= px; p++)
- 		{
- 			if ((*p1 == *p2) && (strncmp(p1, p2, state->len2) == 0))
- 			{
- 				pos = p + 1;
- 				break;
- 			}
- 			p1++;
- 		}
- 	}
- 	else
- 	{
- 		/* not as simple - multibyte encoding */
- 		pg_wchar   *p1 = state->wstr1;
- 		pg_wchar   *p2 = state->wstr2;
  
! 		/* no use in searching str past point where search_str will fit */
! 		px = (state->len1 - state->len2);
  
- 		p1 += start_pos - 1;
  
! 		for (p = start_pos - 1; p <= px; p++)
! 		{
! 			if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, state->len2) == 0))
! 			{
! 				pos = p + 1;
! 				break;
! 			}
! 			p1++;
! 		}
! 	}
  
! 	return pos;
  }
  
  static void
  text_position_cleanup(TextPositionState *state)
  {
--- 892,1043 ----
  		state->wstr2 = p2;
  		state->len1 = len1;
  		state->len2 = len2;
+ 
+ 
+ 		if (state->len2 < 16)
+ 			state->skiptablesize = 3;
+ 		else if (state->len2 < 64)
+ 			state->skiptablesize = 7;
+ 		else if (state->len2 < 128)
+ 			state->skiptablesize = 15;
+ 		else if (state->len2 < 512)
+ 			state->skiptablesize = 31;
+ 		else if (state->len2 < 2048)
+ 			state->skiptablesize = 63;
+ 		else if (state->len2 < 4096)
+ 			state->skiptablesize = 127;
+ 		else
+ 			state->skiptablesize = 255;
+ 
+ 		/* Initialize the skip table. We set all elements in use to the needle length */
+ 		for (ai = 0; ai <= state->skiptablesize; ai++)
+ 			state->skiptable[ai] = state->len2;
+ 
+ 		/* Here we process the needle marking the last occurrence
+ 		 * of each character (ignoring the very last character)
+ 		 */
+ 		for (ai = 0; ai < state->len2 - 1; ai++)
+ 			state->skiptable[state->wstr2[ai] & state->skiptablesize] = state->len2 - ai - 1;
+ 
  	}
  }
  
! /* text_position_next
!  * David Rowley 2008-09-05
!  * Uses Boyer-Moore searching, driven by the skip table held in *state
!  */
! static int
  text_position_next(int start_pos, TextPositionState *state)
  {
! 	/*
! 	 * len1/len2 are the haystack/needle lengths in characters.  Returns the
! 	 * 1-based position of the first match at or after start_pos, or 0.
! 	 */
  
  	Assert(start_pos > 0);		/* else caller error */
  
  
! 	if (state->len2 <= 0)
! 		return start_pos;		/* Empty needle, found it! */
  
  
! 	/* Switch to a zero-based offset for array indexing */
! 	start_pos--;
  
  
! 	/* Eliminate the impossible first: the needle cannot fit in what is left
! 	 * of the haystack (this also keeps hptr below within the haystack).
! 	 */
! 	if (state->len1 < start_pos + state->len2)
! 		return 0;
  
  
! 	if (!state->use_wchar)
! 	{
! 		char	   *needle = state->str2;
! 		char	   *haystack = state->str1;
! 		char	   *haystack_end = &haystack[state->len1];
! 		char	   *hptr;
! 		char	   *nptr;
! 		char	   *p;
! 
! 		/* Line the needle's last character up at start_pos + len2 - 1 */
! 		hptr = &haystack[state->len2 - 1 + start_pos];
! 
! 		while (hptr < haystack_end)
! 		{
! 			nptr = &needle[state->len2 - 1];	/* Point to the end of the needle */
! 			p = hptr;
! 
! 			while (*nptr == *p)
! 			{
! 				/* Do we have it? Return 1 based array pos */
! 				if (nptr == needle)
! 					return p - haystack + 1;
! 				nptr--;
! 				p--;
! 			}
! 
! 			/* No match at this alignment.  Ask the skiptable how far to
! 			 * advance, keyed on the haystack character under the needle's
! 			 * last position.  If that character's slot was set by a needle
! 			 * character we line the two up; otherwise we skip a whole needle
! 			 * length.  Slots are shared by all characters with the same
! 			 * masked value, so the aligned characters may in fact differ.
! 			 */
! 
! 			hptr += state->skiptable[(unsigned char) *hptr & state->skiptablesize];
! 		}
! 
! 		return 0;				/* Not found */
! 
! 	}
! 	else
! 	{
! 		/* The multibyte char version. This works exactly the same way,
! 		 * but compares pg_wchar elements instead of bytes.
! 		 */
! 		pg_wchar   *needle = state->wstr2;
! 		pg_wchar   *haystack = state->wstr1;
! 		pg_wchar   *haystack_end = &haystack[state->len1];
! 		pg_wchar   *hptr;
! 		pg_wchar   *nptr;
! 		pg_wchar   *p;
! 
! 		/* Line the needle's last character up at start_pos + len2 - 1 */
! 		hptr = &haystack[state->len2 - 1 + start_pos];
! 
! 		while (hptr < haystack_end)
! 		{
! 			nptr = &needle[state->len2 - 1];	/* Point to the end of the needle */
! 			p = hptr;
! 
! 			while (*nptr == *p)
! 			{
! 				/* Do we have it? Return 1 based array pos */
! 				if (nptr == needle)
! 					return p - haystack + 1;
! 				nptr--;
! 				p--;
! 			}
! 
! 			/* No match at this alignment.  As in the single-byte case, look
! 			 * up the skip for the haystack character under the needle's last
! 			 * position.  Only the low bits selected by skiptablesize are
! 			 * used, so distinct characters can share a slot.
! 			 */
! 
! 
! 
! 			hptr += state->skiptable[*hptr & state->skiptablesize];
! 		}
  
! 		return 0;				/* Not found */
! 	}
  }
  
+ 
+ 
  static void
  text_position_cleanup(TextPositionState *state)
  {
