Re: [HACKERS] Fix for seg picksplit function

Alexander Korotkov Mon, 15 Nov 2010 01:07:43 -0800

With help of Oleg I found, that line "*left = *right = FirstOffsetNumber;"
was needed only for 7.X compatibility, and it isn't needed any more.
Also, I've replaced "i - 1" by "i - FirstOffsetNumber" in array filling.
I believe it's more correct way, because it'll work correctly in the case
when FirstOffsetNumber alters.


----
With best regards,
Alexander Korotkov.

*** a/contrib/seg/seg.c
--- b/contrib/seg/seg.c
***************
*** 292,329 **** gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
  	return (result);
  }
  
  
  
  /*
  ** The GiST PickSplit method for segments
! ** We use Guttman's poly time split algorithm
  */
  GIST_SPLITVEC *
  gseg_picksplit(GistEntryVector *entryvec,
  			   GIST_SPLITVEC *v)
  {
! 	OffsetNumber i,
! 				j;
! 	SEG		   *datum_alpha,
! 			   *datum_beta;
  	SEG		   *datum_l,
  			   *datum_r;
! 	SEG		   *union_d,
! 			   *union_dl,
! 			   *union_dr;
! 	SEG		   *inter_d;
! 	bool		firsttime;
! 	float		size_alpha,
! 				size_beta,
! 				size_union,
! 				size_inter;
! 	float		size_waste,
! 				waste;
! 	float		size_l,
! 				size_r;
  	int			nbytes;
! 	OffsetNumber seed_1 = 1,
! 				seed_2 = 2;
  	OffsetNumber *left,
  			   *right;
  	OffsetNumber maxoff;
--- 292,333 ----
  	return (result);
  }
  
+ /*
+  * Auxiliary structure for picksplit method.
+  */
+ typedef struct
+ {
+ 	int index;
+ 	SEG *data;
+ } PickSplitSortItem;
  
+ /*
+  * Compare function for PickSplitSortItem based on seg_cmp.
+  */
+ static int
+ sort_item_cmp(const void *a, const void *b)
+ {
+ 	PickSplitSortItem *i1 = (PickSplitSortItem *)a;
+ 	PickSplitSortItem *i2 = (PickSplitSortItem *)b;
+ 	return seg_cmp(i1->data, i2->data);
+ }
  
  /*
  ** The GiST PickSplit method for segments
! ** Algorithm based on sorting. Incoming array of segs is sorting using seg_cmp
! ** function. After that first half of segs goes to the left datum, and the
! ** second half of segs goes to the right datum.
  */
  GIST_SPLITVEC *
  gseg_picksplit(GistEntryVector *entryvec,
  			   GIST_SPLITVEC *v)
  {
! 	OffsetNumber i;
  	SEG		   *datum_l,
  			   *datum_r;
! 	PickSplitSortItem	*sortItems;
  	int			nbytes;
! 	OffsetNumber seed_2;
  	OffsetNumber *left,
  			   *right;
  	OffsetNumber maxoff;
***************
*** 332,443 **** gseg_picksplit(GistEntryVector *entryvec,
  	fprintf(stderr, "picksplit\n");
  #endif
  
! 	maxoff = entryvec->n - 2;
! 	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
  	v->spl_left = (OffsetNumber *) palloc(nbytes);
  	v->spl_right = (OffsetNumber *) palloc(nbytes);
  
! 	firsttime = true;
! 	waste = 0.0;
! 
! 	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
  	{
! 		datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
! 		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
! 		{
! 			datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key);
! 
! 			/* compute the wasted space by unioning these guys */
! 			/* size_waste = size_union - size_inter; */
! 			union_d = seg_union(datum_alpha, datum_beta);
! 			rt_seg_size(union_d, &size_union);
! 			inter_d = seg_inter(datum_alpha, datum_beta);
! 			rt_seg_size(inter_d, &size_inter);
! 			size_waste = size_union - size_inter;
! 
! 			/*
! 			 * are these a more promising split that what we've already seen?
! 			 */
! 			if (size_waste > waste || firsttime)
! 			{
! 				waste = size_waste;
! 				seed_1 = i;
! 				seed_2 = j;
! 				firsttime = false;
! 			}
! 		}
  	}
  
  	left = v->spl_left;
  	v->spl_nleft = 0;
  	right = v->spl_right;
  	v->spl_nright = 0;
  
! 	datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key);
! 	datum_l = seg_union(datum_alpha, datum_alpha);
! 	rt_seg_size(datum_l, &size_l);
! 	datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key);
! 	datum_r = seg_union(datum_beta, datum_beta);
! 	rt_seg_size(datum_r, &size_r);
! 
! 	/*
! 	 * Now split up the regions between the two seeds.	An important property
! 	 * of this split algorithm is that the split vector v has the indices of
! 	 * items to be split in order in its left and right vectors.  We exploit
! 	 * this property by doing a merge in the code that actually splits the
! 	 * page.
! 	 *
! 	 * For efficiency, we also place the new index tuple in this loop. This is
! 	 * handled at the very end, when we have placed all the existing tuples
! 	 * and i == maxoff + 1.
! 	 */
! 
! 	maxoff = OffsetNumberNext(maxoff);
! 	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
  	{
! 		/*
! 		 * If we've already decided where to place this item, just put it on
! 		 * the right list.	Otherwise, we need to figure out which page needs
! 		 * the least enlargement in order to store the item.
! 		 */
! 
! 		if (i == seed_1)
! 		{
! 			*left++ = i;
! 			v->spl_nleft++;
! 			continue;
! 		}
! 		else if (i == seed_2)
! 		{
! 			*right++ = i;
! 			v->spl_nright++;
! 			continue;
! 		}
! 
! 		/* okay, which page needs least enlargement? */
! 		datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
! 		union_dl = seg_union(datum_l, datum_alpha);
! 		union_dr = seg_union(datum_r, datum_alpha);
! 		rt_seg_size(union_dl, &size_alpha);
! 		rt_seg_size(union_dr, &size_beta);
! 
! 		/* pick which page to add it to */
! 		if (size_alpha - size_l < size_beta - size_r)
  		{
! 			datum_l = union_dl;
! 			size_l = size_alpha;
! 			*left++ = i;
  			v->spl_nleft++;
  		}
  		else
  		{
! 			datum_r = union_dr;
! 			size_r = size_alpha;
! 			*right++ = i;
  			v->spl_nright++;
  		}
  	}
- 	*left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */
  
  	v->spl_ldatum = PointerGetDatum(datum_l);
  	v->spl_rdatum = PointerGetDatum(datum_r);
--- 336,386 ----
  	fprintf(stderr, "picksplit\n");
  #endif
  
! 	maxoff = entryvec->n - 1;
! 	nbytes = (maxoff + 1) * sizeof(OffsetNumber);
! 	sortItems = (PickSplitSortItem *)palloc(maxoff * sizeof(PickSplitSortItem));
  	v->spl_left = (OffsetNumber *) palloc(nbytes);
  	v->spl_right = (OffsetNumber *) palloc(nbytes);
  
! 	/*
! 	 * Preparing auxiliary array and sorting.
! 	 */
! 	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
  	{
! 		sortItems[i - FirstOffsetNumber].index = i;
! 		sortItems[i - FirstOffsetNumber].data = (SEG *) DatumGetPointer(entryvec->vector[i].key);
  	}
+ 	qsort(sortItems, maxoff, sizeof(PickSplitSortItem), sort_item_cmp);
+ 	seed_2 = maxoff / 2;
  
  	left = v->spl_left;
  	v->spl_nleft = 0;
  	right = v->spl_right;
  	v->spl_nright = 0;
  
! 	for (i = 0; i < maxoff; i++)
  	{
! 		/* First half of segs goes to the left datum. */
! 		if (i < seed_2)
  		{
! 			datum_l = seg_union(sortItems[i].data,
! 								(i == 0
! 								 ? sortItems[i].data /* union with self at start */
! 								 : datum_l) /* union with existing value */ );
! 			*left++ = sortItems[i].index;
  			v->spl_nleft++;
  		}
+ 		/* The other half of segs goes to the right datum. */
  		else
  		{
! 			datum_r = seg_union(sortItems[i].data,
! 								(i == seed_2
! 								 ? sortItems[i].data /* union with self at start */
! 								 : datum_r) /* union with existing value */ );
! 			*right++ = sortItems[i].index;
  			v->spl_nright++;
  		}
  	}
  
  	v->spl_ldatum = PointerGetDatum(datum_l);
  	v->spl_rdatum = PointerGetDatum(datum_r);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Fix for seg picksplit function

Reply via email to