On Wed, 22 Apr 2009, Matthew Wakeling wrote:
I will post a patch when I have ported my bioseg code over to the seg data type.

Here is my patch ported over to the seg contrib package, attached. Apply it to seg.c and all should be well. A similar thing needs to be done to cube, but I haven't looked at that.

Matthew

--
An optimist sees the glass as half full, a pessimist as half empty,
and an engineer as having redundant storage capacity.
--- seg.c       2006-09-10 18:36:51.000000000 +0100
+++ seg.c_new   2009-04-22 17:03:13.000000000 +0100
@@ -69,7 +69,6 @@
 bool           seg_right(SEG * a, SEG * b);
 bool           seg_over_right(SEG * a, SEG * b);
 SEG               *seg_union(SEG * a, SEG * b);
-SEG               *seg_inter(SEG * a, SEG * b);
 void           rt_seg_size(SEG * a, float *sz);
 float     *seg_size(SEG * a);
 
@@ -269,14 +268,22 @@
 gseg_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
 {
        SEG                *ud;
+    SEG        *origrange;
+    SEG        *newrange;
        float           tmp1,
                                tmp2;
 
-       ud = seg_union((SEG *) DatumGetPointer(origentry->key),
-                                  (SEG *) DatumGetPointer(newentry->key));
+    origrange = (SEG *) DatumGetPointer(origentry->key);
+    newrange = (SEG *) DatumGetPointer(newentry->key);
+       ud = seg_union(origrange, newrange);
        rt_seg_size(ud, &tmp1);
-       rt_seg_size((SEG *) DatumGetPointer(origentry->key), &tmp2);
-       *result = tmp1 - tmp2;
+       rt_seg_size(origrange, &tmp2);
+    if (tmp1 == tmp2) {
+        rt_seg_size(newrange, &tmp1);
+        *result = (tmp2 - tmp1) / tmp2;
+    } else {
+        *result = tmp1 - tmp2 + 1.0;
+    }
 
 #ifdef GIST_DEBUG
        fprintf(stderr, "penalty\n");
@@ -297,27 +304,16 @@
                           GIST_SPLITVEC *v)
 {
        OffsetNumber i,
-                               j;
        SEG                *datum_alpha,
-                          *datum_beta;
        SEG                *datum_l,
                           *datum_r;
-       SEG                *union_d,
-                          *union_dl,
-                          *union_dr;
-       SEG                *inter_d;
-       bool            firsttime;
-       float           size_alpha,
-                               size_beta,
-                               size_union,
-                               size_inter;
-       float           size_waste,
-                               waste;
-       float           size_l,
-                               size_r;
+    bool        firsttime;
+       float           lowest,
+                               highest,
+                midpoint,
+                split,
+                midpointsum;
        int                     nbytes;
-       OffsetNumber seed_1 = 1,
-                               seed_2 = 2;
        OffsetNumber *left,
                           *right;
        OffsetNumber maxoff;
@@ -326,107 +322,64 @@
        fprintf(stderr, "picksplit\n");
 #endif
 
-       maxoff = entryvec->n - 2;
+       maxoff = entryvec->n - 1;
        nbytes = (maxoff + 2) * sizeof(OffsetNumber);
        v->spl_left = (OffsetNumber *) palloc(nbytes);
        v->spl_right = (OffsetNumber *) palloc(nbytes);
 
+    midpointsum = 0.0;
        firsttime = true;
-       waste = 0.0;
+    lowest = 0.0;
+    highest = 0.0;
 
-       for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
        {
                datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
-               for (j = OffsetNumberNext(i); j <= maxoff; j = 
OffsetNumberNext(j))
-               {
-                       datum_beta = (SEG *) 
DatumGetPointer(entryvec->vector[j].key);
-
-                       /* compute the wasted space by unioning these guys */
-                       /* size_waste = size_union - size_inter; */
-                       union_d = seg_union(datum_alpha, datum_beta);
-                       rt_seg_size(union_d, &size_union);
-                       inter_d = seg_inter(datum_alpha, datum_beta);
-                       rt_seg_size(inter_d, &size_inter);
-                       size_waste = size_union - size_inter;
-
-                       /*
-                        * are these a more promising split that what we've 
already seen?
-                        */
-                       if (size_waste > waste || firsttime)
-                       {
-                               waste = size_waste;
-                               seed_1 = i;
-                               seed_2 = j;
-                               firsttime = false;
-                       }
-               }
+        midpoint = (datum_alpha->lower + datum_alpha->upper) / 2;
+        midpointsum += midpoint;
+        if (firsttime || (midpoint < lowest))
+        {
+            lowest = midpoint;
+        }
+        if (firsttime || (midpoint > highest))
+        {
+            highest = midpoint;
+        }
+        firsttime = false;
        }
 
+    split = midpointsum / maxoff;
        left = v->spl_left;
        v->spl_nleft = 0;
        right = v->spl_right;
        v->spl_nright = 0;
 
-       datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key);
-       datum_l = seg_union(datum_alpha, datum_alpha);
-       rt_seg_size(datum_l, &size_l);
-       datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key);
-       datum_r = seg_union(datum_beta, datum_beta);
-       rt_seg_size(datum_r, &size_r);
+    datum_l = (SEG *) palloc(sizeof(*datum_l));
+    datum_l->lower = lowest;
+    datum_l->upper = lowest;
+    datum_r = (SEG *) palloc(sizeof(*datum_r));
+    datum_r->lower = highest;
+    datum_r->upper = highest;
 
        /*
-        * Now split up the regions between the two seeds.      An important 
property
-        * of this split algorithm is that the split vector v has the indices of
-        * items to be split in order in its left and right vectors.  We exploit
-        * this property by doing a merge in the code that actually splits the
-        * page.
-        *
-        * For efficiency, we also place the new index tuple in this loop. This 
is
-        * handled at the very end, when we have placed all the existing tuples
-        * and i == maxoff + 1.
+        * Now split up the regions between the two seeds.
         */
 
-       maxoff = OffsetNumberNext(maxoff);
        for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
        {
-               /*
-                * If we've already decided where to place this item, just put 
it on
-                * the right list.      Otherwise, we need to figure out which 
page needs
-                * the least enlargement in order to store the item.
-                */
-
-               if (i == seed_1)
-               {
-                       *left++ = i;
-                       v->spl_nleft++;
-                       continue;
-               }
-               else if (i == seed_2)
-               {
-                       *right++ = i;
-                       v->spl_nright++;
-                       continue;
-               }
-
-               /* okay, which page needs least enlargement? */
                datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
-               union_dl = seg_union(datum_l, datum_alpha);
-               union_dr = seg_union(datum_r, datum_alpha);
-               rt_seg_size(union_dl, &size_alpha);
-               rt_seg_size(union_dr, &size_beta);
+        midpoint = (datum_alpha->lower + datum_alpha->upper) / 2;
 
                /* pick which page to add it to */
-               if (size_alpha - size_l < size_beta - size_r)
+        if (mipoint <= split)
                {
-                       datum_l = union_dl;
-                       size_l = size_alpha;
+                       datum_l = seg_union(datum_l, datum_alpha);
                        *left++ = i;
                        v->spl_nleft++;
                }
                else
                {
-                       datum_r = union_dr;
-                       size_r = size_alpha;
+                       datum_r = seg_union(datum_r, datum_alpha);
                        *right++ = i;
                        v->spl_nright++;
                }
@@ -665,45 +618,6 @@
        return (n);
 }
 
-
-SEG *
-seg_inter(SEG * a, SEG * b)
-{
-       SEG                *n;
-
-       n = (SEG *) palloc(sizeof(*n));
-
-       /* take min of upper endpoints */
-       if (a->upper < b->upper)
-       {
-               n->upper = a->upper;
-               n->u_sigd = a->u_sigd;
-               n->u_ext = a->u_ext;
-       }
-       else
-       {
-               n->upper = b->upper;
-               n->u_sigd = b->u_sigd;
-               n->u_ext = b->u_ext;
-       }
-
-       /* take max of lower endpoints */
-       if (a->lower > b->lower)
-       {
-               n->lower = a->lower;
-               n->l_sigd = a->l_sigd;
-               n->l_ext = a->l_ext;
-       }
-       else
-       {
-               n->lower = b->lower;
-               n->l_sigd = b->l_sigd;
-               n->l_ext = b->l_ext;
-       }
-
-       return (n);
-}
-
 void
 rt_seg_size(SEG * a, float *size)
 {
-- 
Sent via pgsql-performance mailing list (pgsql-performance@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance

Reply via email to