Hi,

I've prepared a Quick & Dirty patch serie for some missing parts in
intarray contrib module. Here they go with some explanation...


On May 06 12:38, Volkan YAZICI wrote:
> [4]
> In the inner_int_contains() function of _int_tool.c, there's a while
> loop like
> 
> [Code assumes that arrays are sorted.]
> 
> na = ARRNELEMS(a);
> nb = ARRNELEMS(b);
> da = ARRPTR(a);
> db = ARRPTR(b);
> 
> i = j = n = 0;
> while (i < na && j < nb)
>     if (da[i] < db[j])
>         i++;
>     else if (da[i] == db[j])
>     {
>         n++;
>         i++;
>         j++;
>     }
>     else
>         j++;
> 
> return (n == nb) ? TRUE : FALSE;
> 
> AFAICS, last "j++" should be replaced with "return FALSE". Because, "n"
> cannot be equal to "nb" no more, if "j" gets incremented without
> incrementing "n" (remember "j < nb" in the "while" condition).

intarray_contains.patch.0 is for above problem.


[5]
ISTM, inner_int_union() of _int_tool.c, makes some redundant visits to
array:

    ...
    /* union */
    i = j = 0;
    while (i < na && j < nb)
        if (da[i] < db[j])
            *dr++ = da[i++];
        else
            *dr++ = db[j++];

    while (i < na)
        *dr++ = da[i++];
    while (j < nb)
        *dr++ = db[j++];

}

if (ARRNELEMS(r) > 1)
    r = _int_unique(r);

IMHO, uniting unique values (instead of uniting and then removing
duplicates) should work faster. intarray_union.patch.0 is for this
problem. (Patched code, handles uniting for unique values.)


[6]
There's a seperate sorting algorithm isort() in _int_tool.c. Looks like
it executes some kind of shell sort on the array and returns true if
array has any duplicates. It's used for common sorting and deciding on
executing _int_unique() on the related array if isort() says it has
duplicates.

IIRC, our inner qsort() has a smaller O(N) degree when compared to above
sorting algorithm. Also for the code's sake it'd be better to switch
using qsort() in all sorting related stuff. For these reasons,
intarray_sort.patch.0 addresses this (probable) gotcha.


All 3 patches passed regression tests for intarray contrib module. But
these are just for addressing some gotchas I found while reading code.
Your comments for these problems(?) are welcome.


Regards.
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c        19 Nov 2005 03:00:09 -0000      1.6
--- contrib/intarray/_int_tool.c        6 May 2006 17:51:35 -0000
***************
*** 34,40 ****
                        j++;
                }
                else
!                       j++;
  
        return (n == nb) ? TRUE : FALSE;
  }
--- 34,40 ----
                        j++;
                }
                else
!                       break;
  
        return (n == nb) ? TRUE : FALSE;
  }
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c        19 Nov 2005 03:00:09 -0000      1.6
--- contrib/intarray/_int_tool.c        7 May 2006 09:59:09 -0000
***************
*** 183,221 ****
        return;
  }
  
- 
- /* len >= 2 */
- bool
- isort(int4 *a, int len)
- {
-       int4            tmp,
-                               index;
-       int4       *cur,
-                          *end;
-       bool            r = FALSE;
- 
-       end = a + len;
-       do
-       {
-               index = 0;
-               cur = a + 1;
-               while (cur < end)
-               {
-                       if (*(cur - 1) > *cur)
-                       {
-                               tmp = *(cur - 1);
-                               *(cur - 1) = *cur;
-                               *cur = tmp;
-                               index = 1;
-                       }
-                       else if (!r && *(cur - 1) == *cur)
-                               r = TRUE;
-                       cur++;
-               }
-       } while (index);
-       return r;
- }
- 
  ArrayType *
  new_intArrayType(int num)
  {
--- 183,188 ----
Index: contrib/intarray/_int.h
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int.h,v
retrieving revision 1.9
diff -c -r1.9 _int.h
*** contrib/intarray/_int.h     3 May 2006 16:31:07 -0000       1.9
--- contrib/intarray/_int.h     7 May 2006 09:59:10 -0000
***************
*** 38,56 ****
  
  #define ARRISVOID(x)  ((x) == NULL || ARRNELEMS(x) == 0)
  
- #define SORT(x) \
-       do { \
-                if ( ARRNELEMS( x ) > 1 ) \
-                       isort( ARRPTR( x ), ARRNELEMS( x ) ); \
-       } while(0)
- 
- #define PREPAREARR(x) \
-       do { \
-                if ( ARRNELEMS( x ) > 1 ) \
-                       if ( isort( ARRPTR( x ), ARRNELEMS( x ) ) ) \
-                               x = _int_unique( x ); \
-       } while(0)
- 
  /* "wish" function */
  #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
  
--- 38,43 ----
***************
*** 105,111 ****
  /*
  ** useful function
  */
- bool          isort(int4 *a, const int len);
  ArrayType  *new_intArrayType(int num);
  ArrayType  *copy_intArrayType(ArrayType *a);
  ArrayType  *resize_intArrayType(ArrayType *a, int num);
--- 92,97 ----
***************
*** 171,173 ****
--- 157,168 ----
  if (ARRNELEMS(a) > 1)                                                         
                        \
                qsort((void*)ARRPTR(a), ARRNELEMS(a),sizeof(int4),              
\
                                (direction) ? compASC : compDESC )
+ 
+ #define SORT(x)       QSORT(x, 1)
+ 
+ #define PREPAREARR(x) \
+       if (ARRNELEMS(x) > 1) \
+       { \
+               SORT(x); \
+               x = _int_unique( x ); \
+       }
Index: contrib/intarray/_int_tool.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/intarray/_int_tool.c,v
retrieving revision 1.6
diff -c -r1.6 _int_tool.c
*** contrib/intarray/_int_tool.c        19 Nov 2005 03:00:09 -0000      1.6
--- contrib/intarray/_int_tool.c        6 May 2006 17:49:13 -0000
***************
*** 94,103 ****
        if (ARRISVOID(b))
                r = copy_intArrayType(a);
  
!       if (r)
!               dr = ARRPTR(r);
        else
        {
                na = ARRNELEMS(a);
                nb = ARRNELEMS(b);
                da = ARRPTR(a);
--- 94,108 ----
        if (ARRISVOID(b))
                r = copy_intArrayType(a);
  
!       if (r)  /* Only one of the arrays isn't void and it's assigned to r. */
!       {
!               if (ARRNELEMS(r) > 1)
!                       r = _int_unique(r);                             /* 
Remove repetitive values. */
!       }
        else
        {
+               int k;
+               
                na = ARRNELEMS(a);
                nb = ARRNELEMS(b);
                da = ARRPTR(a);
***************
*** 106,128 ****
                r = new_intArrayType(na + nb);
                dr = ARRPTR(r);
  
!               /* union */
!               i = j = 0;
!               while (i < na && j < nb)
!                       if (da[i] < db[j])
!                               *dr++ = da[i++];
!                       else
!                               *dr++ = db[j++];
! 
!               while (i < na)
!                       *dr++ = da[i++];
!               while (j < nb)
!                       *dr++ = db[j++];
! 
        }
  
!       if (ARRNELEMS(r) > 1)
!               r = _int_unique(r);
  
        return r;
  }
--- 111,164 ----
                r = new_intArrayType(na + nb);
                dr = ARRPTR(r);
  
! /* Move i to next new integer in the array. */
! #define NEXT_NEW(arr, len, i) \
!       if (i < len) \
!       { \
!               while (++i < len) \
!                       if (arr[i] != arr[i-1]) \
!                               break; \
        }
+               
+               /*
+                * Form sorted union of arrays A and B.
+                * (Repetitive values will be discarded.)
+                */
+               for (i = j = k = 0; i < na || j < nb; k++)
+               {
+                       if (i >= na)                                    /* A is 
exhausted. */
+                       {
+                               dr[k] = db[j];
+                               NEXT_NEW(db, nb, j);
+                       }
+                       else if (j >= nb)                               /* B is 
exhausted. */
+                       {
+                               dr[k] = da[i];
+                               NEXT_NEW(da, na, i);
+                       }
+                       else                                                    
/* a[] and b[] still isn't exhausted. */
+                       {
+                               if (da[i] < db[j])
+                               {
+                                       dr[k] = da[i];
+                                       NEXT_NEW(da, na, i);
+                               }
+                               else if (da[i] > db[j])
+                               {
+                                       dr[k] = db[j];
+                                       NEXT_NEW(db, nb, j);
+                               }
+                               else
+                               {
+                                       dr[k] = da[i];
+                                       NEXT_NEW(da, na, i);
+                                       NEXT_NEW(db, nb, j);
+                               }
+                       }
+               }
  
!               r = resize_intArrayType(r, k);
!       }
  
        return r;
  }
---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings

Reply via email to