Changeset: fef24f868a70 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fef24f868a70
Modified Files:
gdk/gdk.h
gdk/gdk_align.c
gdk/gdk_batop.c
gdk/gdk_cbp.c
gdk/gdk_col.c
gdk/gdk_delta.c
gdk/gdk_search.c
gdk/gdk_storage.c
Branch: headless
Log Message:
Revamped sorting bits.
b->sorted is now truly a single bit which is set if the column is
sorted in ascending order. There is another bit, b->revsorted, which
is set if the column is sorted in descending (reverse) order. Both
bits can be set at the same time, implying that all values in the
column are equal.
If the bits are unset, the column is either known to be unsorted in
the relevant order, or it is not known whether the column is sorted.
The idiom (COLisordered(b) & 1) should be completely removed.
diffs (truncated from 1064 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -841,14 +841,14 @@
unsigned short width; /* byte-width of the atom array */
chr type; /* type id. */
chr shift; /* log2 of bunwidth */
- bit sorted; /* 0=false, 1=true; */
unsigned char
varsized:1, /* varsized(>0) or fixedsized(0). */
key:2, /* duplicates allowed? */
dense:1,
nonil:1, /* nonil isn't propchecked yet */
nil:1, /* nil is set when we found one nil (propcheck)
*/
- unused1:2;
+ sorted:1, /* column is sorted */
+ revsorted:1; /* column is reverse sorted */
oid align; /* OID for sync alignment */
oid nosorted_rev; /* position that proves sorted_rev==FALSE */
oid nokey[2]; /* positions that prove key ==FALSE */
@@ -1676,10 +1676,6 @@
* BUNs of a COL in reverse order. It just reverses the sequence, so
* this does not necessarily mean that they are sorted in reverse order!
*/
-#define GDK_SORTED_REV 128 /* reversely sorted */
-#define GDK_SORTED 65 /* 65 = (32 bits radix_clustered)<<1 + 1 */
-#define REVERT_SORTED(o) \
- ((o)==GDK_SORTED?GDK_SORTED_REV:((o)==GDK_SORTED_REV?GDK_SORTED:0))
gdk_export COL *COLsort(COL *b);
gdk_export COL *COLsort_rev(COL *b);
@@ -1699,7 +1695,7 @@
gdk_export int GDKssort(void *h, void *t, void *base, size_t n, int s, int
tpe);
gdk_export int GDKssort_rev(void *h, void *t, void *base, size_t n, int s, int
tpe);
-#define COLisordered(b) (((b)->type ==
TYPE_void)?GDK_SORTED:(b)->sorted)
+#define COLisordered(b) (((b)->type == TYPE_void) || (b)->sorted)
#define COLdense(b) (COLvoid(b) && (b)->seqbase != oid_nil)
#define COLvoid(b) (((b)->dense&(b)->sorted&1) || (b)->type==TYPE_void)
#define COLiskey(b) (b->key != FALSE || COLdense(b))
@@ -1708,13 +1704,30 @@
#define COLsettrivprop(b) \
do { \
int trivial = (b)->count <= 1; \
- if (!(b)->dense && (b)->type == TYPE_void && (b)->seqbase !=
oid_nil) { \
- (b)->dense = 1; \
- (b)->descdirty = TRUE; \
- } \
- if (!(b)->nonil && (b)->type == TYPE_void && (b)->seqbase !=
oid_nil) { \
- (b)->nonil = 1; \
- (b)->descdirty = TRUE; \
+ if ((b)->type == TYPE_void) { \
+ if ((b)->seqbase != oid_nil) { \
+ if (!(b)->dense) { \
+ (b)->dense = TRUE; \
+ (b)->descdirty = TRUE; \
+ } \
+ if (!(b)->nonil) { \
+ (b)->nonil = TRUE; \
+ (b)->descdirty = TRUE; \
+ } \
+ } else { \
+ if (!(b)->nil) { \
+ (b)->nil = TRUE; \
+ (b)->descdirty = TRUE; \
+ } \
+ if (!(b)->revsorted) { \
+ (b)->revsorted = TRUE; \
+ (b)->descdirty = TRUE; \
+ } \
+ } \
+ if (!(b)->sorted) { \
+ (b)->sorted = TRUE; \
+ (b)->descdirty = TRUE; \
+ } \
} \
if (trivial) { \
oid sqbs; \
@@ -1722,19 +1735,25 @@
if (!(b)->dense && \
(b)->type == TYPE_oid && \
(sqbs = (b)->count == 0 ? 0 : * (oid *) BUNhead(bi,
BUNfirst(b))) != oid_nil) { \
+ (b)->dense = TRUE; \
+ COLseqbase((b), sqbs); \
(b)->descdirty = TRUE; \
- (b)->dense = 1; \
- COLseqbase((b), sqbs); \
+ } \
+ if (!(b)->revsorted) { \
+ (b)->revsorted = TRUE; \
+ (b)->descdirty = TRUE; \
} \
} \
if ((trivial && (b)->type != TYPE_void) || (b)->dense) { \
- if ((b)->sorted != GDK_SORTED) \
+ if (!(b)->sorted) { \
+ (b)->sorted = TRUE; \
(b)->descdirty = TRUE; \
- (b)->sorted = GDK_SORTED; \
+ } \
if (!(b)->key) \
COLkey((b), TRUE); \
} \
} while (0)
+
/*
*
* COL Buffer Pool
@@ -3003,8 +3022,8 @@
/*
- * loop over a COL with ordered tail
- * ---------------------------------
+ * loop over a sorted COL
+ * ----------------------
*
* Here we loop over a COL with an ordered tail column (see for instance
* @%COLsort@). Again, 'p' and 'q' are iteration variables, where 'p'
@@ -3014,94 +3033,12 @@
* The 's' finally is an integer denoting the bunsize, used for speed.
*/
#define SORTloop(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop: COL not sorted.\n"); \
- else for (p = (ATOMcmp((b)->ttype,tl,ATOMnilptr((b)->ttype))? \
- SORTfndfirst(b,tl):BUNfirst(b)), \
- q = (ATOMcmp((b)->ttype,th,ATOMnilptr((b)->ttype))? \
- SORTfndlast(b,th):BUNlast(b)); p < q; p++)
+ if (!(b)->sorted) GDKerror("SORTloop: COL not sorted.\n"); \
+ else for (p = (ATOMcmp((b)->type, tl, ATOMnilptr((b)->type)) ? \
+ SORTfndfirst(b, tl) : BUNfirst(b)), \
+ q = (ATOMcmp((b)->type, th, ATOMnilptr((b)->type)) ? \
+ SORTfndlast(b, th) : BUNlast(b)); p < q; p++)
-#define SORTloop_chr(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_chr: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&chr_nil,chr)?BUNfirst(b):SORTfndfirst_chr(b,tl), \
- q =
simple_EQ(th,&chr_nil,chr)?BUNfirst(b):SORTfndlast_chr(b,th); p < q; p++)
-
-#define SORTloop_bte(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_bte: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&bte_nil,bte)?BUNfirst(b):SORTfndfirst_bte(b,tl), \
- q =
simple_EQ(th,&bte_nil,bte)?BUNfirst(b):SORTfndlast_bte(b,th); p < q; p++)
-
-#define SORTloop_sht(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_sht: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&sht_nil,sht)?BUNfirst(b):SORTfndfirst_sht(b,tl), \
- q =
simple_EQ(th,&sht_nil,sht)?BUNfirst(b):SORTfndlast_sht(b,th); p < q; p++)
-
-#define SORTloop_int(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_int: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&int_nil,int)?BUNfirst(b):SORTfndfirst_int(b,tl), \
- q =
simple_EQ(th,&int_nil,int)?BUNfirst(b):SORTfndlast_int(b,th); p < q; p++)
-
-#define SORTloop_flt(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_flt: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&flt_nil,flt)?BUNfirst(b):SORTfndfirst_flt(b,tl), \
- q =
simple_EQ(th,&flt_nil,flt)?BUNfirst(b):SORTfndlast_flt(b,th); p < q; p++)
-
-#define SORTloop_lng(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_lng: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&lng_nil,lng)?BUNfirst(b):SORTfndfirst_lng(b,tl), \
- q =
simple_EQ(th,&lng_nil,lng)?BUNfirst(b):SORTfndlast_lng(b,th); p < q; p++)
-
-#define SORTloop_dbl(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_dbl: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&dbl_nil,dbl)?BUNfirst(b):SORTfndfirst_dbl(b,tl), \
- q =
simple_EQ(th,&dbl_nil,dbl)?BUNfirst(b):SORTfndlast_dbl(b,th); p < q; p++)
-
-#define SORTloop_loc(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_loc: COL not sorted.n"); \
- else for (p =
atom_EQ(tl,ATOMnilptr((b)->ttype),(b)->ttype)?BUNfirst(b):SORTfndfirst_loc(b,tl),
\
- q =
atom_EQ(th,ATOMnilptr((b)->ttype),(b)->ttype)?BUNfirst(b):SORTfndlast_loc(b,th);
p < q; p++)
-
-#define SORTloop_var(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_var: COL not sorted.n"); \
- else for (p =
atom_EQ(tl,ATOMnilptr((b)->ttype),(b)->ttype)?BUNfirst(b):SORTfndfirst_var(b,tl),
\
- q =
atom_EQ(th,ATOMnilptr((b)->ttype),(b)->ttype)?BUNfirst(b):SORTfndlast_var(b,th);
p < q; p++)
-
-
-/* OIDDEPEND */
-#if SIZEOF_OID == SIZEOF_INT
-#define SORTfnd_oid(b,v) SORTfnd_int(b,v)
-#define SORTfndfirst_oid(b,v) SORTfndfirst_int(b,v)
-#define SORTfndlast_oid(b,v) SORTfndlast_int(b,v)
-#define SORTloop_oid(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_oid: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&oid_nil,oid)?BUNfirst(b):SORTfndfirst_int(b,tl), \
- q =
simple_EQ(th,&oid_nil,oid)?BUNfirst(b):SORTfndlast_int(b,th); p < q; p++)
-#else
-#define SORTfnd_oid(b,v) SORTfnd_lng(b,v)
-#define SORTfndfirst_oid(b,v) SORTfndfirst_lng(b,v)
-#define SORTfndlast_oid(b,v) SORTfndlast_lng(b,v)
-#define SORTloop_oid(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_oid: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&oid_nil,oid)?BUNfirst(b):SORTfndfirst_lng(b,tl), \
- q =
simple_EQ(th,&oid_nil,oid)?BUNfirst(b):SORTfndlast_lng(b,th); p < q; p++)
-#endif
-#if SIZEOF_WRD == SIZEOF_INT
-#define SORTfnd_wrd(b,v) SORTfnd_int(b,v)
-#define SORTfndfirst_wrd(b,v) SORTfndfirst_int(b,v)
-#define SORTfndlast_wrd(b,v) SORTfndlast_int(b,v)
-#define SORTloop_wrd(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_wrd: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&wrd_nil,wrd)?BUNfirst(b):SORTfndfirst_int(b,tl), \
- q =
simple_EQ(th,&wrd_nil,wrd)?BUNfirst(b):SORTfndlast_int(b,th); p < q; p++)
-#else
-#define SORTfnd_wrd(b,v) SORTfnd_lng(b,v)
-#define SORTfndfirst_wrd(b,v) SORTfndfirst_lng(b,v)
-#define SORTfndlast_wrd(b,v) SORTfndlast_lng(b,v)
-#define SORTloop_wrd(b,p,q,tl,th) \
- if (!(COLtordered(b)&1)) GDKerror("SORTloop_wrd: COL not sorted.n"); \
- else for (p =
simple_EQ(tl,&wrd_nil,wrd)?BUNfirst(b):SORTfndfirst_lng(b,tl), \
- q =
simple_EQ(th,&wrd_nil,wrd)?BUNfirst(b):SORTfndlast_lng(b,th); p < q; p++)
-#endif
-#define SORTloop_bit(b,p,q,tl,th) SORTloop_chr(b,p,q,tl,th)
/*
*
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -136,7 +136,8 @@
b1->nonil = b2->nonil;
}
COLkey(b1, COLiskey(b2));
- b1->sorted = COLisordered(b2);
+ b1->sorted = b2->sorted;
+ b1->revsorted = b2->revsorted;
b1->align = b2->align;
b1->descdirty = TRUE;
b1->nosorted_rev = (oid) (b2->nosorted_rev + diff);
@@ -227,6 +228,7 @@
bn->type = h->type;
bn->shift = h->shift;
bn->sorted = h->sorted;
+ bn->revsorted = h->revsorted;
bn->varsized = h->varsized;
bn->key = h->key;
bn->dense = h->dense;
@@ -290,7 +292,6 @@
COL *
COLmaterialize(COL *b)
{
- int ht;
oid cnt;
Heap head;
oid p, q;
@@ -298,7 +299,6 @@
COLcheck(b, "COLmaterialize");
assert(!isVIEW(b));
- ht = b->type;
cnt = COLcapacity(b);
head = b->heap;
p = BUNfirst(b);
@@ -306,11 +306,10 @@
assert(cnt >= q - p);
ALGODEBUG THRprintf(GDKout, "#COLmaterialize(%d);\n", (int)
b->batCacheid);
- if (!COLdense(b) || ht != TYPE_void) {
+ if (!COLdense(b) || b->type != TYPE_void) {
/* no voids */
return b;
}
- ht = TYPE_oid;
/* cleanup possible ACC's */
HASHdestroy(b);
@@ -322,7 +321,7 @@
}
/* point of no return */
- b->type = ht;
+ b->type = TYPE_oid;
COLsetdims(b);
b->dirty = TRUE;
b->descdirty = TRUE;
@@ -331,6 +330,10 @@
/* set the correct dense info */
b->dense = TRUE;
+ /* this shouldn't be necessary */
+ b->sorted = TRUE;
+ b->revsorted = b->count <= 1;
+
/* So now generate [h..h+cnt-1] */
h = b->seqbase;
x = (oid *) b->heap.base;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -276,23 +276,25 @@
COLiter ni = col_iterator(n);
COLiter bi = col_iterator(b);
int xx = ATOMcmp(b->type, BUNhead(ni, BUNfirst(n)),
BUNhead(bi, last));
- if ((COLisordered(b) & 1) && ((COLisordered(n) & 1) ==
0 || xx < 0)) {
+ if (b->sorted && (!n->sorted || xx < 0)) {
b->sorted = FALSE;
b->nosorted = last;
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list