Changeset: 2e019d46b9e3 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2e019d46b9e3
Modified Files:
gdk/gdk_group.c
gdk/gdk_hash.c
monetdb5/optimizer/opt_deadcode.c
sql/backends/monet5/sql_optimizer.c
Branch: default
Log Message:
Merge with Jul2017 branch.
diffs (293 lines):
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -331,6 +331,48 @@
/* COMP */ cmp(v, BUNtail(bi, hb)) == 0 \
)
+/* reverse the bits of an OID value */
+static inline oid
+rev(oid x)
+{
+#if SIZEOF_OID == 8
+ x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555);
+ x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333);
+ x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0F);
+ x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF00FF00FF);
+ x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF);
+ x = ((x & 0x00000000FFFFFFFF) << 32) | ((x >> 32) & 0x00000000FFFFFFFF);
+#else
+ x = ((x & 0x55555555) << 1) | ((x >> 1) & 0x55555555);
+ x = ((x & 0x33333333) << 2) | ((x >> 2) & 0x33333333);
+ x = ((x & 0x0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F);
+ x = ((x & 0x00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF);
+ x = ((x & 0x0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF);
+#endif
+ return x;
+}
+
+/* population count: count number of 1 bits in a value */
+static inline int
+pop(oid x)
+{
+ /* divide and conquer implementation */
+#if SIZEOF_OID == 8
+ x = (x & 0x5555555555555555) + ((x >> 1) & 0x5555555555555555);
+ x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
+ x = (x & 0x0F0F0F0F0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F0F0F0F0F);
+ x = (x & 0x00FF00FF00FF00FF) + ((x >> 8) & 0x00FF00FF00FF00FF);
+ x = (x & 0x0000FFFF0000FFFF) + ((x >> 16) & 0x0000FFFF0000FFFF);
+ x = (x & 0x00000000FFFFFFFF) + ((x >> 32) & 0x00000000FFFFFFFF);
+#else
+ x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
+ x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F);
+ x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF);
+ x = (x & 0x0000FFFF) + ((x >> 16) & 0x0000FFFF);
+#endif
+ return (int) x;
+}
#define GRP_create_partial_hash_table(INIT_0,INIT_1,HASH,COMP) \
do { \
@@ -396,7 +438,11 @@
assert(p < end); \
INIT_1; \
prb = HASH; \
- prb = (prb ^ (BUN) grps[r] << bits) & hs->mask;
\
+ /* cleverly combine group ID with */ \
+ /* hash value: group ID in top-most */ \
+ /* bits of hash by reversing the */ \
+ /* bits and shifting them in place */ \
+ prb = (prb ^ rev(grps[r]) >> bits) & hs->mask; \
for (hb = HASHget(hs, prb); \
hb != HASHnil(hs) && hb >= start; \
hb = HASHgetlink(hs, hb)) { \
@@ -552,13 +598,13 @@ BATgroup_internal(BAT **groups, BAT **ex
}
if (b->tkey || cnt <= 1 || (g && (g->tkey || BATtdense(g)))) {
/* grouping is trivial: 1 element per group */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
"trivial case: 1 element per group\n",
- BATgetId(b), BATcount(b),
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -593,13 +639,13 @@ BATgroup_internal(BAT **groups, BAT **ex
/* all values are equal */
if (g == NULL || (BATordered(g) && BATordered_rev(g))) {
/* there's only a single group: 0 */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT
"[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
- "trivial case: single output group\n",
- BATgetId(b), BATcount(b),
+ "trivial case: single output group\n",
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -635,13 +681,13 @@ BATgroup_internal(BAT **groups, BAT **ex
* e/h available in order to copy them,
* otherwise we will need to calculate them
* which we will do using the "normal" case */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT
"[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
- "trivial case: copy input groups\n",
- BATgetId(b), BATcount(b),
+ "trivial case: copy input groups\n",
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -731,13 +777,13 @@ BATgroup_internal(BAT **groups, BAT **ex
((BATordered(b) || BATordered_rev(b)) &&
(g == NULL || BATordered(g) || BATordered_rev(g)))) {
/* we only need to compare each entry with the previous */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
"compare consecutive values\n",
- BATgetId(b), BATcount(b),
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -788,13 +834,13 @@ BATgroup_internal(BAT **groups, BAT **ex
* last time we saw that group, so if the last time we
* saw the old group of the current value is within
* this range, we can reuse the new group */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
"subscan old groups\n",
- BATgetId(b), BATcount(b),
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -943,13 +989,13 @@ BATgroup_internal(BAT **groups, BAT **ex
/* we already have a hash table on b, or b is
* persistent and we could create a hash table, or b
* is a view on a bat that already has a hash table */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
"use existing hash table\n",
- BATgetId(b), BATcount(b),
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -1009,8 +1055,8 @@ BATgroup_internal(BAT **groups, BAT **ex
size_t nmelen;
Heap *hp = NULL;
BUN prb;
+ int bits;
BUN mask;
- int bits;
GDKclrerr(); /* not interested in BAThash errors */
@@ -1018,13 +1064,13 @@ BATgroup_internal(BAT **groups, BAT **ex
* build an incomplete hash table on the fly--also see
* BATassertProps for similar code; we also exploit if
* g is clustered */
- ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
+ ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT "[%s],"
"s=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
"create partial hash table%s\n",
- BATgetId(b), BATcount(b),
+ BATgetId(b), BATcount(b), ATOMname(b->ttype),
s ? BATgetId(s) : "NULL", s ? BATcount(s) : 0,
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
@@ -1032,25 +1078,10 @@ BATgroup_internal(BAT **groups, BAT **ex
subsorted, gc ? " (g clustered)" : "");
nme = BBP_physical(b->batCacheid);
nmelen = strlen(nme);
- if (ATOMsize(t) == 1) {
- mask = 1 << 16;
- bits = 8;
- } else if (ATOMsize(t) == 2) {
- mask = 1 << 16;
- bits = 8;
- } else {
- /* when combining value and group-id hashes,
- * we left-shift one of them by half the
- * hash-mask width to better spread bits and
- * use the entire hash-mask, and thus reduce
- * collisions */
- mask = HASHmask(cnt) >> 3;
- bits = 3;
- while (mask >>= 1)
- bits++;
- bits /= 2;
- mask = HASHmask(cnt);
- }
+ mask = MAX(HASHmask(cnt), 1 << 16);
+ /* mask is a power of two, so pop(mask - 1) tells us
+ * which power of two */
+ bits = 8 * SIZEOF_OID - pop(mask - 1);
if ((hp = GDKzalloc(sizeof(Heap))) == NULL ||
(hp->farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap))
< 0 ||
(hp->filename = GDKmalloc(nmelen + 30)) == NULL ||
@@ -1058,7 +1089,7 @@ BATgroup_internal(BAT **groups, BAT **ex
"%s.hash" SZFMT, nme, MT_getpid()) < 0 ||
(ext = GDKstrdup(hp->filename + nmelen + 1)) == NULL ||
(hs = HASHnew(hp, b->ttype, BUNlast(b),
- MAX(HASHmask(cnt), 1 << 16), BUN_NONE)) ==
NULL) {
+ mask, BUN_NONE)) == NULL) {
if (hp) {
if (hp->filename)
GDKfree(hp->filename);
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -130,7 +130,7 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
((size_t *) hp->base)[2] = mask;
((size_t *) hp->base)[3] = width;
((size_t *) hp->base)[4] = count;
- ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask
" BUNFMT ",width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask,
width, h->nil, (size + mask) * width);
+ ALGODEBUG fprintf(stderr, "#HASHnew: create hash(size " BUNFMT ", mask
" BUNFMT ", width %d, nil " BUNFMT ", total " BUNFMT " bytes);\n", size, mask,
width, h->nil, (size + mask) * width);
return h;
}
diff --git a/monetdb5/optimizer/opt_deadcode.c
b/monetdb5/optimizer/opt_deadcode.c
--- a/monetdb5/optimizer/opt_deadcode.c
+++ b/monetdb5/optimizer/opt_deadcode.c
@@ -58,7 +58,7 @@ OPTdeadcodeImplementation(Client cntxt,
varused[getArg(p,0)]++; // force keeping
continue;
}
- if ( getModuleId(p) == batRef && isUpdateInstruction(p)){
+ if ( getModuleId(p) == batRef && isUpdateInstruction(p) &&
!p->barrier){
/* bat.append and friends are intermediates that need
not be retained
* unless they are used */
} else
diff --git a/sql/backends/monet5/sql_optimizer.c
b/sql/backends/monet5/sql_optimizer.c
--- a/sql/backends/monet5/sql_optimizer.c
+++ b/sql/backends/monet5/sql_optimizer.c
@@ -60,11 +60,24 @@ SQLgetColumnSize(sql_trans *tr, sql_colu
return size;
}
+/*
+ * The maximal space occupied by a query is calculated
+ * under the assumption that the complete database should fit in memory.
+ * The assumption is that the plan does not contain duplicate bind operations.
+ * Calculation of the precise footprint is much more complex
+ * and can not deal with intermediate structures, or fast
+ * access using sorted probing.
+ *
+ * A run where we only take the size of a table only once,
+ * caused major degration on SF100 Q3 with SSD(>6x)
+ */
+
static lng
SQLgetSpace(mvc *m, MalBlkPtr mb, int prepare)
{
sql_trans *tr = m->session->tr;
lng size,space = 0, i;
+ str lasttable = 0;
for (i = 0; i < mb->stop; i++) {
InstrPtr p = mb->stmt[i];
@@ -90,9 +103,10 @@ SQLgetSpace(mvc *m, MalBlkPtr mb, int pr
continue;
/* we have to sum the cost of all three components of a
BAT */
- if (c && (!isRemote(c->t) && !isMergeTable(c->t))) {
+ if (c && (!isRemote(c->t) && !isMergeTable(c->t)) &&
(lasttable == 0 || strcmp(lasttable,tname)==0)) {
size = SQLgetColumnSize(tr, c, access);
- space += size; // accumulate once
+ space += size; // accumulate once per table
+ //lasttable = tname; invalidate this attempt
if( !prepare && size == 0 && ! t->system){
//mnstr_printf(GDKout,"found empty
column %s.%s.%s prepare %d size "LLFMT"\n",sname,tname,cname,prepare,size);
setFunctionId(p, emptybindRef);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list