Changeset: f913719e8320 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f913719e8320
Modified Files:
        gdk/gdk_batop.c
        monetdb5/modules/mal/remote.c
        sql/backends/monet5/sql.c
        sql/server/rel_optimizer.c
        sql/server/rel_schema.c
        sql/storage/bat/bat_logger.c
Branch: default
Log Message:

Merge with Jul2021 branch.


diffs (truncated from 440 to 300 lines):

diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -61,13 +61,13 @@ insert_string_bat(BAT *b, BAT *n, struct
        BATiter ni;             /* iterator */
        size_t toff = ~(size_t) 0;      /* tail offset */
        BUN p, r;               /* loop variables */
-       const void *tp;         /* tail value pointer */
+       const void *tp = NULL;  /* tail value pointer */
        unsigned char tbv;      /* tail value-as-bte */
        unsigned short tsv;     /* tail value-as-sht */
 #if SIZEOF_VAR_T == 8
        unsigned int tiv;       /* tail value-as-int */
 #endif
-       var_t v = GDK_VAROFFSET; /* value */
+       var_t v;                /* value */
        size_t off;             /* offset within n's string heap */
        BUN cnt = ci->ncand;
        BUN oldcnt = BATcount(b);
@@ -80,143 +80,88 @@ insert_string_bat(BAT *b, BAT *n, struct
        if (cnt == 0)
                return GDK_SUCCEED;
        ni = bat_iterator(n);
-       tp = NULL;
-       if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
-                           !GDK_ELIMDOUBLES(ni.vh))) {
-               if (b->batRole == TRANSIENT || b->tvheap == ni.vh) {
-                       /* If b is in the transient farm (i.e. b will
-                        * never become persistent), we try some
-                        * clever tricks to avoid copying:
-                        * - if b is empty, we just let it share the
-                         *   string heap with n;
-                        * - otherwise, if b's string heap and n's
-                         *   string heap are the same (i.e. shared),
-                         *   we leave it that way (this includes the
-                         *   case that b is persistent and n shares
-                         *   its string heap with b);
-                        * - otherwise, if b shares its string heap
-                         *   with some other bat, we materialize it
-                         *   and we will have to copy strings.
-                        */
-                       bat bid = b->batCacheid;
 
-                       /* if candidates are not dense, there is no
-                        * wholesale copying of n's offset heap, but
-                        * we may still be able to share the string
-                        * heap */
-                       if (mayshare &&
-                           oldcnt == 0 &&
-                           b->tvheap != ni.vh &&
-                           ci->tpe == cand_dense) {
-                               /* make sure locking happens in a
-                                * predictable order: lowest id
-                                * first */
-                               MT_thread_setalgorithm("share vheap, copy heap");
-                               MT_lock_set(&b->theaplock);
-                               if (b->tvheap->parentid != bid)
-                                       BBPunshare(b->tvheap->parentid);
-                               HEAPdecref(b->tvheap, true);
-                               HEAPincref(ni.vh);
-                               b->tvheap = ni.vh;
-                               BBPshare(ni.vh->parentid);
-                               b->batDirtydesc = true;
-                               MT_lock_unset(&b->theaplock);
-                               toff = 0;
-                               v = ni.width == 1 ? GDK_VAROFFSET + 1 :
-                                       ni.width == 2 ? GDK_VAROFFSET + (1 << 9) :
-#if SIZEOF_VAR_T == 8
-                                       ni.width != 4 ? (var_t) 1 << 33 :
-#endif
-                                       (var_t) 1 << 17;
-                       } else if (b->tvheap->parentid == ni.vh->parentid &&
-                                  ci->tpe == cand_dense) {
-                               MT_thread_setalgorithm("copy heap");
-                               toff = 0;
-                       } else if (b->tvheap->parentid != bid &&
-                                  unshare_varsized_heap(b) != GDK_SUCCEED) {
-                               bat_iterator_end(&ni);
-                               return GDK_FAIL;
-                       }
-               } else if (oldcnt == 0) {
-                       v = ni.width == 1 ? GDK_VAROFFSET + 1 :
-                               ni.width == 2 ? GDK_VAROFFSET + (1 << 9) :
-#if SIZEOF_VAR_T == 8
-                               ni.width != 4 ? (var_t) 1 << 33 :
-#endif
-                               (var_t) 1 << 17;
-                       MT_thread_setalgorithm("copy vheap, copy heap");
-                       if (b->tvheap->size < ni.vh->free) {
-                               if (HEAPgrow(&b->theaplock, &b->tvheap, ni.vh->free, force) != GDK_SUCCEED) {
-                                       bat_iterator_end(&ni);
-                                       return GDK_FAIL;
-                               }
-                       }
-                       memcpy(b->tvheap->base, ni.vh->base, ni.vh->free);
-                       b->tvheap->free = ni.vh->free;
-                       toff = 0;
+       if (b->tvheap == ni.vh) {
+               /* vheaps are already shared, continue doing so: we just
+                * need to append the offsets */
+               toff = 0;
+               MT_thread_setalgorithm("shared vheap");
+       } else if (mayshare && b->batRole == TRANSIENT && oldcnt == 0) {
+               /* we can share the vheaps, so we then only need to
+                * append the offsets */
+               MT_lock_set(&b->theaplock);
+               if (b->tvheap->parentid != b->batCacheid)
+                       BBPunshare(b->tvheap->parentid);
+               HEAPdecref(b->tvheap, b->tvheap->parentid == b->batCacheid);
+               HEAPincref(ni.vh);
+               b->tvheap = ni.vh;
+               BBPshare(ni.vh->parentid);
+               b->batDirtydesc = true;
+               MT_lock_unset(&b->theaplock);
+               toff = 0;
+               MT_thread_setalgorithm("share vheap");
+       } else {
+               /* no heap sharing, so also make sure the heap isn't
+                * shared currently (we're not allowed to write in
+                * another bat's heap) */
+               if (b->tvheap->parentid != b->batCacheid &&
+                   unshare_varsized_heap(b) != GDK_SUCCEED) {
+                       bat_iterator_end(&ni);
+                       return GDK_FAIL;
                }
-               if (toff == ~(size_t) 0 && cnt > 1024 && b->tvheap->free >= ni.vh->free) {
-                       /* If b and n aren't sharing their string
-                        * heaps, we try to determine whether to copy
-                        * n's whole string heap to the end of b's, or
-                        * whether we will insert each string from n
-                        * individually.  We do this by testing a
-                        * sample of n's strings and extrapolating
-                        * from that sample whether n uses a
-                        * significant part of its string heap for its
-                        * strings (i.e. whether there are many unused
-                        * strings in n's string heap).  If n doesn't
-                        * have many strings in the first place, we
-                        * skip this and just insert them all
-                        * individually.  We also check whether a
-                        * significant number of n's strings happen to
-                        * have the same offset in b.  In the latter
-                        * case we also want to insert strings
-                        * individually, but reusing the string in b's
-                        * string heap. */
-                       int match = 0, i;
+               if (oldcnt == 0 || (!GDK_ELIMDOUBLES(b->tvheap) &&
+                                   !GDK_ELIMDOUBLES(ni.vh))) {
+                       /* we'll consider copying the string heap completely
+                        *
+                        * we first estimate how much space the string heap
+                        * should occupy, given the number of rows we need to
+                        * insert, then, if that is way smaller than the actual
+                        * space occupied, we will skip the copy and just insert
+                        * one by one */
                        size_t len = 0;
-                       for (i = 0; i < 1024; i++) {
+                       for (int i = 0; i < 1024; i++) {
                                p = (BUN) (((double) rand() / RAND_MAX) * (cnt - 1));
                                p = canditer_idx(ci, p) - n->hseqbase;
-                               off = BUNtvaroff(ni, p);
-                               if (off < b->tvheap->free &&
-                                   strcmp(b->tvheap->base + off, ni.vh->base + off) == 0)
-                                       match++;
-                               len += (strlen(ni.vh->base + off) + 8) & ~7;
+                               len += strlen(BUNtvar(ni, p)) + 1;
                        }
-                       if (match < 768 && (size_t) (ni.count * (double) len / 1024) >= ni.vh->free / 2) {
-                               /* append string heaps */
-                               toff = oldcnt == 0 ? 0 : b->tvheap->free;
-                               /* make sure we get alignment right */
-                               toff = (toff + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
-                               /* if in "force" mode, the heap may be
-                                * shared when memory mapped */
+                       len = (len + 512) / 1024; /* rounded average length */
+                       r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
+                       /* r is estimate of number of strings in
+                        * double-eliminated area */
+                       if (r < ci->ncand)
+                               len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
+                       else
+                               len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
+                       /* len is total estimated expected size of vheap */
+
+                       if (len > ni.vh->free / 2) {
+                               /* we copy the string heap, perhaps appending */
+                               if (oldcnt == 0) {
+                                       toff = 0;
+                                       MT_thread_setalgorithm("copy vheap");
+                               } else {
+                                       toff = (b->tvheap->free + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
+                                       MT_thread_setalgorithm("append vheap");
+                               }
+
                                if (HEAPgrow(&b->theaplock, &b->tvheap, toff + ni.vh->size, force) != GDK_SUCCEED) {
                                        bat_iterator_end(&ni);
                                        return GDK_FAIL;
                                }
-                               MT_thread_setalgorithm("append vheap");
                                memcpy(b->tvheap->base + toff, ni.vh->base, ni.vh->free);
                                b->tvheap->free = toff + ni.vh->free;
-                               if (toff > 0) {
-                                       /* flush double-elimination
-                                        * hash table */
-                                       memset(b->tvheap->base, 0,
-                                              GDK_STRHASHSIZE);
-                               }
-                               /* make sure b is wide enough */
-                               v = b->tvheap->free;
                        }
                }
-       } else if (b->tvheap != ni.vh &&
-                  unshare_varsized_heap(b) != GDK_SUCCEED) {
-               bat_iterator_end(&ni);
-               return GDK_FAIL;
        }
+       /* if toff has the initial value of ~0, we insert strings
+        * individually, otherwise we only copy (insert) offsets */
+       if (toff == ~(size_t) 0)
+               v = GDK_VAROFFSET;
+       else
+               v = b->tvheap->free - 1;
 
        /* make sure there is (vertical) space in the offset heap, we
-        * may also widen if v was set to some limit above */
+        * may also widen thanks to v, set above */
        if (GDKupgradevarheap(b, v, oldcnt + cnt < b->batCapacity ? b->batCapacity : oldcnt + cnt, b->batCount) != GDK_SUCCEED) {
                bat_iterator_end(&ni);
                return GDK_FAIL;
@@ -225,6 +170,7 @@ insert_string_bat(BAT *b, BAT *n, struct
        if (toff == 0 && ni.width == b->twidth && ci->tpe == cand_dense) {
                /* we don't need to do any translation of offset
                 * values, so we can use fast memcpy */
+               MT_thread_setalgorithm("memcpy offsets");
                memcpy(Tloc(b, BUNlast(b)), (const char *) ni.base + ((ci->seq - n->hseqbase) << ni.shift), cnt << ni.shift);
        } else if (toff != ~(size_t) 0) {
                /* we don't need to insert any actual strings since we
diff --git a/monetdb5/modules/mal/remote.c b/monetdb5/modules/mal/remote.c
--- a/monetdb5/modules/mal/remote.c
+++ b/monetdb5/modules/mal/remote.c
@@ -1517,10 +1517,16 @@ static str RMTbincopyto(Client cntxt, Ma
 
        sendtheap = b->ttype != TYPE_void && b->tvarsized;
        if (isVIEW(b) && sendtheap && VIEWvtparent(b) && BATcount(b) < BATcount(BBP_cache(VIEWvtparent(b)))) {
-               if ((b = BATdescriptor(bid)) == NULL)
+               if ((b = BATdescriptor(bid)) == NULL) {
+                       BBPunfix(bid);
                        throw(MAL, "remote.bincopyto", RUNTIME_OBJECT_MISSING);
+               }
                v = COLcopy(b, b->ttype, true, TRANSIENT);
                BBPunfix(b->batCacheid);
+               if (v == NULL) {
+                       BBPunfix(bid);
+                       throw(MAL, "remote.bincopyto", GDK_EXCEPTION);
+               }
        }
 
        mnstr_printf(cntxt->fdout, /*JSON*/"{"
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2601,7 +2601,7 @@ mvc_export_table_wrap( Client cntxt, Mal
        }
        if ((ok = mvc_export_result(cntxt->sqlcontext, s, res, tostdout, mb->starttime, mb->optimize)) < 0) {
                msg = createException(SQL, "sql.resultSet", SQLSTATE(45000) "Result set construction failed: %s", mvc_export_error(cntxt->sqlcontext, s, ok));
-               if (!tostdout)
+               if (!onclient && !tostdout)
                        close_stream(s);
                goto wrapup_result_set1;
        }
@@ -2832,7 +2832,7 @@ mvc_export_row_wrap( Client cntxt, MalBl
        }
        if ((ok = mvc_export_result(cntxt->sqlcontext, s, res, strcmp(filename, "stdout") == 0, mb->starttime, mb->optimize)) < 0) {
                msg = createException(SQL, "sql.resultSet", SQLSTATE(45000) "Result set construction failed: %s", mvc_export_error(cntxt->sqlcontext, s, ok));
-               if (!tostdout)
+               if (!onclient && !tostdout)
                        close_stream(s);
                goto wrapup_result_set;
        }
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -9532,7 +9532,8 @@ rel_basecount(visitor *v, sql_rel *rel)
                if (is_basetable(bt->op) && !e->l) { /* count(*) */
                        /* change into select cnt('schema','table') */;
                        sql_table *t = bt->l;
-                       if (!isTable(t))
+                       /* I need to get the declared table's frame number to make this work correctly for those */
+                       if (!isTable(t) || isDeclaredTable(t))
                                return rel;
                        sql_subfunc *cf = sql_bind_func(v->sql, "sys", "cnt", sql_bind_localtype("str"), sql_bind_localtype("str"), F_FUNC);
                        list *exps = sa_list(v->sql->sa);
diff --git a/sql/server/rel_schema.c b/sql/server/rel_schema.c
--- a/sql/server/rel_schema.c
+++ b/sql/server/rel_schema.c
@@ -1472,6 +1472,8 @@ sql_drop_view(sql_query *query, dlist *q
                }
                return NULL;
        }
+       if (!isView(t))
+               return sql_error(sql, 02, SQLSTATE(42000) "DROP VIEW: unable to drop view '%s': is a table", tname);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to