Changeset: 28e6713b8d2d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=28e6713b8d2d
Modified Files:
        clients/Tests/MAL-signatures.stable.out
        clients/Tests/MAL-signatures.stable.out.int128
        clients/Tests/exports.stable.out
        gdk/ChangeLog
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_logger.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/kernel/bat5.h
        monetdb5/modules/kernel/bat5.mal
        monetdb5/modules/mal/Tests/inspect05.stable.out.int128
        monetdb5/modules/mal/mat.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_storage.c
        sql/storage/bat/bat_table.c
        sql/test/BugTracker-2016/Tests/storagemodel.stable.out
        sql/test/BugTracker-2016/Tests/storagemodel.stable.out.int128
Branch: default
Log Message:

Implemented a candidate list for BATappend.


diffs (truncated from 1252 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -537,6 +537,8 @@ Ready.
 [ "bam",       "sam_export",   "pattern bam.sam_export(output_path:str):void 
",        "sam_exportf;", "Export results in the bam.export table to a SAM 
file"  ]
 [ "bam",       "seq_char",     "command 
bam.seq_char(ref_pos:int,alg_seq:str,alg_pos:int,alg_cigar:str):str ", 
"seq_char;",    "Calculate the character in the alignment string (alg_str) that 
is aligned to position 'ref_pos', conforming to the given cigar string" ]
 [ "bam",       "seq_length",   "command bam.seq_length(cigar:str):int ",       
"seq_length;",  "Calculate the real length of a DNA sequence, given its CIGAR 
string."  ]
+[ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:bat[:any_1],s:bat[:oid],force:bit):bat[:any_1] ",   
"BKCappend_cand_force_wrap;",   "append the content of u with candidate list s 
to i"    ]
+[ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:bat[:any_1],s:bat[:oid]):bat[:any_1] ",     
"BKCappend_cand_wrap;", "append the content of u with candidate list s to i"    
]
 [ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:bat[:any_1],force:bit):bat[:any_1] ",       
"BKCappend_force_wrap;",        "append the content of u to i"  ]
 [ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:any_1,force:bit):bat[:any_1] ",     
"BKCappend_val_force_wrap;",    "append the value u to i"       ]
 [ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:any_1):bat[:any_1] ",       "BKCappend_val_wrap;",  
"append the value u to i"       ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -641,6 +641,8 @@ Ready.
 [ "bam",       "sam_export",   "pattern bam.sam_export(output_path:str):void 
",        "sam_exportf;", "Export results in the bam.export table to a SAM 
file"  ]
 [ "bam",       "seq_char",     "command 
bam.seq_char(ref_pos:int,alg_seq:str,alg_pos:int,alg_cigar:str):str ", 
"seq_char;",    "Calculate the character in the alignment string (alg_str) that 
is aligned to position 'ref_pos', conforming to the given cigar string" ]
 [ "bam",       "seq_length",   "command bam.seq_length(cigar:str):int ",       
"seq_length;",  "Calculate the real length of a DNA sequence, given its CIGAR 
string."  ]
+[ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:bat[:any_1],s:bat[:oid],force:bit):bat[:any_1] ",   
"BKCappend_cand_force_wrap;",   "append the content of u with candidate list s 
to i"    ]
+[ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:bat[:any_1],s:bat[:oid]):bat[:any_1] ",     
"BKCappend_cand_wrap;", "append the content of u with candidate list s to i"    
]
 [ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:bat[:any_1],force:bit):bat[:any_1] ",       
"BKCappend_force_wrap;",        "append the content of u to i"  ]
 [ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:any_1,force:bit):bat[:any_1] ",     
"BKCappend_val_force_wrap;",    "append the value u to i"       ]
 [ "bat",       "append",       "command 
bat.append(i:bat[:any_1],u:any_1):bat[:any_1] ",       "BKCappend_val_wrap;",  
"append the value u to i"       ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -19,7 +19,7 @@ int ATOMlen(int id, const void *v);
 str ATOMname(int id);
 ptr ATOMnil(int id);
 int ATOMprint(int id, const void *val, stream *fd);
-gdk_return BATappend(BAT *b, BAT *c, bit force);
+gdk_return BATappend(BAT *b, BAT *n, BAT *s, bit force);
 void BATassertProps(BAT *b);
 atomDesc BATatoms[];
 BAT *BATattach(int tt, const char *heapfile, int role);
@@ -779,6 +779,8 @@ str BATXMLstr2xml(bat *ret, const bat *b
 str BATXMLxml2str(bat *ret, const bat *bid);
 str BATXMLxmltext(bat *ret, const bat *bid);
 str BATXMLxquery(bat *ret, const bat *bid, const char *const *expr);
+str BKCappend_cand_force_wrap(bat *r, const bat *bid, const bat *uid, const 
bat *sid, const bit *force);
+str BKCappend_cand_wrap(bat *r, const bat *bid, const bat *uid, const bat 
*sid);
 str BKCappend_force_wrap(bat *r, const bat *bid, const bat *uid, const bit 
*force);
 str BKCappend_val_force_wrap(bat *r, const bat *bid, const void *u, const bit 
*force);
 str BKCappend_val_wrap(bat *r, const bat *bid, const void *u);
diff --git a/gdk/ChangeLog b/gdk/ChangeLog
--- a/gdk/ChangeLog
+++ b/gdk/ChangeLog
@@ -1,6 +1,10 @@
 # ChangeLog file for MonetDB
 # This file is updated with Maddlog
 
+* Mon Dec  5 2016 Sjoerd Mullender <[email protected]>
+- BATappend now takes an optional (NULL if not used) candidate list for
+  the to-be-appended BAT.
+
 * Thu Dec  1 2016 Sjoerd Mullender <[email protected]>
 - New function BATkeyed(BAT *b) that determines (possibly using a hash
   table) whether all values in b are distinct.
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -1030,7 +1030,7 @@ gdk_export bte ATOMelmshift(int sz);
  * @- BUN manipulation
  * @multitable @columnfractions 0.08 0.7
  * @item BAT*
- * @tab BATappend (BAT *b, BAT *c, bit force)
+ * @tab BATappend (BAT *b, BAT *n, BAT *s, bit force)
  * @item BAT*
  * @tab BUNappend (BAT *b, ptr right, bit force)
  * @item BAT*
@@ -1262,7 +1262,7 @@ gdk_export bte ATOMelmshift(int sz);
 
 gdk_export gdk_return GDKupgradevarheap(BAT *b, var_t v, int copyall, int 
mayshare);
 gdk_export gdk_return BUNappend(BAT *b, const void *right, bit force);
-gdk_export gdk_return BATappend(BAT *b, BAT *c, bit force);
+gdk_export gdk_return BATappend(BAT *b, BAT *n, BAT *s, bit force);
 
 gdk_export gdk_return BUNdelete(BAT *b, oid o);
 gdk_export gdk_return BATdel(BAT *b, BAT *d);
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -451,7 +451,7 @@ VIEWreset(BAT *b)
                b->batCapacity = cnt;
 
                /* insert all of v in b, and quit */
-               BATappend(b, v, FALSE);
+               BATappend(b, v, NULL, FALSE);
                BBPreclaim(v);
        }
        return GDK_SUCCEED;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -2002,6 +2002,12 @@ BATmode(BAT *b, int mode)
  *             then all values are equal.
  * revsorted   The column is reversely sorted (descending).  If
  *             also sorted, then all values are equal.
+ * nosorted    BUN position which proves not sorted (given position
+ *             and one before are not ordered correctly).
+ * norevsorted BUN position which proves not revsorted (given position
+ *             and one before are not ordered correctly).
+ * nokey       Pair of BUN positions that prove not all values are
+ *             distinct (i.e. values at given locations are equal).
  *
  * In addition there is a property "unique" that, when set, indicates
  * that values must be kept unique (and hence that the "key" property
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -17,6 +17,7 @@
 #include "monetdb_config.h"
 #include "gdk.h"
 #include "gdk_private.h"
+#include "gdk_cand.h"
 
 gdk_return
 unshare_string_heap(BAT *b)
@@ -55,11 +56,11 @@ unshare_string_heap(BAT *b)
  * of inserting individual strings.  See the comments in the code for
  * more information. */
 static gdk_return
-insert_string_bat(BAT *b, BAT *n, int force)
+insert_string_bat(BAT *b, BAT *n, BAT *s, int force)
 {
        BATiter ni;             /* iterator */
        size_t toff = ~(size_t) 0;      /* tail offset */
-       BUN p, q;               /* loop variables */
+       BUN p, r;               /* loop variables */
        const void *tp;         /* tail value pointer */
        unsigned char tbv;      /* tail value-as-bte */
        unsigned short tsv;     /* tail value-as-sht */
@@ -68,15 +69,21 @@ insert_string_bat(BAT *b, BAT *n, int fo
 #endif
        var_t v;                /* value */
        size_t off;             /* offset within n's string heap */
+       BUN start, end, cnt;
+       const oid *restrict cand = NULL, *candend = NULL;
 
        assert(b->ttype == TYPE_str);
        /* only transient bats can use some other bat's string heap */
        assert(b->batRole == TRANSIENT ||
               b->tvheap->parentid == abs(b->batCacheid));
-       if (n->batCount == 0)
+       if (n->batCount == 0 || (s && s->batCount == 0))
                return GDK_SUCCEED;
        ni = bat_iterator(n);
        tp = NULL;
+       CANDINIT(n, s, start, end, cnt, cand, candend);
+       cnt = cand ? (BUN) (candend - cand) : end - start;
+       if (cnt == 0)
+               return GDK_SUCCEED;
        if ((!GDK_ELIMDOUBLES(b->tvheap) || b->batCount == 0) &&
            !GDK_ELIMDOUBLES(n->tvheap) &&
            b->tvheap->hashash == n->tvheap->hashash) {
@@ -97,7 +104,12 @@ insert_string_bat(BAT *b, BAT *n, int fo
                         */
                        bat bid = b->batCacheid;
 
-                       if (b->batCount == 0 && b->tvheap != n->tvheap) {
+                       /* if cand != NULL, there is no wholesale
+                        * copying of n's offset heap, but we may
+                        * still be able to share the string heap */
+                       if (b->batCount == 0 &&
+                           b->tvheap != n->tvheap &&
+                           cand == NULL) {
                                if (b->tvheap->parentid != bid) {
                                        BBPunshare(b->tvheap->parentid);
                                } else {
@@ -107,14 +119,15 @@ insert_string_bat(BAT *b, BAT *n, int fo
                                BBPshare(n->tvheap->parentid);
                                b->tvheap = n->tvheap;
                                toff = 0;
-                       } else if (b->tvheap->parentid == n->tvheap->parentid) {
+                       } else if (b->tvheap->parentid == n->tvheap->parentid &&
+                                  cand == NULL) {
                                toff = 0;
                        } else if (b->tvheap->parentid != bid &&
                                   unshare_string_heap(b) != GDK_SUCCEED) {
                                return GDK_FAIL;
                        }
                }
-               if (toff == ~(size_t) 0 && n->batCount > 1024) {
+               if (toff == ~(size_t) 0 && cnt > 1024) {
                        /* If b and n aren't sharing their string
                         * heaps, we try to determine whether to copy
                         * n's whole string heap to the end of b's, or
@@ -136,7 +149,11 @@ insert_string_bat(BAT *b, BAT *n, int fo
                        int match = 0, i;
                        size_t len = b->tvheap->hashash ? 1024 * EXTRALEN : 0;
                        for (i = 0; i < 1024; i++) {
-                               p = (BUN) (((double) rand() / RAND_MAX) * 
(BATcount(n) - 1));
+                               p = (BUN) (((double) rand() / RAND_MAX) * (cnt 
- 1));
+                               if (cand)
+                                       p = cand[p] - n->hseqbase;
+                               else
+                                       p += start;
                                off = BUNtvaroff(ni, p);
                                if (off < b->tvheap->free &&
                                    strcmp(b->tvheap->base + off, 
n->tvheap->base + off) == 0 &&
@@ -214,9 +231,9 @@ insert_string_bat(BAT *b, BAT *n, int fo
        if (toff == 0 && n->twidth == b->twidth) {
                /* we don't need to do any translation of offset
                 * values, so we can use fast memcpy */
-               memcpy(Tloc(b, BUNlast(b)), Tloc(n, 0),
-                      BATcount(n) * n->twidth);
-               BATsetcount(b, BATcount(b) + BATcount(n));
+               memcpy(Tloc(b, BUNlast(b)), Tloc(n, start),
+                      cnt * n->twidth);
+               BATsetcount(b, BATcount(b) + cnt);
        } else if (toff != ~(size_t) 0) {
                /* we don't need to insert any actual strings since we
                 * have already made sure that they are all in b's
@@ -235,21 +252,30 @@ insert_string_bat(BAT *b, BAT *n, int fo
 #endif
                const var_t *restrict tvp = (const var_t *) Tloc(n, 0);
 
-               BATloop(n, p, q) {
+               for (;;) {
+                       if (cand) {
+                               if (cand == candend)
+                                       break;
+                               p = *cand++ - n->hseqbase;
+                       } else {
+                               p = start++;
+                       }
+                       if (p >= end)
+                               break;
                        switch (n->twidth) {
                        case 1:
-                               v = (var_t) *tbp++ + GDK_VAROFFSET;
+                               v = (var_t) tbp[p] + GDK_VAROFFSET;
                                break;
                        case 2:
-                               v = (var_t) *tsp++ + GDK_VAROFFSET;
+                               v = (var_t) tsp[p] + GDK_VAROFFSET;
                                break;
 #if SIZEOF_VAR_T == 8
                        case 4:
-                               v = (var_t) *tip++;
+                               v = (var_t) tip[p];
                                break;
 #endif
                        default:
-                               v = *tvp++;
+                               v = tvp[p];
                                break;
                        }
                        v = (var_t) ((((size_t) v << GDK_VARSHIFT) + toff) >> 
GDK_VARSHIFT);
@@ -279,8 +305,24 @@ insert_string_bat(BAT *b, BAT *n, int fo
                /* if b's string heap is much smaller than n's string
                 * heap, don't bother checking whether n's string
                 * values occur in b's string heap */
-               BATloop(n, p, q) {
-                       bunfastapp(b, BUNtvar(ni, p));
+               r = BUNlast(b);
+               if (cand) {
+                       oid hseq = n->hseqbase;
+                       while (cand < candend) {
+                               tp = BUNtvar(ni, *cand - hseq);
+                               bunfastapp(b, tp);
+                               HASHins(b, r, tp);
+                               r++;
+                               cand++;
+                       }
+               } else {
+                       while (start < end) {
+                               tp = BUNtvar(ni, start);
+                               bunfastapp(b, tp);
+                               HASHins(b, r, tp);
+                               r++;
+                               start++;
+                       }
                }
        } else {
                /* Insert values from n individually into b; however,
@@ -289,7 +331,17 @@ insert_string_bat(BAT *b, BAT *n, int fo
                 * string heap (in case b's string heap is a copy of
                 * n's).  If this is the case, we just copy the
                 * offset, otherwise we insert normally.  */
-               BATloop(n, p, q) {
+               r = BUNlast(b);
+               for (;;) {
+                       if (cand) {
+                               if (cand == candend)
+                                       break;
+                               p = *cand++ - n->hseqbase;
+                       } else {
+                               p = start++;
+                       }
+                       if (p >= end)
+                               break;
                        off = BUNtvaroff(ni, p); /* the offset */
                        tp = n->tvheap->base + off; /* the string */
                        if (off < b->tvheap->free &&
@@ -337,6 +389,8 @@ insert_string_bat(BAT *b, BAT *n, int fo
                        } else {
                                bunfastapp(b, tp);
                        }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to