Changeset: de4e3bf2c42b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/de4e3bf2c42b
Modified Files:
        gdk/gdk_aggr.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        gdk/gdk_string.c
Branch: default
Log Message:

Merge with Jul2021 branch.


diffs (truncated from 1235 to 300 lines):

diff --git a/common/utils/mstring.h b/common/utils/mstring.h
--- a/common/utils/mstring.h
+++ b/common/utils/mstring.h
@@ -79,4 +79,80 @@ strconcat_len(char *restrict dst, size_t
        return i;
 }
 
+#ifndef __GNUC__
+/* __builtin_expect returns its first argument; it is expected to be
+ * equal to the second argument */
+#define __builtin_expect(expr, expect) (expr)
 #endif
+
+/*
+ * UTF-8 encoding is as follows:
+ * U-00000000 - U-0000007F: 0xxxxxxx
+ * U-00000080 - U-000007FF: 110zzzzx 10xxxxxx
+ * U-00000800 - U-0000FFFF: 1110zzzz 10zxxxxx 10xxxxxx
+ * U-00010000 - U-0010FFFF: 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx
+ *
+ * To be correctly coded UTF-8, the sequence should be the shortest
+ * possible encoding of the value being encoded.  This means that at
+ * least one of the z bits must be non-zero.  Also note that the four
+ * byte sequence can encode more than is allowed and that the values
+ * U+D800..U+DFFF are not allowed to be encoded.
+ */
+static inline bool
+checkUTF8(const char *v)
+{
+       /* It is unlikely that this functions returns false, because
+        * it is likely that the string presented is a correctly coded
+        * UTF-8 string.  So we annotate the tests that are very
+        * unlikely to succeed, i.e. the ones that lead to a return of
+        * false, as being expected to return 0 using the
+        * __builtin_expect function. */
+       if (v != NULL) {
+               if (v[0] != '\200' || v[1] != '\0') {
+                       /* check that string is correctly encoded UTF-8 */
+                       for (size_t i = 0; v[i]; i++) {
+                               /* we do not annotate all tests, only the ones
+                                * leading directly to an unlikely return
+                                * statement */
+                               if ((v[i] & 0x80) == 0) {
+                                       ;
+                               } else if ((v[i] & 0xE0) == 0xC0) {
+                                       if (__builtin_expect(((v[i] & 0x1E) == 0), 0))
+                                               return false;
+                                       if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
+                                               return false;
+                               } else if ((v[i] & 0xF0) == 0xE0) {
+                                       if ((v[i++] & 0x0F) == 0) {
+                                               if (__builtin_expect(((v[i] & 0xE0) != 0xA0), 0))
+                                                       return false;
+                                       } else {
+                                               if (__builtin_expect(((v[i] & 0xC0) != 0x80), 0))
+                                                       return false;
+                                       }
+                                       if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
+                                               return false;
+                               } else if (__builtin_expect(((v[i] & 0xF8) == 0xF0), 1)) {
+                                       if ((v[i++] & 0x07) == 0) {
+                                               if (__builtin_expect(((v[i] & 0x30) == 0), 0))
+                                                       return false;
+                                       }
+                                       if (__builtin_expect(((v[i] & 0xC0) != 0x80), 0))
+                                               return false;
+                                       if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
+                                               return false;
+                                       if (__builtin_expect(((v[++i] & 0xC0) != 0x80), 0))
+                                               return false;
+                               } else {
+                                       return false;
+                               }
+                       }
+               }
+       }
+       return true;
+}
+
+#ifndef __GNUC__
+#undef __builtin_expect
+#endif
+
+#endif
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -3795,27 +3795,49 @@ BATmin_skipnil(BAT *b, void *aggr, bit s
                                pos = ords[r];
                        }
                        HEAPdecref(oidxh, false);
-               } else if ((VIEWtparent(b) == 0 ||
-                           (/* DISABLES CODE */ (0) &&
-                            BATcount(b) == BATcount(BBP_cache(VIEWtparent(b))))) &&
-                          BATcheckimprints(b)) {
-                       Imprints *imprints = VIEWtparent(b) ? BBP_cache(VIEWtparent(b))->timprints : b->timprints;
-                       int i;
-
-                       MT_thread_setalgorithm(VIEWtparent(b) ? "using parent imprints" : "using imprints");
-                       pos = oid_nil;
-                       /* find first non-empty bin */
-                       for (i = 0; i < imprints->bits; i++) {
-                               if (imprints->stats[i + 128]) {
-                                       pos = imprints->stats[i] + b->hseqbase;
-                                       break;
+               } else {
+                       Imprints *imprints = NULL;
+                       if ((VIEWtparent(b) == 0 ||
+                            BATcount(b) == BATcount(BBP_cache(VIEWtparent(b)))) &&
+                           BATcheckimprints(b)) {
+                               if (VIEWtparent(b)) {
+                                       BAT *pb = BBP_cache(VIEWtparent(b));
+                                       MT_lock_set(&pb->batIdxLock);
+                                       imprints = pb->timprints;
+                                       if (imprints != NULL)
+                                               IMPSincref(imprints);
+                                       else
+                                               imprints = NULL;
+                                       MT_lock_unset(&pb->batIdxLock);
+                               } else {
+                                       MT_lock_set(&b->batIdxLock);
+                                       imprints = b->timprints;
+                                       if (imprints != NULL)
+                                               IMPSincref(imprints);
+                                       else
+                                               imprints = NULL;
+                                       MT_lock_unset(&b->batIdxLock);
                                }
                        }
-               } else {
-                       struct canditer ci;
-                       BUN ncand = canditer_init(&ci, b, NULL);
-                       (void) do_groupmin(&pos, b, NULL, 1, 0, 0, &ci, ncand,
-                                          skipnil, false);
+                       if (imprints) {
+                               int i;
+
+                               MT_thread_setalgorithm(VIEWtparent(b) ? "using parent imprints" : "using imprints");
+                               pos = oid_nil;
+                               /* find first non-empty bin */
+                               for (i = 0; i < imprints->bits; i++) {
+                                       if (imprints->stats[i + 128]) {
+                                               pos = imprints->stats[i] + b->hseqbase;
+                                               break;
+                                       }
+                               }
+                               IMPSdecref(imprints, false);
+                       } else {
+                               struct canditer ci;
+                               BUN ncand = canditer_init(&ci, b, NULL);
+                               (void) do_groupmin(&pos, b, NULL, 1, 0, 0, &ci, ncand,
+                                                  skipnil, false);
+                       }
                }
                if (is_oid_nil(pos)) {
                        res = ATOMnilptr(b->ttype);
@@ -3928,27 +3950,49 @@ BATmax_skipnil(BAT *b, void *aggr, bit s
                                bat_iterator_end(&bi);
                        }
                        HEAPdecref(oidxh, false);
-               } else if ((VIEWtparent(b) == 0 ||
-                           (/* DISABLES CODE */ (0) &&
-                            BATcount(b) == BATcount(BBP_cache(VIEWtparent(b))))) &&
-                          BATcheckimprints(b)) {
-                       Imprints *imprints = VIEWtparent(b) ? BBP_cache(VIEWtparent(b))->timprints : b->timprints;
-                       int i;
-
-                       MT_thread_setalgorithm(VIEWtparent(b) ? "using parent imprints" : "using imprints");
-                       pos = oid_nil;
-                       /* find last non-empty bin */
-                       for (i = imprints->bits - 1; i >= 0; i--) {
-                               if (imprints->stats[i + 128]) {
-                                       pos = imprints->stats[i + 64] + b->hseqbase;
-                                       break;
+               } else {
+                       Imprints *imprints = NULL;
+                       if ((VIEWtparent(b) == 0 ||
+                            BATcount(b) == BATcount(BBP_cache(VIEWtparent(b)))) &&
+                           BATcheckimprints(b)) {
+                               if (VIEWtparent(b)) {
+                                       BAT *pb = BBP_cache(VIEWtparent(b));
+                                       MT_lock_set(&pb->batIdxLock);
+                                       imprints = pb->timprints;
+                                       if (imprints != NULL)
+                                               IMPSincref(imprints);
+                                       else
+                                               imprints = NULL;
+                                       MT_lock_unset(&pb->batIdxLock);
+                               } else {
+                                       MT_lock_set(&b->batIdxLock);
+                                       imprints = b->timprints;
+                                       if (imprints != NULL)
+                                               IMPSincref(imprints);
+                                       else
+                                               imprints = NULL;
+                                       MT_lock_unset(&b->batIdxLock);
                                }
                        }
-               } else {
-                       struct canditer ci;
-                       BUN ncand = canditer_init(&ci, b, NULL);
-                       (void) do_groupmax(&pos, b, NULL, 1, 0, 0, &ci, ncand,
-                                          skipnil, false);
+                       if (imprints) {
+                               int i;
+
+                               MT_thread_setalgorithm(VIEWtparent(b) ? "using parent imprints" : "using imprints");
+                               pos = oid_nil;
+                               /* find last non-empty bin */
+                               for (i = imprints->bits - 1; i >= 0; i--) {
+                                       if (imprints->stats[i + 128]) {
+                                               pos = imprints->stats[i + 64] + b->hseqbase;
+                                               break;
+                                       }
+                               }
+                               IMPSdecref(imprints, false);
+                       } else {
+                               struct canditer ci;
+                               BUN ncand = canditer_init(&ci, b, NULL);
+                               (void) do_groupmax(&pos, b, NULL, 1, 0, 0, &ci, ncand,
+                                                  skipnil, false);
+                       }
                }
                if (is_oid_nil(pos)) {
                        res = ATOMnilptr(b->ttype);
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -302,15 +302,18 @@ VIEWunlink(BAT *b)
                        HEAPdecref(b->tvheap, false);
                        b->tvheap = NULL;
                }
+
                MT_lock_unset(&b->theaplock);
 
-               /* unlink properties shared with parent */
-               if (tpb && b->tprops && b->tprops == tpb->tprops)
-                       b->tprops = NULL;
-
+               MT_lock_set(&b->batIdxLock);
                /* unlink imprints shared with parent */
-               if (tpb && b->timprints && b->timprints == tpb->timprints)
+               if (b->timprints &&
+                   b->timprints != (Imprints *) 1 &&
+                   b->timprints->imprints.parentid != b->batCacheid) {
+                       IMPSdecref(b->timprints, false);
                        b->timprints = NULL;
+               }
+               MT_lock_unset(&b->batIdxLock);
        }
 }
 
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1839,18 +1839,21 @@ BATkey(BAT *b, bool flag)
                b->tseqbase = oid_nil;
        } else
                b->tnokey[0] = b->tnokey[1] = 0;
-       if (/* DISABLES CODE */ (0) && flag && VIEWtparent(b)) {
+       gdk_return rc = GDK_SUCCEED;
+       if (flag && VIEWtparent(b)) {
                /* if a view is key, then so is the parent if the two
                 * are aligned */
                BAT *bp = BBP_cache(VIEWtparent(b));
+               MT_lock_set(&bp->theaplock);
                if (BATcount(b) == BATcount(bp) &&
                    ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
                    !BATtkey(bp) &&
                    ((BATtvoid(b) && BATtvoid(bp) && b->tseqbase == bp->tseqbase) ||
                     BATcount(b) == 0))
-                       return BATkey(bp, true);
+                       rc = BATkey(bp, true);
+               MT_lock_unset(&bp->theaplock);
        }
-       return GDK_SUCCEED;
+       return rc;
 }
 
 void
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -2323,8 +2323,7 @@ BATsort(BAT **sorted, BAT **order, BAT *
        }
        if (VIEWtparent(b)) {
                pb = BBP_cache(VIEWtparent(b));
-               if (/* DISABLES CODE */ (1) ||
-                   b->tbaseoff != pb->tbaseoff ||
+               if (b->tbaseoff != pb->tbaseoff ||
                    BATcount(b) != BATcount(pb) ||
                    b->hseqbase != pb->hseqbase ||
                    BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)
@@ -2344,6 +2343,9 @@ BATsort(BAT **sorted, BAT **order, BAT *
                                HEAPincref(oidxh);
                        }
                        mkorderidx = false;
+               } else if (b != pb) {
+                       /* don't build orderidx on parent bat */
+                       mkorderidx = false;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to