Changeset: de4e3bf2c42b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/de4e3bf2c42b
Modified Files:
gdk/gdk_aggr.c
gdk/gdk_bat.c
gdk/gdk_batop.c
gdk/gdk_private.h
gdk/gdk_select.c
gdk/gdk_string.c
Branch: default
Log Message:
Merge with Jul2021 branch.
diffs (truncated from 1235 to 300 lines):
diff --git a/common/utils/mstring.h b/common/utils/mstring.h
--- a/common/utils/mstring.h
+++ b/common/utils/mstring.h
@@ -79,4 +79,80 @@ strconcat_len(char *restrict dst, size_t
return i;
}
+#ifndef __GNUC__
+/* __builtin_expect returns its first argument; it is expected to be
+ * equal to the second argument */
+#define __builtin_expect(expr, expect) (expr)
#endif
+
+/*
+ * checkUTF8 reports whether the NUL-terminated string v is correctly
+ * coded UTF-8: it returns true when no malformed sequence is found
+ * and false at the first malformed sequence it meets.
+ *
+ * UTF-8 encoding is as follows:
+ * U-00000000 - U-0000007F: 0xxxxxxx
+ * U-00000080 - U-000007FF: 110zzzzx 10xxxxxx
+ * U-00000800 - U-0000FFFF: 1110zzzz 10zxxxxx 10xxxxxx
+ * U-00010000 - U-0010FFFF: 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx
+ *
+ * To be correctly coded UTF-8, the sequence should be the shortest
+ * possible encoding of the value being encoded. This means that at
+ * least one of the z bits must be non-zero. Also note that the four
+ * byte sequence can encode more than is allowed and that the values
+ * U+D800..U+DFFF are not allowed to be encoded.
+ *
+ * NOTE(review): the code below tests the z bits and the 10xxxxxx
+ * pattern of the continuation bytes only. It contains no test that
+ * rejects the range U+D800..U+DFFF (lead byte 0xED followed by
+ * 0xA0..0xBF) or four byte values above U-0010FFFF, so such
+ * sequences are reported as valid -- confirm whether callers rely
+ * on them being rejected.
+ */
+static inline bool
+checkUTF8(const char *v)
+{
+ /* It is unlikely that this function returns false, because
+ * it is likely that the string presented is a correctly coded
+ * UTF-8 string. So we annotate the tests that are very
+ * unlikely to succeed, i.e. the ones that lead to a return of
+ * false, as being expected to return 0 using the
+ * __builtin_expect function.
+ *
+ * A NULL pointer and the one-byte string "\200" skip the scan
+ * altogether and are reported as valid (the latter is
+ * presumably the nil string representation -- TODO confirm). */
+ if (v != NULL) {
+ if (v[0] != '\200' || v[1] != '\0') {
+ /* check that string is correctly encoded UTF-8; every
+ * byte of a multi-byte sequence is tested before the
+ * next one is read, and a NUL byte fails each of those
+ * tests, so the scan stops at the terminator even when
+ * a sequence is cut short */
+ for (size_t i = 0; v[i]; i++) {
+ /* we do not annotate all tests, only the ones
+ * leading directly to an unlikely return
+ * statement */
+ if ((v[i] & 0x80) == 0) { /* one byte: 0xxxxxxx */
+ ;
+ } else if ((v[i] & 0xE0) == 0xC0) { /* two bytes: 110zzzzx */
+ if (__builtin_expect(((v[i] & 0x1E) ==
0), 0)) /* all z bits zero: not the shortest encoding */
+ return false;
+ if (__builtin_expect(((v[++i] & 0xC0)
!= 0x80), 0))
+ return false;
+ } else if ((v[i] & 0xF0) == 0xE0) { /* three bytes: 1110zzzz */
+ if ((v[i++] & 0x0F) == 0) {
+ /* z bits of the lead byte are zero, so the
+ * second byte must be 101xxxxx */
+ if (__builtin_expect(((v[i] &
0xE0) != 0xA0), 0))
+ return false;
+ } else {
+ if (__builtin_expect(((v[i] &
0xC0) != 0x80), 0))
+ return false;
+ }
+ if (__builtin_expect(((v[++i] & 0xC0)
!= 0x80), 0))
+ return false;
+ } else if (__builtin_expect(((v[i] & 0xF8) ==
0xF0), 1)) { /* four bytes: 11110zzz */
+ if ((v[i++] & 0x07) == 0) {
+ /* z bits of the lead byte are zero, so the
+ * second byte must have a z bit set */
+ if (__builtin_expect(((v[i] &
0x30) == 0), 0))
+ return false;
+ }
+ if (__builtin_expect(((v[i] & 0xC0) !=
0x80), 0))
+ return false;
+ if (__builtin_expect(((v[++i] & 0xC0)
!= 0x80), 0))
+ return false;
+ if (__builtin_expect(((v[++i] & 0xC0)
!= 0x80), 0))
+ return false;
+ } else {
+ /* a continuation byte in lead position, or a
+ * lead byte of the form 11111xxx */
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+#ifndef __GNUC__
+#undef __builtin_expect
+#endif
+
+#endif
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -3795,27 +3795,49 @@ BATmin_skipnil(BAT *b, void *aggr, bit s
pos = ords[r];
}
HEAPdecref(oidxh, false);
- } else if ((VIEWtparent(b) == 0 ||
- (/* DISABLES CODE */ (0) &&
- BATcount(b) ==
BATcount(BBP_cache(VIEWtparent(b))))) &&
- BATcheckimprints(b)) {
- Imprints *imprints = VIEWtparent(b) ?
BBP_cache(VIEWtparent(b))->timprints : b->timprints;
- int i;
-
- MT_thread_setalgorithm(VIEWtparent(b) ? "using parent
imprints" : "using imprints");
- pos = oid_nil;
- /* find first non-empty bin */
- for (i = 0; i < imprints->bits; i++) {
- if (imprints->stats[i + 128]) {
- pos = imprints->stats[i] + b->hseqbase;
- break;
+ } else {
+ Imprints *imprints = NULL;
+ if ((VIEWtparent(b) == 0 ||
+ BATcount(b) ==
BATcount(BBP_cache(VIEWtparent(b)))) &&
+ BATcheckimprints(b)) {
+ if (VIEWtparent(b)) {
+ BAT *pb = BBP_cache(VIEWtparent(b));
+ MT_lock_set(&pb->batIdxLock);
+ imprints = pb->timprints;
+ if (imprints != NULL)
+ IMPSincref(imprints);
+ else
+ imprints = NULL;
+ MT_lock_unset(&pb->batIdxLock);
+ } else {
+ MT_lock_set(&b->batIdxLock);
+ imprints = b->timprints;
+ if (imprints != NULL)
+ IMPSincref(imprints);
+ else
+ imprints = NULL;
+ MT_lock_unset(&b->batIdxLock);
}
}
- } else {
- struct canditer ci;
- BUN ncand = canditer_init(&ci, b, NULL);
- (void) do_groupmin(&pos, b, NULL, 1, 0, 0, &ci, ncand,
- skipnil, false);
+ if (imprints) {
+ int i;
+
+ MT_thread_setalgorithm(VIEWtparent(b) ? "using
parent imprints" : "using imprints");
+ pos = oid_nil;
+ /* find first non-empty bin */
+ for (i = 0; i < imprints->bits; i++) {
+ if (imprints->stats[i + 128]) {
+ pos = imprints->stats[i] +
b->hseqbase;
+ break;
+ }
+ }
+ IMPSdecref(imprints, false);
+ } else {
+ struct canditer ci;
+ BUN ncand = canditer_init(&ci, b, NULL);
+ (void) do_groupmin(&pos, b, NULL, 1, 0, 0, &ci,
ncand,
+ skipnil, false);
+ }
}
if (is_oid_nil(pos)) {
res = ATOMnilptr(b->ttype);
@@ -3928,27 +3950,49 @@ BATmax_skipnil(BAT *b, void *aggr, bit s
bat_iterator_end(&bi);
}
HEAPdecref(oidxh, false);
- } else if ((VIEWtparent(b) == 0 ||
- (/* DISABLES CODE */ (0) &&
- BATcount(b) ==
BATcount(BBP_cache(VIEWtparent(b))))) &&
- BATcheckimprints(b)) {
- Imprints *imprints = VIEWtparent(b) ?
BBP_cache(VIEWtparent(b))->timprints : b->timprints;
- int i;
-
- MT_thread_setalgorithm(VIEWtparent(b) ? "using parent
imprints" : "using imprints");
- pos = oid_nil;
- /* find last non-empty bin */
- for (i = imprints->bits - 1; i >= 0; i--) {
- if (imprints->stats[i + 128]) {
- pos = imprints->stats[i + 64] +
b->hseqbase;
- break;
+ } else {
+ Imprints *imprints = NULL;
+ if ((VIEWtparent(b) == 0 ||
+ BATcount(b) ==
BATcount(BBP_cache(VIEWtparent(b)))) &&
+ BATcheckimprints(b)) {
+ if (VIEWtparent(b)) {
+ BAT *pb = BBP_cache(VIEWtparent(b));
+ MT_lock_set(&pb->batIdxLock);
+ imprints = pb->timprints;
+ if (imprints != NULL)
+ IMPSincref(imprints);
+ else
+ imprints = NULL;
+ MT_lock_unset(&pb->batIdxLock);
+ } else {
+ MT_lock_set(&b->batIdxLock);
+ imprints = b->timprints;
+ if (imprints != NULL)
+ IMPSincref(imprints);
+ else
+ imprints = NULL;
+ MT_lock_unset(&b->batIdxLock);
}
}
- } else {
- struct canditer ci;
- BUN ncand = canditer_init(&ci, b, NULL);
- (void) do_groupmax(&pos, b, NULL, 1, 0, 0, &ci, ncand,
- skipnil, false);
+ if (imprints) {
+ int i;
+
+ MT_thread_setalgorithm(VIEWtparent(b) ? "using
parent imprints" : "using imprints");
+ pos = oid_nil;
+ /* find last non-empty bin */
+ for (i = imprints->bits - 1; i >= 0; i--) {
+ if (imprints->stats[i + 128]) {
+ pos = imprints->stats[i + 64] +
b->hseqbase;
+ break;
+ }
+ }
+ IMPSdecref(imprints, false);
+ } else {
+ struct canditer ci;
+ BUN ncand = canditer_init(&ci, b, NULL);
+ (void) do_groupmax(&pos, b, NULL, 1, 0, 0, &ci,
ncand,
+ skipnil, false);
+ }
}
if (is_oid_nil(pos)) {
res = ATOMnilptr(b->ttype);
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -302,15 +302,18 @@ VIEWunlink(BAT *b)
HEAPdecref(b->tvheap, false);
b->tvheap = NULL;
}
+
MT_lock_unset(&b->theaplock);
- /* unlink properties shared with parent */
- if (tpb && b->tprops && b->tprops == tpb->tprops)
- b->tprops = NULL;
-
+ MT_lock_set(&b->batIdxLock);
/* unlink imprints shared with parent */
- if (tpb && b->timprints && b->timprints == tpb->timprints)
+ if (b->timprints &&
+ b->timprints != (Imprints *) 1 &&
+ b->timprints->imprints.parentid != b->batCacheid) {
+ IMPSdecref(b->timprints, false);
b->timprints = NULL;
+ }
+ MT_lock_unset(&b->batIdxLock);
}
}
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1839,18 +1839,21 @@ BATkey(BAT *b, bool flag)
b->tseqbase = oid_nil;
} else
b->tnokey[0] = b->tnokey[1] = 0;
- if (/* DISABLES CODE */ (0) && flag && VIEWtparent(b)) {
+ gdk_return rc = GDK_SUCCEED;
+ if (flag && VIEWtparent(b)) {
/* if a view is key, then so is the parent if the two
* are aligned */
BAT *bp = BBP_cache(VIEWtparent(b));
+ MT_lock_set(&bp->theaplock);
if (BATcount(b) == BATcount(bp) &&
ATOMtype(BATttype(b)) == ATOMtype(BATttype(bp)) &&
!BATtkey(bp) &&
((BATtvoid(b) && BATtvoid(bp) && b->tseqbase ==
bp->tseqbase) ||
BATcount(b) == 0))
- return BATkey(bp, true);
+ rc = BATkey(bp, true);
+ MT_lock_unset(&bp->theaplock);
}
- return GDK_SUCCEED;
+ return rc;
}
void
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -2323,8 +2323,7 @@ BATsort(BAT **sorted, BAT **order, BAT *
}
if (VIEWtparent(b)) {
pb = BBP_cache(VIEWtparent(b));
- if (/* DISABLES CODE */ (1) ||
- b->tbaseoff != pb->tbaseoff ||
+ if (b->tbaseoff != pb->tbaseoff ||
BATcount(b) != BATcount(pb) ||
b->hseqbase != pb->hseqbase ||
BATatoms[b->ttype].atomCmp != BATatoms[pb->ttype].atomCmp)
@@ -2344,6 +2343,9 @@ BATsort(BAT **sorted, BAT **order, BAT *
HEAPincref(oidxh);
}
mkorderidx = false;
+ } else if (b != pb) {
+ /* don't build orderidx on parent bat */
+ mkorderidx = false;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list