Changeset: d5d8593eecaf for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d5d8593eecaf
Modified Files:
monetdb5/modules/atoms/str.c
monetdb5/modules/kernel/batstr.c
Branch: sw_ew_c_sorting
Log Message:
Cleanup MAL str functions plus make proper comparisons with ext string
functions.
diffs (truncated from 681 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3766,25 +3766,6 @@ str_is_iprefix(const char *s, const char
return utf8ncasecmp(s, prefix, plen);
}
-static str
-STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
- (void) cntxt;
- (void) mb;
- bit *res = getArgReference(stk, pci, 0);
- const str *arg1 = getArgReference(stk, pci, 1),
- *arg2 = getArgReference(stk, pci, 2);
- bit icase = pci->argc == 4
- && *getArgReference_bit(stk, pci, 3) ? true : false;
- str s = *arg1, prefix = *arg2, msg = MAL_SUCCEED;
- int plen = str_strlen(prefix);
-
- *res = (strNil(s) || strNil(prefix)) ? bit_nil :
- icase ? str_is_iprefix(s, prefix, plen) : str_is_prefix(s, prefix,
- plen);
- return msg;
-}
-
bit
str_is_suffix(const char *s, const char *suffix, int sul)
{
@@ -3807,27 +3788,6 @@ str_is_isuffix(const char *s, const char
return utf8casecmp(s + sl - sul, suffix);
}
-
-/* returns whether arg1 ends with arg2 */
-static str
-STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
-{
- (void) cntxt;
- (void) mb;
- bit *res = getArgReference(stk, pci, 0);
- const str *arg1 = getArgReference(stk, pci, 1),
- *arg2 = getArgReference(stk, pci, 2);
- bit icase = pci->argc == 4
- && *getArgReference_bit(stk, pci, 3) ? true : false;
- str s = *arg1, suffix = *arg2, msg = MAL_SUCCEED;
- int sul = str_strlen(suffix);
-
- *res = (strNil(s) || strNil(suffix)) ? bit_nil :
- icase ? str_is_isuffix(s, suffix, sul) : str_is_suffix(s, suffix,
- sul);
- return msg;
-}
-
bit
str_contains(const char *h, const char *n, int nlen)
{
@@ -3844,25 +3804,69 @@ str_icontains(const char *h, const char
return utf8casestr(h, n) ? 0 : 1;
}
+#define STR_MAPARGS(STK, PCI, R, S1, S2, ICASE) \
+ do{ \
+ R = getArgReference(STK, PCI, 0); \
+ S1 = *getArgReference_str(STK, PCI, 1); \
+ S2 = *getArgReference_str(STK, PCI, 2); \
+ icase = PCI->argc == 4 && \
+ *getArgReference_bit(STK, PCI, 3) ? true : false; \
+ \
+ } while(0)
+
+static str
+STRstartswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ (void) cntxt;
+ (void) mb;
+
+ str s1, s2;
+ bit *r, icase;
+
+ STR_MAPARGS(stk, pci, r, s1, s2, icase);
+
+ int s2_len = str_strlen(s2);
+ *r = (strNil(s1) || strNil(s2)) ? bit_nil :
+ icase ? str_is_iprefix(s1, s2, s2_len) == 0 :
+ str_is_prefix(s1, s2, s2_len) == 0;
+ return MAL_SUCCEED;
+}
+
+static str
+STRendswith(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ (void) cntxt;
+ (void) mb;
+
+ str s1, s2;
+ bit *r, icase;
+
+ STR_MAPARGS(stk, pci, r, s1, s2, icase);
+
+ int s2_len = str_strlen(s2);
+ *r = (strNil(s1) || strNil(s2)) ? bit_nil :
+ icase ? str_is_isuffix(s1, s2, s2_len) == 0 :
+ str_is_suffix(s1, s2, s2_len) == 0;
+ return MAL_SUCCEED;
+}
+
/* returns whether haystack contains needle */
static str
STRcontains(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
(void) cntxt;
(void) mb;
- bit *res = getArgReference(stk, pci, 0);
- const str *arg1 = getArgReference(stk, pci, 1),
- *arg2 = getArgReference(stk, pci, 2);
- bit icase = pci->argc == 4
- && *getArgReference_bit(stk, pci, 3) ? true : false;
- str haystack = *arg1, needle = *arg2, msg = MAL_SUCCEED;
- int needle_len = str_strlen(needle);
-
- *res = (strNil(haystack) || strNil(needle)) ? bit_nil :
- icase ? str_icontains(haystack, needle,
- needle_len) : str_contains(haystack, needle,
- needle_len);
- return msg;
+
+ str s1, s2;
+ bit *r, icase;
+
+ STR_MAPARGS(stk, pci, r, s1, s2, icase);
+
+ int s2_len = str_strlen(s2);
+ *r = (strNil(s1) || strNil(s2)) ? bit_nil :
+ icase ? str_icontains(s1, s2, s2_len) == 0 :
+ str_contains(s1, s2, s2_len) == 0;
+ return MAL_SUCCEED;
}
int
@@ -5150,7 +5154,7 @@ BBPnreclaim(int nargs, ...)
} while (0)
static str
-do_string_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
+str_select(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q,
BUN *rcnt, const char *key, bool anti,
bit (*str_cmp)(const char *, const char *, int),
bool keep_nulls)
@@ -5170,10 +5174,9 @@ do_string_select(BAT *bn, BAT *b, BAT *s
qry_ctx->querytimeout) : 0;
if (anti) /* keep nulls ? (use false for now) */
- scanloop_anti(v && *v != '\200'
- && str_cmp(v, key, klen) == 0, keep_nulls);
+ scanloop_anti(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls);
else
- scanloop(v && *v != '\200' && str_cmp(v, key, klen) != 0, keep_nulls);
+ scanloop(v && *v != '\200' && str_cmp(v, key, klen) == 0, keep_nulls);
bailout:
bat_iterator_end(&bi);
@@ -5182,41 +5185,40 @@ do_string_select(BAT *bn, BAT *b, BAT *s
}
static str
-string_select(bat *ret, const bat *bid, const bat *sid, const str *key,
- const bit *anti, bit (*str_cmp)(const char *, const char *, int),
+STRselect(bat *r_id, const bat *b_id, const bat *cb_id, const char *key,
+ const bit anti, bit (*str_cmp)(const char *, const char *, int),
const str fname)
{
- BAT *b, *s = NULL, *bn = NULL, *old_s = NULL;;
str msg = MAL_SUCCEED;
+
+ BAT *b, *cb = NULL, *r = NULL, *old_s = NULL;;
BUN p = 0, q = 0, rcnt = 0;
struct canditer ci;
- bool with_strimps = false, with_strimps_anti = false;
-
- if ((b = BATdescriptor(*bid)) == NULL) {
- msg = createException(MAL, fname,
- SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
- goto bailout;
- }
- if (sid && !is_bat_nil(*sid) && (s = BATdescriptor(*sid)) == NULL) {
- msg = createException(MAL, fname,
- SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
- goto bailout;
+ bool with_strimps = false,
+ with_strimps_anti = false;
+
+ if (!(b = BATdescriptor(*b_id)))
+ throw(MAL, fname, RUNTIME_OBJECT_MISSING);
+
+ if (cb_id && !is_bat_nil(*cb_id) && !(cb = BATdescriptor(*cb_id))) {
+ BBPreclaim(b);
+ throw(MAL, fname, RUNTIME_OBJECT_MISSING);
}
assert(ATOMstorage(b->ttype) == TYPE_str);
if (BAThasstrimps(b)) {
if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
- BAT *tmp_s = STRMPfilter(b, s, *key, *anti);
+ BAT *tmp_s = STRMPfilter(b, cb, key, anti);
if (tmp_s) {
- old_s = s;
- s = tmp_s;
- if (!*anti)
+ old_s = cb;
+ cb = tmp_s;
+ if (!anti)
with_strimps = true;
else
with_strimps_anti = true;
}
- } else { /* If we cannot filter with the strimp just continue normally */
+ } else {
GDKclrerr();
}
}
@@ -5225,20 +5227,19 @@ string_select(bat *ret, const bat *bid,
"string_select: strcmp function using strimps" :
(with_strimps_anti ?
"string_select: strcmp function using strimps anti"
- :
- "string_select: strcmp function with no accelerator"));
-
- canditer_init(&ci, b, s);
- if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
- msg = createException(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
- goto bailout;
+ : "string_select: strcmp function with no accelerator"));
+
+ canditer_init(&ci, b, cb);
+ if (!(r = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) {
+ BBPnreclaim(2, b, cb);
+ throw(MAL, fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);
}
- if (!s || BATtdense(s)) {
- if (s) {
- assert(BATtdense(s));
- p = (BUN) s->tseqbase;
- q = p + BATcount(s);
+ if (!cb || BATtdense(cb)) {
+ if (cb) {
+ assert(BATtdense(cb));
+ p = (BUN) cb->tseqbase;
+ q = p + BATcount(cb);
if ((oid) p < b->hseqbase)
p = b->hseqbase;
if ((oid) q > b->hseqbase + BATcount(b))
@@ -5249,101 +5250,131 @@ string_select(bat *ret, const bat *bid,
}
}
- msg = do_string_select(bn, b, s, &ci, p, q, &rcnt, *key, *anti
- && !with_strimps_anti, str_cmp, with_strimps_anti);
-
- if (!msg) { /* set some properties */
- BATsetcount(bn, rcnt);
- bn->tsorted = true;
- bn->trevsorted = bn->batCount <= 1;
- bn->tkey = true;
- bn->tnil = false;
- bn->tnonil = true;
- bn->tseqbase = rcnt == 0 ? 0 : rcnt == 1 ? *(const oid *) Tloc(bn, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
+ msg = str_select(r, b, cb, &ci, p, q, &rcnt, key, anti
+ && !with_strimps_anti, str_cmp, with_strimps_anti);
+
+ if (!msg) {
+ BATsetcount(r, rcnt);
+ r->tsorted = r->batCount <= 1;
+ r->trevsorted = r->batCount <= 1;
+ r->tkey = false;
+ r->tnil = false;
+ r->tnonil = true;
+ r->tseqbase = rcnt == 0 ?
+ 0 : rcnt == 1 ?
+ *(const oid *) Tloc(r, 0) : rcnt == b->batCount ? b->hseqbase : oid_nil;
+
if (with_strimps_anti) {
BAT *rev;
if (old_s) {
- rev = BATdiffcand(old_s, bn);
+ rev = BATdiffcand(old_s, r);
#ifndef NDEBUG
- BAT *is = BATintersectcand(old_s, bn);
+ BAT *is = BATintersectcand(old_s, r);
if (is) {
- assert(is->batCount == bn->batCount);
+ assert(is->batCount == r->batCount);
BBPreclaim(is);
}
- assert(rev->batCount == old_s->batCount - bn->batCount);
+ assert(rev->batCount == old_s->batCount - r->batCount);
#endif
- }
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]