Changeset: d8fa85199335 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d8fa85199335
Modified Files:
	monetdb5/modules/atoms/str.c
Branch: sw_ew_c_sorting
Log Message:
Sort-based starts-with/ends-with joins (cost decision is work in progress).
Contains still uses strimps.
diffs (truncated from 1185 to 300 lines):
diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3757,13 +3757,13 @@ STRupper(str *res, const str *arg1)
bit
str_is_prefix(const char *s, const char *prefix, int plen)
{
- return strncmp(s, prefix, plen) == 0;
+ return strncmp(s, prefix, plen);
}
bit
str_is_iprefix(const char *s, const char *prefix, int plen)
{
- return utf8ncasecmp(s, prefix, plen) == 0;
+ return utf8ncasecmp(s, prefix, plen);
}
static str
@@ -3791,9 +3791,9 @@ str_is_suffix(const char *s, const char
int sl = str_strlen(s);
if (sl < sul)
- return 0;
+ return -1;
else
- return strcmp(s + sl - sul, suffix) == 0;
+ return strcmp(s + sl - sul, suffix);
}
bit
@@ -3802,9 +3802,9 @@ str_is_isuffix(const char *s, const char
int sl = str_strlen(s);
if (sl < sul)
- return 0;
+ return -1;
else
- return utf8casecmp(s + sl - sul, suffix) == 0;
+ return utf8casecmp(s + sl - sul, suffix);
}
@@ -3833,10 +3833,7 @@ str_contains(const char *h, const char *
{
(void) nlen;
/* 64bit: should return lng */
- if (strstr(h, n) != NULL)
- return TRUE;
- else
- return FALSE;
+ return strstr(h, n) ? 0 : 1;
}
bit
@@ -3844,10 +3841,7 @@ str_icontains(const char *h, const char
{
(void) nlen;
/* 64bit: should return lng */
- if (utf8casestr(h, n) != NULL)
- return TRUE;
- else
- return FALSE;
+ return utf8casestr(h, n) ? 0 : 1;
}
/* returns whether haystack contains needle */
@@ -5365,176 +5359,422 @@ STRcontainsselect(Client cntxt, MalBlkPt
B->tseqbase = 0;
\
} while (0)
-#define str_join_loop(STRCMP, STR_LEN)
\
+#define CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN)
\
do {
\
+ canditer_init(&rci, r, cr);
\
for (BUN ridx = 0; ridx < rci.ncand; ridx++) {
\
BAT *filtered_sl = NULL;
\
GDK_CHECK_TIMEOUT(timeoffset, counter,
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
ro = canditer_next(&rci);
\
vr = VALUE(r, ro - rbase);
\
- rlen = STR_LEN;
\
- nl = 0;
\
+ vr_len = STR_LEN;
\
+ matches = 0;
\
if (with_strimps)
\
- filtered_sl = STRMPfilter(l, sl, vr, anti);
\
+ filtered_sl = STRMPfilter(l, cl, vr, anti);
\
if (filtered_sl)
\
canditer_init(&lci, l, filtered_sl);
\
else
\
- canditer_init(&lci, l, sl);
\
+ canditer_init(&lci, l, cl);
\
for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
\
lo = canditer_next(&lci);
\
vl = VALUE(l, lo - lbase);
\
- if (strNil(vl)) {
\
+ if (strNil(vl))
\
continue;
\
- } else if (!(STRCMP)) {
\
+ if (STR_CMP)
\
continue;
\
- }
\
- if (BATcount(r1) == BATcapacity(r1)) {
\
- newcap = BATgrows(r1);
\
- BATsetcount(r1, BATcount(r1));
\
- if (r2)
\
- BATsetcount(r2, BATcount(r2));
\
- if (BATextend(r1, newcap) !=
GDK_SUCCEED || \
- (r2 && BATextend(r2, newcap) !=
GDK_SUCCEED)) { \
- msg = createException(MAL,
"str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+ if (BATcount(rl) == BATcapacity(rl)) {
\
+ newcap = BATgrows(rl);
\
+ BATsetcount(rl, BATcount(rl));
\
+ if (rr)
\
+ BATsetcount(rr, BATcount(rr));
\
+ if (BATextend(rl, newcap) !=
GDK_SUCCEED || \
+ (rr && BATextend(rr, newcap) !=
GDK_SUCCEED)) { \
+ msg = createException(MAL,
fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
goto exit;
\
}
\
- assert(!r2 || BATcapacity(r1) ==
BATcapacity(r2)); \
+ assert(!rr || BATcapacity(rl) ==
BATcapacity(rr)); \
}
\
- if (BATcount(r1) > 0) {
\
+ if (BATcount(rl) > 0) {
\
if (lastl + 1 != lo)
\
- r1->tseqbase = oid_nil;
\
- if (nl == 0) {
\
- if (r2)
\
- r2->trevsorted = false;
\
+ rl->tseqbase = oid_nil;
\
+ if (matches == 0) {
\
+ if (rr)
\
+ rr->trevsorted = false;
\
if (lastl > lo) {
\
- r1->tsorted = false;
\
- r1->tkey = false;
\
+ rl->tsorted = false;
\
+ rl->tkey = false;
\
} else if (lastl < lo) {
\
- r1->trevsorted = false;
\
+ rl->trevsorted = false;
\
} else {
\
- r1->tkey = false;
\
+ rl->tkey = false;
\
}
\
}
\
}
\
- APPEND(r1, lo);
\
- if (r2)
\
- APPEND(r2, ro);
\
+ APPEND(rl, lo);
\
+ if (rr)
\
+ APPEND(rr, ro);
\
lastl = lo;
\
- nl++;
\
+ matches++;
\
}
\
BBPreclaim(filtered_sl);
\
- if (r2) {
\
- if (nl > 1) {
\
- r2->tkey = false;
\
- r2->tseqbase = oid_nil;
\
- r1->trevsorted = false;
\
- } else if (nl == 0) {
\
- rskipped = BATcount(r2) > 0;
\
+ if (rr) {
\
+ if (matches > 1) {
\
+ rr->tkey = false;
\
+ rr->tseqbase = oid_nil;
\
+ rl->trevsorted = false;
\
+ } else if (matches == 0) {
\
+ rskipped = BATcount(rr) > 0;
\
} else if (rskipped) {
\
- r2->tseqbase = oid_nil;
\
+ rr->tseqbase = oid_nil;
\
}
\
- } else if (nl > 1) {
\
- r1->trevsorted = false;
\
+ } else if (matches > 1) {
\
+ rl->trevsorted = false;
\
}
\
}
\
} while (0)
-#define str_antijoin_loop(STRCMP, STR_LEN)
\
+#define STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME)
\
do {
\
+ canditer_init(&rci, r, cr);
\
+ for (BUN ridx = 0; ridx < rci.ncand; ridx++) {
\
+ GDK_CHECK_TIMEOUT(timeoffset, counter,
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
+ ro = canditer_next(&rci);
\
+ vr = VALUE(r, ro - rbase);
\
+ if (strNil(vr))
\
+ continue;
\
+ vr_len = STR_LEN;
\
+ matches = 0;
\
+ canditer_init(&lci, l, cl);
\
+ for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
\
+ lo = canditer_next(&lci);
\
+ vl = VALUE(l, lo - lbase);
\
+ if (strNil(vl))
\
+ continue;
\
+ if (STR_CMP)
\
+ continue;
\
+ if (BATcount(rl) == BATcapacity(rl)) {
\
+ newcap = BATgrows(rl);
\
+ BATsetcount(rl, BATcount(rl));
\
+ if (rr)
\
+ BATsetcount(rr, BATcount(rr));
\
+ if (BATextend(rl, newcap) !=
GDK_SUCCEED || \
+ (rr && BATextend(rr, newcap) !=
GDK_SUCCEED)) { \
+ msg = createException(MAL,
FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+ goto exit;
\
+ }
\
+ assert(!rr || BATcapacity(rl) ==
BATcapacity(rr)); \
+ }
\
+ if (BATcount(rl) > 0) {
\
+ if (last_lo + 1 != lo)
\
+ rl->tseqbase = oid_nil;
\
+ if (matches == 0) {
\
+ if (rr)
\
+ rr->trevsorted = false;
\
+ if (last_lo > lo) {
\
+ rl->tsorted = false;
\
+ rl->tkey = false;
\
+ } else if (last_lo < lo) {
\
+ rl->trevsorted = false;
\
+ } else {
\
+ rl->tkey = false;
\
+ }
\
+ }
\
+ }
\
+ APPEND(rl, lo);
\
+ if (rr)
\
+ APPEND(rr, ro);
\
+ last_lo = lo;
\
+ matches++;
\
+ }
\
+ if (rr) {
\
+ if (matches > 1) {
\
+ rr->tkey = false;
\
+ rr->tseqbase = oid_nil;
\
+ rl->trevsorted = false;
\
+ } else if (matches == 0) {
\
+ rskipped = BATcount(rr) > 0;
\
+ } else if (rskipped) {
\
+ rr->tseqbase = oid_nil;
\
+ }
\
+ } else if (matches > 1) {
\
+ rl->trevsorted = false;
\
+ }
\
+ }
\
+ } while (0)
+
+#define STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)
\
+ do {
\
+ canditer_init(&rci, sorted_r, sorted_cr);
\
for (BUN ridx = 0; ridx < rci.ncand; ridx++) {
\
GDK_CHECK_TIMEOUT(timeoffset, counter,
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
ro = canditer_next(&rci);
\
vr = VALUE(r, ro - rbase);
\
- rlen = STR_LEN;
\
- nl = 0;
\
- canditer_init(&lci, l, sl);
\
+ if (strNil(vr))
\
+ continue;
\
+ vr_len = STR_LEN;
\
+ matches = 0;
\
+ canditer_init(&lci, sorted_l, sorted_cl);
\
for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
\
lo = canditer_next(&lci);
\
vl = VALUE(l, lo - lbase);
\
- if (strNil(vl)) {
\
+ if (strNil(vl))
\
continue;
\
- } else if (!(STRCMP)) {
\
+ cmp = STR_CMP;
\
+ if (cmp < 0)
\
continue;
\
- }
\
- if (BATcount(r1) == BATcapacity(r1)) {
\
- newcap = BATgrows(r1);
\
- BATsetcount(r1, BATcount(r1));
\
- if (r2)
\
- BATsetcount(r2, BATcount(r2));
\
- if (BATextend(r1, newcap) !=
GDK_SUCCEED || \
- (r2 && BATextend(r2, newcap) !=
GDK_SUCCEED)) { \
- msg = createException(MAL,
"str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+ else if (cmp > 0)
\
+ break;
\
+ if (BATcount(rl) == BATcapacity(rl)) {
\
+ newcap = BATgrows(rl);
\
+ BATsetcount(rl, BATcount(rl));
\
+ if (rr)
\
+ BATsetcount(rr, BATcount(rr));
\
+ if (BATextend(rl, newcap) !=
GDK_SUCCEED || \
+ (rr && BATextend(rr, newcap) !=
GDK_SUCCEED)) { \
+ msg = createException(MAL,
FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
goto exit;
\
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-leave@monetdb.org
