Changeset: d8fa85199335 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d8fa85199335
Modified Files:
        monetdb5/modules/atoms/str.c
Branch: sw_ew_c_sorting
Log Message:

Sort-based startswith/endswith joins (cost decision is still WIP). Contains
still uses strimps.


diffs (truncated from 1185 to 300 lines):

diff --git a/monetdb5/modules/atoms/str.c b/monetdb5/modules/atoms/str.c
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -3757,13 +3757,13 @@ STRupper(str *res, const str *arg1)
 bit
 str_is_prefix(const char *s, const char *prefix, int plen)
 {
-       return strncmp(s, prefix, plen) == 0;
+       return strncmp(s, prefix, plen);
 }
 
 bit
 str_is_iprefix(const char *s, const char *prefix, int plen)
 {
-       return utf8ncasecmp(s, prefix, plen) == 0;
+       return utf8ncasecmp(s, prefix, plen);
 }
 
 static str
@@ -3791,9 +3791,9 @@ str_is_suffix(const char *s, const char 
        int sl = str_strlen(s);
 
        if (sl < sul)
-               return 0;
+               return -1;
        else
-               return strcmp(s + sl - sul, suffix) == 0;
+               return strcmp(s + sl - sul, suffix);
 }
 
 bit
@@ -3802,9 +3802,9 @@ str_is_isuffix(const char *s, const char
        int sl = str_strlen(s);
 
        if (sl < sul)
-               return 0;
+               return -1;
        else
-               return utf8casecmp(s + sl - sul, suffix) == 0;
+               return utf8casecmp(s + sl - sul, suffix);
 }
 
 
@@ -3833,10 +3833,7 @@ str_contains(const char *h, const char *
 {
        (void) nlen;
        /* 64bit: should return lng */
-       if (strstr(h, n) != NULL)
-               return TRUE;
-       else
-               return FALSE;
+       return strstr(h, n) ? 0 : 1;
 }
 
 bit
@@ -3844,10 +3841,7 @@ str_icontains(const char *h, const char 
 {
        (void) nlen;
        /* 64bit: should return lng */
-       if (utf8casestr(h, n) != NULL)
-               return TRUE;
-       else
-               return FALSE;
+       return utf8casestr(h, n) ? 0 : 1;
 }
 
 /* returns whether haystack contains needle */
@@ -5365,176 +5359,422 @@ STRcontainsselect(Client cntxt, MalBlkPt
                B->tseqbase = 0;                                                
\
        } while (0)
 
-#define str_join_loop(STRCMP, STR_LEN)                                         
                        \
+#define CONTAINS_JOIN_LOOP(STR_CMP, STR_LEN)                                   
                \
        do {                                                                    
                                                        \
+               canditer_init(&rci, r, cr);                                     
                                \
                for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                  
                \
                        BAT *filtered_sl = NULL;                                
                                        \
                        GDK_CHECK_TIMEOUT(timeoffset, counter, 
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
                        ro = canditer_next(&rci);                               
                                        \
                        vr = VALUE(r, ro - rbase);                              
                                        \
-                       rlen = STR_LEN;                                         
                                        \
-                       nl = 0;                                                 
                                                \
+                       vr_len = STR_LEN;                                       
                                                \
+                       matches = 0;                                            
                                                \
                        if (with_strimps)                                       
                                                \
-                               filtered_sl = STRMPfilter(l, sl, vr, anti);     
                \
+                               filtered_sl = STRMPfilter(l, cl, vr, anti);     
                \
                        if (filtered_sl)                                        
                                                \
                                canditer_init(&lci, l, filtered_sl);            
                        \
                        else                                                    
                                                        \
-                               canditer_init(&lci, l, sl);                     
                                \
+                               canditer_init(&lci, l, cl);                     
                                \
                        for (BUN lidx = 0; lidx < lci.ncand; lidx++) {          
                \
                                lo = canditer_next(&lci);                       
                                        \
                                vl = VALUE(l, lo - lbase);                      
                                        \
-                               if (strNil(vl)) {                               
                                                \
+                               if (strNil(vl))                                 
                                        \
                                        continue;                               
                                                        \
-                               } else if (!(STRCMP)) {                         
                                \
+                               if (STR_CMP)                                    
                                                \
                                        continue;                               
                                                        \
-                               }                                               
                                                                \
-                               if (BATcount(r1) == BATcapacity(r1)) {          
                        \
-                                       newcap = BATgrows(r1);                  
                                        \
-                                       BATsetcount(r1, BATcount(r1));          
                                \
-                                       if (r2)                                 
                                                \
-                                               BATsetcount(r2, BATcount(r2));  
                                \
-                                       if (BATextend(r1, newcap) != 
GDK_SUCCEED ||             \
-                                               (r2 && BATextend(r2, newcap) != 
GDK_SUCCEED)) { \
-                                               msg = createException(MAL, 
"str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+                               if (BATcount(rl) == BATcapacity(rl)) {          
                        \
+                                       newcap = BATgrows(rl);                  
                                        \
+                                       BATsetcount(rl, BATcount(rl));          
                                \
+                                       if (rr)                                 
                                                \
+                                               BATsetcount(rr, BATcount(rr));  
                                \
+                                       if (BATextend(rl, newcap) != 
GDK_SUCCEED ||             \
+                                               (rr && BATextend(rr, newcap) != 
GDK_SUCCEED)) { \
+                                               msg = createException(MAL, 
fname, SQLSTATE(HY013) MAL_MALLOC_FAIL);     \
                                                goto exit;                      
                                                        \
                                        }                                       
                                                                \
-                                       assert(!r2 || BATcapacity(r1) == 
BATcapacity(r2));      \
+                                       assert(!rr || BATcapacity(rl) == 
BATcapacity(rr));      \
                                }                                               
                                                                \
-                               if (BATcount(r1) > 0) {                         
                                \
+                               if (BATcount(rl) > 0) {                         
                                \
                                        if (lastl + 1 != lo)                    
                                        \
-                                               r1->tseqbase = oid_nil;         
                                \
-                                       if (nl == 0) {                          
                                                \
-                                               if (r2)                         
                                                \
-                                                       r2->trevsorted = false; 
                                \
+                                               rl->tseqbase = oid_nil;         
                                \
+                                       if (matches == 0) {                     
                                        \
+                                               if (rr)                         
                                                \
+                                                       rr->trevsorted = false; 
                                \
                                                if (lastl > lo) {               
                                                \
-                                                       r1->tsorted = false;    
                                        \
-                                                       r1->tkey = false;       
                                                \
+                                                       rl->tsorted = false;    
                                        \
+                                                       rl->tkey = false;       
                                                \
                                                } else if (lastl < lo) {        
                                        \
-                                                       r1->trevsorted = false; 
                                \
+                                                       rl->trevsorted = false; 
                                \
                                                } else {                        
                                                        \
-                                                       r1->tkey = false;       
                                                \
+                                                       rl->tkey = false;       
                                                \
                                                }                               
                                                                \
                                        }                                       
                                                                \
                                }                                               
                                                                \
-                               APPEND(r1, lo);                                 
                                        \
-                               if (r2)                                         
                                                \
-                                       APPEND(r2, ro);                         
                                        \
+                               APPEND(rl, lo);                                 
                                        \
+                               if (rr)                                         
                                                \
+                                       APPEND(rr, ro);                         
                                        \
                                lastl = lo;                                     
                                                \
-                               nl++;                                           
                                                        \
+                               matches++;                                      
                                                        \
                        }                                                       
                                                                \
                        BBPreclaim(filtered_sl);                                
                                        \
-                       if (r2) {                                               
                                                        \
-                               if (nl > 1) {                                   
                                                \
-                                       r2->tkey = false;                       
                                                \
-                                       r2->tseqbase = oid_nil;                 
                                \
-                                       r1->trevsorted = false;                 
                                \
-                               } else if (nl == 0) {                           
                                        \
-                                       rskipped = BATcount(r2) > 0;            
                                \
+                       if (rr) {                                               
                                                        \
+                               if (matches > 1) {                              
                                                \
+                                       rr->tkey = false;                       
                                                \
+                                       rr->tseqbase = oid_nil;                 
                                \
+                                       rl->trevsorted = false;                 
                                \
+                               } else if (matches == 0) {                      
                                        \
+                                       rskipped = BATcount(rr) > 0;            
                                \
                                } else if (rskipped) {                          
                                        \
-                                       r2->tseqbase = oid_nil;                 
                                \
+                                       rr->tseqbase = oid_nil;                 
                                \
                                }                                               
                                                                \
-                       } else if (nl > 1) {                                    
                                        \
-                               r1->trevsorted = false;                         
                                \
+                       } else if (matches > 1) {                               
                                        \
+                               rl->trevsorted = false;                         
                                \
                        }                                                       
                                                                \
                }                                                               
                                                                \
        } while (0)
 
-#define str_antijoin_loop(STRCMP, STR_LEN)                                     
                        \
+#define STR_JOIN_NESTED_LOOP(STR_CMP, STR_LEN, FNAME)                          
        \
        do {                                                                    
                                                        \
+               canditer_init(&rci, r, cr);                                     
                                \
+               for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                  
                \
+                       GDK_CHECK_TIMEOUT(timeoffset, counter, 
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
+                       ro = canditer_next(&rci);                               
                                        \
+                       vr = VALUE(r, ro - rbase);                              
                                        \
+                       if (strNil(vr))                                         
                                        \
+                               continue;                                       
                                                        \
+                       vr_len = STR_LEN;                                       
                                                \
+                       matches = 0;                                            
                                                \
+                       canditer_init(&lci, l, cl);                             
                                \
+                       for (BUN lidx = 0; lidx < lci.ncand; lidx++) {          
                \
+                               lo = canditer_next(&lci);                       
                                        \
+                               vl = VALUE(l, lo - lbase);                      
                                        \
+                               if (strNil(vl))                                 
                                        \
+                                       continue;                               
                                                        \
+                               if (STR_CMP)                                    
                                                \
+                                       continue;                               
                                                        \
+                               if (BATcount(rl) == BATcapacity(rl)) {          
                        \
+                                       newcap = BATgrows(rl);                  
                                        \
+                                       BATsetcount(rl, BATcount(rl));          
                                \
+                                       if (rr)                                 
                                                \
+                                               BATsetcount(rr, BATcount(rr));  
                                \
+                                       if (BATextend(rl, newcap) != 
GDK_SUCCEED ||             \
+                                               (rr && BATextend(rr, newcap) != 
GDK_SUCCEED)) { \
+                                               msg = createException(MAL, 
FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+                                               goto exit;                      
                                                        \
+                                       }                                       
                                                                \
+                                       assert(!rr || BATcapacity(rl) == 
BATcapacity(rr));      \
+                               }                                               
                                                                \
+                               if (BATcount(rl) > 0) {                         
                                \
+                                       if (last_lo + 1 != lo)                  
                                        \
+                                               rl->tseqbase = oid_nil;         
                                \
+                                       if (matches == 0) {                     
                                        \
+                                               if (rr)                         
                                                \
+                                                       rr->trevsorted = false; 
                                \
+                                               if (last_lo > lo) {             
                                        \
+                                                       rl->tsorted = false;    
                                        \
+                                                       rl->tkey = false;       
                                                \
+                                               } else if (last_lo < lo) {      
                                        \
+                                                       rl->trevsorted = false; 
                                \
+                                               } else {                        
                                                        \
+                                                       rl->tkey = false;       
                                                \
+                                               }                               
                                                                \
+                                       }                                       
                                                                \
+                               }                                               
                                                                \
+                               APPEND(rl, lo);                                 
                                        \
+                               if (rr)                                         
                                                \
+                                       APPEND(rr, ro);                         
                                        \
+                               last_lo = lo;                                   
                                                \
+                               matches++;                                      
                                                        \
+                       }                                                       
                                                                \
+                       if (rr) {                                               
                                                        \
+                               if (matches > 1) {                              
                                                \
+                                       rr->tkey = false;                       
                                                \
+                                       rr->tseqbase = oid_nil;                 
                                \
+                                       rl->trevsorted = false;                 
                                \
+                               } else if (matches == 0) {                      
                                        \
+                                       rskipped = BATcount(rr) > 0;            
                                \
+                               } else if (rskipped) {                          
                                        \
+                                       rr->tseqbase = oid_nil;                 
                                \
+                               }                                               
                                                                \
+                       } else if (matches > 1) {                               
                                        \
+                               rl->trevsorted = false;                         
                                \
+                       }                                                       
                                                                \
+               }                                                               
                                                                \
+       } while (0)
+
+#define STARTSWITH_SORTED_LOOP(STR_CMP, STR_LEN, FNAME)                        
        \
+       do {                                                                    
                                                        \
+               canditer_init(&rci, sorted_r, sorted_cr);                       
                        \
                for (BUN ridx = 0; ridx < rci.ncand; ridx++) {                  
                \
                        GDK_CHECK_TIMEOUT(timeoffset, counter, 
GOTO_LABEL_TIMEOUT_HANDLER(exit)); \
                        ro = canditer_next(&rci);                               
                                        \
                        vr = VALUE(r, ro - rbase);                              
                                        \
-                       rlen = STR_LEN;                                         
                                        \
-                       nl = 0;                                                 
                                                \
-                       canditer_init(&lci, l, sl);                             
                                \
+                       if (strNil(vr))                                         
                                        \
+                               continue;                                       
                                                        \
+                       vr_len = STR_LEN;                                       
                                                \
+                       matches = 0;                                            
                                                \
+                       canditer_init(&lci, sorted_l, sorted_cl);               
                        \
                        for (BUN lidx = 0; lidx < lci.ncand; lidx++) {          
                \
                                lo = canditer_next(&lci);                       
                                        \
                                vl = VALUE(l, lo - lbase);                      
                                        \
-                               if (strNil(vl)) {                               
                                                \
+                               if (strNil(vl))                                 
                                        \
                                        continue;                               
                                                        \
-                               } else if (!(STRCMP)) {                         
                                \
+                               cmp = STR_CMP;                                  
                                                \
+                               if (cmp < 0)                                    
                                                \
                                        continue;                               
                                                        \
-                               }                                               
                                                                \
-                               if (BATcount(r1) == BATcapacity(r1)) {          
                        \
-                                       newcap = BATgrows(r1);                  
                                        \
-                                       BATsetcount(r1, BATcount(r1));          
                                \
-                                       if (r2)                                 
                                                \
-                                               BATsetcount(r2, BATcount(r2));  
                                \
-                                       if (BATextend(r1, newcap) != 
GDK_SUCCEED ||             \
-                                               (r2 && BATextend(r2, newcap) != 
GDK_SUCCEED)) { \
-                                               msg = createException(MAL, 
"str.%s", fname, SQLSTATE(HY013) MAL_MALLOC_FAIL); \
+                               else if (cmp > 0)                               
                                                \
+                                       break;                                  
                                                        \
+                               if (BATcount(rl) == BATcapacity(rl)) {          
                        \
+                                       newcap = BATgrows(rl);                  
                                        \
+                                       BATsetcount(rl, BATcount(rl));          
                                \
+                                       if (rr)                                 
                                                \
+                                               BATsetcount(rr, BATcount(rr));  
                                \
+                                       if (BATextend(rl, newcap) != 
GDK_SUCCEED ||             \
+                                               (rr && BATextend(rr, newcap) != 
GDK_SUCCEED)) { \
+                                               msg = createException(MAL, 
FNAME, SQLSTATE(HY013) MAL_MALLOC_FAIL);     \
                                                goto exit;                      
                                                        \
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to