Update of /cvsroot/monetdb/MonetDB5/src/modules/kernel
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv5179/src/modules/kernel

Modified Files:
        algebra.mx batstr.mx 
Log Message:
(Re)introduced module batstr (as opt_remap needs a simple translation from
the single-value-at-a-time module name into the bulk version's module name),
for example str.trim into batstr.trim. This makes a lot of functions in SQL
a lot faster.

Fixed a couple of variable atom referencing bugs in batstr.mx

Added a MAL function for the new rangejoin to algebra.mx

opt_mergetable got lots of fixes


Index: batstr.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/modules/kernel/batstr.mx,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -d -r1.54 -r1.55
--- batstr.mx   26 Nov 2007 12:03:24 -0000      1.54
+++ batstr.mx   21 Dec 2007 15:29:35 -0000      1.55
@@ -28,18 +28,16 @@
 the GRPsplit operation.
 @{
 @mal
-command batcalc.length( s:bat[:oid,:str] ) :bat[:oid,:int]
+module batstr;
+
+command batstr.length( s:bat[:oid,:str] ) :bat[:oid,:int]
 address STRbatLength
 comment "Return the length of a string.";
 
-command batcalc.nbytes( s:bat[:oid,:str] ) :bat[:oid,:int]
+command batstr.nbytes( s:bat[:oid,:str] ) :bat[:oid,:int]
 address STRbatBytes
 comment "Return the string length in bytes.";
 
-command batcalc.match(b:bat[:oid,:str], pat:str):bat[:oid,:bit]
-address STRbatmatchCst
-comment "POSIX pattern matching against a string BAT";
-
 command batcalc.==( l:bat[:oid,:str], r:bat[:oid,:str]) :bat[:oid,:bit]
 address STRbatEqual
 comment "Equate a bat of strings against each other";
@@ -54,48 +52,48 @@
 address STRbatNotEqualCst
 comment "Equate a bat of strings against a singleton";
 
-command batcalc.toLower( s:bat[:oid,:str] ) :bat[:oid,:str]
+command batstr.toLower( s:bat[:oid,:str] ) :bat[:oid,:str]
 address STRbatLower
 comment "Convert a string to lower case.";
-command batcalc.toUpper( s:bat[:oid,:str] ) :bat[:oid,:str]
+command batstr.toUpper( s:bat[:oid,:str] ) :bat[:oid,:str]
 address STRbatUpper
 comment "Convert a string to upper case.";
 
-command batcalc.trim( s:bat[:oid,:str] ) :bat[:oid,:str]
+command batstr.trim( s:bat[:oid,:str] ) :bat[:oid,:str]
 address STRbatStrip
 comment "Strip whitespaces around a string.";
-command batcalc.ltrim( s:bat[:oid,:str] ) :bat[:oid,:str]
+command batstr.ltrim( s:bat[:oid,:str] ) :bat[:oid,:str]
 address STRbatLtrim
 comment "Strip whitespaces from start of a string.";
-command batcalc.rtrim( s:bat[:oid,:str] ) :bat[:oid,:str]
+command batstr.rtrim( s:bat[:oid,:str] ) :bat[:oid,:str]
 address STRbatRtrim
 comment "Strip whitespaces from end of a string.";
 
-command 
batcalc.startsWith(s:bat[:oid,:str],prefix:bat[:oid,:str]):bat[:oid,:bit]
+command 
batstr.startsWith(s:bat[:oid,:str],prefix:bat[:oid,:str]):bat[:oid,:bit]
 address STRbatPrefix
 comment "Prefix check.";
-command batcalc.startsWith(s:bat[:oid,:str],prefix:str):bat[:oid,:bit]
+command batstr.startsWith(s:bat[:oid,:str],prefix:str):bat[:oid,:bit]
 address STRbatPrefixcst
 comment "Prefix check.";
 
-command batcalc.endsWith( s:bat[:oid,:str], suffix:bat[:oid,:str] ) 
:bat[:oid,:bit]
+command batstr.endsWith( s:bat[:oid,:str], suffix:bat[:oid,:str] ) 
:bat[:oid,:bit]
 address STRbatSuffix
 comment "Suffix check.";
-command batcalc.endsWith( s:bat[:oid,:str], suffix:str ) :bat[:oid,:bit]
+command batstr.endsWith( s:bat[:oid,:str], suffix:str ) :bat[:oid,:bit]
 address STRbatSuffixcst
 comment "Suffix check.";
 
-command batcalc.search( s:bat[:oid,:str], c:bat[:oid,:str] ) :bat[:oid,:int]
+command batstr.search( s:bat[:oid,:str], c:bat[:oid,:str] ) :bat[:oid,:int]
 address STRbatstrSearch
 comment "Search for a substring. Returns position, -1 if not found.";
-command batcalc.search( s:bat[:oid,:str], c:str ) :bat[:oid,:int]
+command batstr.search( s:bat[:oid,:str], c:str ) :bat[:oid,:int]
 address STRbatstrSearchcst
 comment "Search for a substring. Returns position, -1 if not found.";
 
-command batcalc.r_search( s:bat[:oid,:str], c:bat[:oid,:str] ) :bat[:oid,:int]
+command batstr.r_search( s:bat[:oid,:str], c:bat[:oid,:str] ) :bat[:oid,:int]
 address STRbatRstrSearch
 comment "Reverse search for a substring. Returns position, -1 if not found.";
-command batcalc.r_search( s:bat[:oid,:str], c:str ) :bat[:oid,:int]
+command batstr.r_search( s:bat[:oid,:str], c:str ) :bat[:oid,:int]
 address STRbatRstrSearchcst
 comment "Reverse search for a substring. Returns position, -1 if not found.";
 
@@ -109,38 +107,38 @@
 address STRcstConcatbat
 comment "Concatenate two strings.";
 
-command batcalc.string(b:bat[:oid,:str],offset:bat[:oid,:int]) :bat[:oid,:str]
+command batstr.string(b:bat[:oid,:str],offset:bat[:oid,:int]) :bat[:oid,:str]
 address STRbatTail
 comment "Return the tail s[offset..n] of a string s[0..n].";
-command batcalc.string(b:bat[:oid,:str],offset:int) :bat[:oid,:str]
+command batstr.string(b:bat[:oid,:str],offset:int) :bat[:oid,:str]
 address STRbatTailcst
 comment "Return the tail s[offset..n] of a string s[0..n].";
 
-command batcalc.chrAt( s:bat[:oid,:str], index:bat[:oid,:int]) :bat[:oid,:chr]
+command batstr.chrAt( s:bat[:oid,:str], index:bat[:oid,:int]) :bat[:oid,:chr]
 address STRbatChrAt
 comment "String array lookup operation.";
-command batcalc.chrAt( s:bat[:oid,:str], index:int) :bat[:oid,:chr]
+command batstr.chrAt( s:bat[:oid,:str], index:int) :bat[:oid,:chr]
 address STRbatChrAtcst
 comment "String array lookup operation.";
 
-command batcalc.substring( s:bat[:oid,:str], start:bat[:oid,:int], 
+command batstr.substring( s:bat[:oid,:str], start:bat[:oid,:int], 
        index:bat[:oid,:int]) :bat[:oid,:str]
 address STRbatsubstring
 comment "Substring extraction using [start,start+length]";
 
-command batcalc.substring( s:bat[:oid,:str], start:int, index:int)
+command batstr.substring( s:bat[:oid,:str], start:int, index:int)
        :bat[:oid,:str]
 address STRbatsubstringcst
 comment "Substring extraction using [start,start+length]";
 
-command batcalc.unicodeAt(s:bat[:oid,:str], index:bat[:oid,:int]) 
:bat[:oid,:int]
+command batstr.unicodeAt(s:bat[:oid,:str], index:bat[:oid,:int]) 
:bat[:oid,:int]
 address STRbatWChrAt
 comment "get a unicode character (as an int) from a string position.";
-command batcalc.unicodeAt(s:bat[:oid,:str], index:int) :bat[:oid,:int]
+command batstr.unicodeAt(s:bat[:oid,:str], index:int) :bat[:oid,:int]
 address STRbatWChrAtcst
 comment "get a unicode character (as an int) from a string position.";
 
-command 
batcalc.substitute(s:bat[:oid,:str],src:str,dst:str,rep:bit):bat[:oid,:str]
+command 
batstr.substitute(s:bat[:oid,:str],src:str,dst:str,rep:bit):bat[:oid,:str]
 address STRbatSubstitutecst
 comment "Substitute first occurrence of 'src' by
        'dst'.  Iff repeated = true this is
@@ -150,11 +148,11 @@
        size, repeating is only done iff src is
        not a substring of dst.";
 
-command batcalc.like(s:bat[:oid,:str],pat:str):bat[:oid,:oid]
+command batstr.like(s:bat[:oid,:str],pat:str):bat[:oid,:oid]
 address STRbatlike2
 comment "Perform SQL like operation against a string bat";
 
-command batcalc.like(s:bat[:oid,:str],pat:str,esc:str):bat[:oid,:oid]
+command batstr.like(s:bat[:oid,:str],pat:str,esc:str):bat[:oid,:oid]
 address STRbatlike
 comment "Perform SQL like operation against a string bat";
 @+ Implementation
@@ -165,9 +163,6 @@
 #include <string.h>
 #include "mal_exception.h"
 #include "str.h"
-#ifdef HAVE_REGEX_H
-#include <regex.h>
-#endif
 
 #ifdef HAVE_LANGINFO_H
 #include <langinfo.h>
@@ -223,9 +218,6 @@
        BBPkeepref(*(X));\
        BBPreleaseref(Z->batCacheid);
 
-batstr_export str STRbatsubstringcst(int *ret, int *bid, int *start, int 
*length);
-batstr_export str STRbatsubstring(int *ret, int *bid, int *start, int *length);
-
 @= BATint
 batstr_export str [EMAIL PROTECTED](int *ret, int *l);
 str [EMAIL PROTECTED](int *ret, int *l)
@@ -270,10 +262,10 @@
        BAT *bn, *b;
        BUN p,q;
        @3 x;
-       @4 y, *yp = &y;
+       str y, *yp = &y;
 
        prepareOperand(b,l,"@1");
-       prepareResult(bn,b,[EMAIL PROTECTED],"@1");
+       prepareResult(bn,b,TYPE_str,"@1");
 
        bi = bat_iterator(b);
 
@@ -282,11 +274,12 @@
 
                x = (@3) BUNtail(bi,p);
                if (x== 0 || *x == 0 || strcmp(x,@3_nil)== 0)
-                       y = (@4)@4_nil;
+                       y = (str)str_nil;
                else 
                        @2(yp,x);
-               bunfastins(bn, h, *yp);
-               GDKfree(*yp);
+               bunfastins(bn, h, y);
+               if (y != str_nil)
+                       GDKfree(y);
        }
        finalizeResult(ret,bn,b);
        return MAL_SUCCEED;
@@ -296,11 +289,11 @@
        throw(MAL, "[EMAIL PROTECTED]", "bunins failed");
 }
 @c
-@:BATstr(Lower,strLower,str,str)@
-@:BATstr(Upper,strUpper,str,str)@
-@:BATstr(Strip,strStrip,str,str)@
-@:BATstr(Ltrim,strLtrim,str,str)@
-@:BATstr(Rtrim,strRtrim,str,str)@
+@:BATstr(Lower,strLower,str)@
+@:BATstr(Upper,strUpper,str)@
+@:BATstr(Strip,strStrip,str)@
+@:BATstr(Ltrim,strLtrim,str)@
+@:BATstr(Rtrim,strRtrim,str)@
 
 @-
 @c
@@ -348,7 +341,7 @@
 implementation for shifted window arithmetic as well.
 @= chkSize
        if( BATcount(@1) != BATcount(@2) )
-       throw(MAL, "[EMAIL PROTECTED]", "requires bats of identical size");
+       throw(MAL, "[EMAIL PROTECTED]", "requires bats of identical size");
 @c
 @= STRbatcmp
 batstr_export str [EMAIL PROTECTED](int *ret, int *l, int *r);
@@ -413,6 +406,7 @@
                ptr tr = BUNtail(righti,p);
                @4(vp, tl, tr);
                bunfastins(bn, h, vp);
+               @7;
        }
        BBPreleaseref(right->batCacheid);
        finalizeResult(ret,bn,left);
@@ -443,6 +437,7 @@
                ptr tl = BUNtail(lefti,p);
                @4(vp, tl, cst);
                bunfastins(bn, h, vp);
+               @7;
        }
        finalizeResult(ret,bn,left);
        return MAL_SUCCEED;
@@ -474,6 +469,7 @@
                str tr = (str) BUNtail(righti,p);
                @4(vp, &tl, &tr);
                bunfastins(bn, h, @6);
+               @7;
        }
        BBPreleaseref(right->batCacheid);
        finalizeResult(ret,bn,left);
@@ -504,6 +500,7 @@
                str tl = (str) BUNtail(lefti,p);
                @4(vp, &tl, cst);
                bunfastins(bn, h, @6);
+               @7;
        }
        finalizeResult(ret,bn,left);
        return MAL_SUCCEED;
@@ -532,6 +529,7 @@
                str tr = (str) BUNtail(righti,p);
                @4(vp, cst, &tr);
                bunfastins(bn, h, @6);
+               @7;
        }
        finalizeResult(ret,bn,right);
        return MAL_SUCCEED;
@@ -542,14 +540,14 @@
        throw(MAL, "batstr"@5, "bunins failed");
 }
 @c
-@:binarySTRstr(Prefix,bit,str,STRPrefix,"prefix",vp)@
-@:binarySTRstr(Suffix,bit,str,STRSuffix,"suffix",vp)@
-@:binarySTRstr(strSearch,int,str,STRstrSearch,"search",vp)@
-@:binarySTRstr(RstrSearch,int,str,STRReverseStrSearch,"r_search",vp)@
-@:binarySTRstr(Concat,str,str,STRConcat,"+",v)@
-@:binarySTR(Tail,str,int,strTail,"tail")@
-@:binarySTR(WChrAt,int,int,strWChrAt,"chrAt")@
-@:binarySTR(ChrAt,chr,int,strChrAt,"chrAt")@
+@:binarySTRstr(Prefix,bit,str,STRPrefix,"prefix",vp,)@
+@:binarySTRstr(Suffix,bit,str,STRSuffix,"suffix",vp,)@
+@:binarySTRstr(strSearch,int,str,STRstrSearch,"search",vp,)@
+@:binarySTRstr(RstrSearch,int,str,STRReverseStrSearch,"r_search",vp,)@
+@:binarySTRstr(Concat,str,str,STRConcat,"+",v,GDKfree(v))@
+@:binarySTR(Tail,str,int,strTail,"tail",v,GDKfree(v))@
+@:binarySTR(WChrAt,int,int,strWChrAt,"chrAt",vp,)@
+@:binarySTR(ChrAt,chr,int,strChrAt,"chrAt",vp,)@
 
 batstr_export str STRbatSubstitutecst(int *ret, int *l, str *arg2, str *arg3, 
bit *rep);
 str
@@ -570,11 +568,14 @@
                ptr h = BUNhead(bi, p);
 
                x = (str) BUNtail(bi, p);
-               if (x == 0 || *x == 0 || strcmp(x, str_nil) == 0)
+               if (x == 0 || *x == 0 || strcmp(x, str_nil) == 0) {
                        y = (str)str_nil;
-               else
+               } else {
                        STRSubstitute(yp, xp, arg2, arg3, rep);
-               bunfastins(bn, h, yp);
+               }
+               bunfastins(bn, h, y);
+               if (y != str_nil)
+                       GDKfree(yp);
        }
        finalizeResult(ret, bn, b);
        return MAL_SUCCEED;
@@ -584,56 +585,6 @@
        throw(MAL, "batstr.subString", "bunins failed");
 }
 
-batstr_export str STRbatmatchCst(int *ret, int *bid, str *pat);
-str
-STRbatmatchCst(int *ret, int *bid, str *pat)
-{
-#ifdef HAVE_REGEX_H
-       regex_t prg;
-       BATiter bi;
-       BAT *b, *bn;
-       int tpe;
-       BUN p, q;
-       bit bnil = bit_nil;
-       ptr nilptr;
-       int error = regcomp(&prg, *pat, REG_EXTENDED | REG_NOSUB);
-
-       if (error < 0)
-               throw(MAL, "str.match", "Compilation of regular expression 
failed");
-
-       prepareOperand(b, bid, "match");
-       bn = BATnew(BAThtype(b), TYPE_bit, BATcount(b));
-
-       tpe = BATttype(b);
-       nilptr = ATOMnilptr(tpe);
-       bi = bat_iterator(b);
-       BATloop(b, p, q) {
-               ptr h = BUNhead(bi, p);
-               ptr t = BUNtail(bi, p);
-
-               if (ATOMcmp(tpe, t, nilptr) == 0) {
-                       bunfastins(bn, h, &bnil);
-               } else {
-                       bit match = regexec(&prg, t, 0, 0, 0) == 0;
-
-                       bunfastins(bn, h, &match);
-               }
-       }
-bunins_failed:
-       bn->hsorted = b->hsorted;
-       bn->tsorted = 0;
-       if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); 
-       *ret = bn->batCacheid;
-       BBPkeepref(bn->batCacheid);
-       BBPreleaseref(b->batCacheid);
-       return MAL_SUCCEED;
-#else
-       (void) ret;
-       (void) bid;
-       (void) pat;
-       throw(MAL, "str.match", "No implementation available");
-#endif
-}
 @-
 The pattern matching routine is optimized for SQL pattern structures.
 @c
@@ -647,6 +598,7 @@
        BAT *b,*bn;
        BUN p, q;
        oid o = oid_nil;
+
        if( (b= BATdescriptor(*bid)) == NULL)
                throw(MAL, "batstr.like","Can not find BAT");
        bn= BATnew(BAThtype(b),TYPE_void, BATcount(b)/10+5);
@@ -660,8 +612,8 @@
                ptr h = BUNhead(bi, p);
                ptr t = BUNtail(bi, p);
 
-               if ( STRlike((str) t, *pat, *esc) )
-                       bunfastins(bn, h,&o);
+               if (STRlike((str) t, *pat, *esc)) 
+                       bunfastins(bn, h, &o);
        }
 bunins_failed:
        if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); 
@@ -689,10 +641,11 @@
        BUN p, q;
        str res;
        oid o=oid_nil;
+       char *msg = MAL_SUCCEED;
 
        if( (b= BATdescriptor(*bid)) == NULL)
-               throw(MAL, "batstr.like","Can not find BAT");
-       bn= BATnew(BAThtype(b),TYPE_void, BATcount(b)/10+5);
+               throw(MAL, "batstr.substring","Can not find BAT");
+       bn= BATnew(BAThtype(b),TYPE_str, BATcount(b)/10+5);
        BATseqbase(BATmirror(b),o);
        bn->hsorted = b->hsorted;
        bn->tsorted = b->tsorted;
@@ -702,16 +655,19 @@
                ptr h = BUNhead(bi, p);
                str t =  (str) BUNtail(bi, p);
 
-               if ( STRsubstring(&res, &t, start, length) )
-                       bunfastins(bn, h, (ptr)&res);
+               if ((msg=STRsubstring(&res, &t, start, length))) 
+                       goto bunins_failed;
+               bunfastins(bn, h, (ptr)res);
+               GDKfree(res);
        }
 bunins_failed:
        if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); 
        *ret = bn->batCacheid;
        BBPkeepref(bn->batCacheid);
        BBPreleaseref(b->batCacheid);
-       return MAL_SUCCEED;
+       return msg;
 }
+
 batstr_export str STRbatsubstring(int *ret, int *l, int *r, int *t);
 str STRbatsubstring(int *ret, int *l, int *r, int *t)
 {   
@@ -754,7 +710,8 @@
                int *t1 = (int *) BUNtail(starti,p);
                int *t2 = (int *) BUNtail(lengthi,p);
                STRsubstring(vp, &tl, t1, t2);
-               bunfastins(bn, h, vp);
+               bunfastins(bn, h, *vp);
+               GDKfree(*vp);
        }
        BBPreleaseref(start->batCacheid);
        BBPreleaseref(length->batCacheid);

Index: algebra.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/modules/kernel/algebra.mx,v
retrieving revision 1.188
retrieving revision 1.189
diff -u -d -r1.188 -r1.189
--- algebra.mx  13 Dec 2007 20:22:43 -0000      1.188
+++ algebra.mx  21 Dec 2007 15:29:35 -0000      1.189
@@ -495,6 +495,10 @@
                if the left-tail value is within the range [right-head - minus, 
                right-head + plus]. Works only for the builtin numerical types, 
                and their derivates.";
+
+command join(l:bat[:any_1,:any_2], rl:bat[:any_3,:any_2], 
rh:bat[:any_3,:any_2], li:bit, hi:bit) :bat[:any_1,:any_3] 
+address ALGrangejoin;
+
 @+ Projection operations
 @mal
 command project(b:bat[:any_1,:any_2]) :bat[:any_1,:oid]
@@ -920,6 +924,7 @@
 algebra_export str ALGthetajoinEstimate(int *result, int *lid, int *rid, int 
*opc, lng *estimate);
 algebra_export str ALGthetajoin(int *result, int *lid, int *rid, int *opc);
 algebra_export str ALGbandjoin(int *result, int *lid, int *rid, ptr *minus, 
ptr *plus);
+algebra_export str ALGrangejoin(int *result, int *lid, int *rlid, int *rhid, 
bit *li, bit *hi);
 
 @= ALGunaryExport
 algebra_export str [EMAIL PROTECTED](int *result, int *bid);
@@ -1031,7 +1036,7 @@
 @c
 #include "mal_config.h"
 #include "algebra.h"
-
+#include "gdk_rangejoin.h"
 
 @* Command Implementations in C
 This module contains just a wrapper implementations; since all described
@@ -2607,6 +2612,36 @@
        throw(MAL, "algebra.bandjoin", "GDKerror");
 }
 
+str /* MAL-callable wrapper: resolves three BAT ids, runs BATrangejoin, returns the new BAT's id via *result */
+ALGrangejoin(int *result, int *lid, int *rlid, int *rhid, bit *li, bit *hi)
+{
+       BAT *left, *rightl, *righth, *bn = NULL;
+
+       if ((left = BATdescriptor(*lid)) == NULL) { /* first lookup: nothing acquired yet, so nothing to release */
+               throw(MAL, "algebra.rangejoin", "Cannot access descriptor");
+       }
+       if ((rightl = BATdescriptor(*rlid)) == NULL) {
+               BBPreleaseref(left->batCacheid); /* release the descriptor obtained above before bailing out */
+               throw(MAL, "algebra.rangejoin", "Cannot access descriptor");
+       }
+       if ((righth = BATdescriptor(*rhid)) == NULL) {
+               BBPreleaseref(left->batCacheid); /* release both descriptors obtained so far */
+               BBPreleaseref(rightl->batCacheid);
+               throw(MAL, "algebra.rangejoin", "Cannot access descriptor");
+       }
+       bn = BATrangejoin(left, rightl, righth, *li, *hi); /* li/hi passed by value; presumably low/high bound inclusion flags -- TODO confirm in gdk_rangejoin.h */
+       BBPreleaseref(left->batCacheid); /* the three inputs are released whether or not the join produced a result */
+       BBPreleaseref(rightl->batCacheid);
+       BBPreleaseref(righth->batCacheid);
+       if (bn) {
+               if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); /* switch result to BAT_READ access unless the 0x2 flag of batDirty is set */
+               *result = bn->batCacheid;
+               BBPkeepref(*result); /* keep the reference for the id just stored in *result */
+               return MAL_SUCCEED;
+       }
+       throw(MAL, "algebra.rangejoin", "GDKerror"); /* reached only when BATrangejoin returned NULL */
+}
+
 @-
 Let cut this text down with some Mx macro's
 @= ALGunary


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to