Update of /cvsroot/monetdb/MonetDB5/src/optimizer
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26037
Modified Files:
opt_mergetable.mx
Log Message:
first steps of partitioned storage for sql
Index: opt_mergetable.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/optimizer/opt_mergetable.mx,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -d -r1.18 -r1.19
--- opt_mergetable.mx 28 Aug 2007 20:06:55 -0000 1.18
+++ opt_mergetable.mx 6 Nov 2007 18:09:11 -0000 1.19
@@ -138,8 +138,7 @@
#include "mal_interpreter.h"
#include "mal_builder.h"
-/* #define DEBUG_OPT_MERGETABLE show partial result */
-#define BPMtest
+/*#define DEBUG_OPT_MERGETABLE show partial result */
@c
#include "mal_config.h"
#include "opt_mergetable.h"
@@ -152,6 +151,95 @@
if( mvar[i]== idx) return i;
return -1;
}
+
+INLINE static int
+mat_add(InstrPtr *mat, int *mvar, int mtop, InstrPtr q)
+{
+ mat[mtop] = q;
+ mvar[mtop] = getArg(q,0);
+ return mtop+1;
+}
+
+/* todo make general re-name func function */
+static InstrPtr
+mat_union(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m)
+{
+ int k;
+ InstrPtr r = newInstruction(mb, ASSIGNsymbol);
+
+ setModuleId(r,matRef);
+ setFunctionId(r,newRef);
+ getArg(r,0) = getArg(p,0);
+
+ for(k=1; k<mat[m]->argc; k++) {
+ r = pushArgument(mb, r, getArg(mat[m],k));
+ }
+ r = pushArgument(mb, r, getArg(p,2));
+ freeInstruction(p);
+ pushInstruction(mb, r);
+ return r;
+}
+
+static InstrPtr
+mat_mark(MalBlkPtr mb, InstrPtr p, InstrPtr *mat, int m)
+{
+ int tpe = getArgType(mb,p,0);
+ int k,o = newTmpVariable(mb,TYPE_oid);
+ InstrPtr q, r = newInstruction(mb, ASSIGNsymbol);
+
+ getArg(r,0) = o;
+ /* get the mark base id */
+ r = pushArgument(mb, r, getArg(p, 2));
+ pushInstruction(mb, r);
+
+ r = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(r,matRef);
+ setFunctionId(r,newRef);
+ getArg(r,0) = getArg(p,0);
+
+ for(k=1; k<mat[m]->argc; k++) {
+ q = copyInstruction(p);
+ getArg(q,0) = newTmpVariable(mb, tpe);
+ getArg(q,1) = getArg(mat[m],k);
+ getArg(q,2) = o;
+ pushInstruction(mb,q);
+
+ /* add result to mat */
+ r = pushArgument(mb,r,getArg(q,0));
+
+ /* increment oid */
+ if (k < mat[m]->argc-1) {
+ InstrPtr ca, c, bc = newInstruction(mb, ASSIGNsymbol);
+
+ /* bc = bat.count */
+ setModuleId(bc, aggrRef);
+ setFunctionId(bc, countRef);
+ getArg(bc, 0) = newTmpVariable(mb, TYPE_int);
+ bc = pushArgument(mb, bc, getArg(mat[m],k));
+ pushInstruction(mb, bc);
+
+ /* convert int to oid */
+ c = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(c, calcRef);
+ setFunctionId(c, oidRef);
+ getArg(c, 0) = newTmpVariable(mb, TYPE_oid);
+ c = pushArgument(mb, c, getArg(bc,0));
+ pushInstruction(mb, c);
+
+ /* ca = calc.add */
+ ca = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(ca, calcRef);
+ setFunctionId(ca, plusRef);
+ getArg(ca, 0) = o;
+ ca = pushArgument(mb, ca, o);
+ ca = pushArgument(mb, ca, getArg(c, 0));
+ pushInstruction(mb, ca);
+ }
+ }
+ freeInstruction(p);
+ pushInstruction(mb, r);
+ return r;
+}
@-
Packing the BATs into a single one is handled here
Note that MATs can be defined recursively.
@@ -159,16 +247,17 @@
static InstrPtr
MATpackAll(MalBlkPtr mb, InstrPtr r, int m, InstrPtr *mat, int *mvar, int
*mtop){
int j,l,k;
- r = newInstruction(mb, ASSIGNsymbol);
if( mat[m]->argc-mat[m]->retc ==1){
/* simple assignment is sufficient */
+ r = newInstruction(mb, ASSIGNsymbol);
getArg(r,0)= getArg(mat[m],0);
getArg(r,1)= getArg(mat[m],1);
} else{
- if( r== NULL){
+ if (r == NULL){
+ r = newInstruction(mb, ASSIGNsymbol);
setModuleId(r,matRef);
setFunctionId(r,packRef);
- getArg(r,0)= getArg(mat[m],0);
+ getArg(r,0) = getArg(mat[m],0);
}
for(l=mat[m]->retc; l< mat[m]->argc; l++){
k= isMATalias( getArg(mat[m],l),mvar,*mtop);
@@ -207,123 +296,52 @@
}
return cnt;
}
[EMAIL PROTECTED]
-A sql.bind() operation should be trapped to inspect the partition catalog
-for definitions. For now we simulate a partitioned table, comprised
-of the original base and an empty one.
[EMAIL PROTECTED]
-static InstrPtr
-MATpartitionTableDummy(MalBlkPtr mb, InstrPtr p)
-{
- int v= getArg(p,0);
- InstrPtr q;
-#ifndef BPMtest
- InstrPtr r,p2;
-
- freezeVarType(mb,getArg(p,0));
-#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"found bind\n");
- printInstruction(GDKout,mb,p,0);
-#endif
- pushInstruction(mb,p);
-
- p2= newStmt(mb,batRef,newRef);
- setVarType(mb,getArg(p2,0),getVarType(mb,v));
- p2= pushArgument(mb,p2,v);
-
- q= newStmt(mb,matRef,newRef);
- setVarType(mb,getArg(q,0),getVarType(mb,v));
- getArg(p,0)= getArg(q,0);
- getArg(p2,1)= getArg(p,0);
- getArg(q,0)= v;
- q= pushArgument(mb, q, getArg(p,0));
- q= pushArgument(mb, q, getArg(p2,0));
-
- /* the oid ranges should be sequentially aligned */
- r = newStmt(mb,batRef, putName("setBase",7));
- r= pushArgument(mb, r, getArg(p,0));
- r= pushArgument(mb, r, getArg(p2,0));
-#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"new block\n");
- printInstruction(GDKout,mb,p,0);
- printInstruction(GDKout,mb,p2,0);
- printInstruction(GDKout,mb,q,0);
- printInstruction(GDKout,mb,r,0);
-#endif
[EMAIL PROTECTED]
-The SQL optimizer has already deduced the BAT id,
-which we can reuse to access the partitions in the BPM
-catalog.
[EMAIL PROTECTED]
-#else
- int bid;
- int top;
-
- pushInstruction(mb,p);
- if( getProps(mb,getArg(p,0))==0)
- return NULL;
- bid = *(int*) getPropertyValue(getProps(mb,getArg(p,0)),"bid");
-#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"replace bid %d\n");
-#endif
- top= mb->stop;
- BPMexpand(mb, v, bid);
- q = newInstruction(mb, ASSIGNsymbol);
- setModuleId(q,matRef);
- setFunctionId(q,newRef);
- getArg(q,0)= v;
- for(; top<mb->stop; top++){
- setVarType(mb,getArg(getInstrPtr(mb,top),0),getVarType(mb,v));
- q= pushArgument(mb, q, getArg(getInstrPtr(mb,top),0));
- }
-#endif
- return q;
-}
+typedef struct hrange {
+ oid l,h;
+ char used;
+} hrange;
static int
-OPTmergetableImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){
- InstrPtr *old=0, q,r, *mat;
- int oldtop,fm,i,k,m,mtop=0, *mvar,tpe;
- int size,match,actions=0;
+OPTmergetableImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p)
+{
+ InstrPtr *old=0, q, r, *mat;
+ int oldtop, fm, i, k, m, mtop=0, *mvar, tpe;
+ int size, match, actions=0;
+ hrange *hr;
#ifdef DEBUG_OPT_MERGETABLE
- tream_printf(GDKout,"Start MAT optimizer\n");
-
+ stream_printf(GDKout,"Start MAT optimizer\n");
#endif
-
old = mb->stmt;
oldtop= mb->stop;
- size = (mb->stop *1.2 < mb->ssize)? mb->ssize: (int) (mb->stop *1.2);
- mb->stmt = (InstrPtr *) GDKzalloc(size * sizeof(InstrPtr));
- mb->ssize = size ;
+ size = (mb->stop * 1.2 < mb->ssize)? mb->ssize:(int)(mb->stop * 1.2);
+ mb->stmt = (InstrPtr *) GDKzalloc(size * sizeof(InstrPtr));
+ mb->ssize = size;
mb->stop = 0;
+ hr = (hrange *) GDKzalloc(size * sizeof(hrange));
/* the number of MATs is limited to the variable stack*/
- mat= (InstrPtr*) alloca(mb->vtop * sizeof(InstrPtr));
+ mat = (InstrPtr*) alloca(mb->vtop * sizeof(InstrPtr));
memset((char*) mat, 0, mb->vtop * sizeof(InstrPtr));
- mvar= (int*) alloca(mb->vtop * sizeof(int));
+ mvar = (int*) alloca(mb->vtop * sizeof(int));
memset((char*) mvar, 0, mb->vtop * sizeof(InstrPtr));
for( i=0; i<oldtop; i++){
- p= old[i];
- if( (getModuleId(p)== matRef && getFunctionId(p)==newRef) ||
- (getModuleId(p)== matRef && getFunctionId(p)==packRef)
){
- mat[mtop]= p;
+ p = old[i];
+ if (getModuleId(p)== matRef &&
+ (getFunctionId(p)==newRef || getFunctionId(p)==packRef)) {
+ mat[mtop] = p;
mvar[mtop] = getArg(p,0);
mtop++;
pushInstruction(mb,p);
continue;
}
- if( (getModuleId(p)== sqlRef && getFunctionId(p)==bindRef)){
- int v= getArg(p,0);
- q= MATpartitionTableDummy(mb,p);
- if( q){
- mat[mtop]= q;
- mvar[mtop] = v;
- mtop++;
- actions++;
- }
+ if (getModuleId(p)== matRef && getFunctionId(p) == hrangeRef) {
+ hrange *h = &hr[getArg(p,1)];
+ h->used = 1;
+ h->l = getConstant(mb, getArg(p,2)).val.oval;
+ h->h = getConstant(mb, getArg(p,3)).val.oval;
continue;
}
@-
@@ -331,7 +349,7 @@
Otherwise we have to decide on either packing them or replacement.
@c
match= MATcount(p,mvar,mtop);
- if( match== 0 ){
+ if (match == 0) {
pushInstruction(mb,p);
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"simply move\n");
@@ -339,6 +357,14 @@
#endif
continue;
}
[EMAIL PROTECTED]
+First we handle horizontal aligned mats. This information is passed using
+mat.hrange(b,x,y). So if this is available, we can simplify batcalc operations
+and for fetch joins we can use this information to do per part joins only.
[EMAIL PROTECTED]
+ if (match == 2) {
+
+ }
@-
Pack MAT arguments, except one, to limit plan explosion.
The preferred partitioned one is the first argment as it
@@ -346,23 +372,22 @@
Look at the depth of the MAT definition to limit the explosion.
@c
for( fm= p->argc-1; fm>p->retc ; fm--)
- if( (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0)
- break;
+ if ((m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0)
+ break;
- for( k=fm-1; match > 1 && k >= p->retc ; k--)
- if( (m=isMATalias(getArg(p,k),mvar,mtop)) >= 0 ){
+ for( k=fm-1; match > 1 && k >= p->retc ; k--) {
+ if ((m=isMATalias(getArg(p,k),mvar,mtop)) >= 0) {
#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"Pack k=%d\n",k);
- printInstruction(GDKout,mb,p,0);
+ stream_printf(GDKout,"Pack k=%d\n",k);
+ printInstruction(GDKout,mb,p,0);
#endif
- r= MATpackAll(mb, NULL, m, mat, mvar, &mtop);
- getArg(p,k)= getArg(r,0);
- match--;
+ r= MATpackAll(mb, NULL, m, mat, mvar, &mtop);
+ getArg(p,k) = getArg(r,0);
+ match--;
+ }
}
@-
From here we have to differentiate between the operations.
-This simply means that any SQL bat is extended with an always
-empty second component. If all work, we switch to the last.
Their semantics should be respected while producing code.
@-
The insertions are sent to the first component of the MAT.
@@ -373,40 +398,54 @@
be packed to facilitate updates. We assume the first
partition is dominant.
@c
- if( getModuleId(p)== batRef &&
- ( getFunctionId(p)== insertRef ||
- getFunctionId(p)== appendRef
- ) &&
- (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0){
- getArg(p,1) = getArg(mat[m],mat[m]->retc);
- pushInstruction(mb,p);
+ if (((getModuleId(p)== batRef &&
+ (getFunctionId(p)== insertRef ||
+ getFunctionId(p)== appendRef)) ||
+ (getModuleId(p) == algebraRef &&
+ getFunctionId(p)== kunionRef)) &&
+ (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0) {
+ /* bat.insert(mat,b or val)
+ bat.append()
+ bat.kunion()
+ -> mat.union(mat, b or val) */
+ // pushInstruction(mb,p);
+ mtop= mat_add(mat, mvar, mtop, mat_union(mb, p, mat,
m));
+ actions++;
continue;
}
@-
-The kdifference operator is a good example where the
+The mark operators are a special case of apply on parts as we need too
+correct the mark base oid's
[EMAIL PROTECTED]
+ if (getModuleId(p)== algebraRef &&
+ (getFunctionId(p) == markTRef ||
+ getFunctionId(p) == markHRef) &&
+ (m=isMATalias(getArg(p,1),mvar,mtop)) >= 0) {
+ mtop= mat_add(mat, mvar, mtop, mat_mark(mb, p, mat, m));
+ actions++;
+ continue;
+ }
[EMAIL PROTECTED]
+The delete operator is a good example where the
action does not produce multiple results, but requires
an accumulation implementation.
@-
A similar situation occurs in delete operations
using partitioned input.
@c
- if(((getModuleId(p)== algebraRef &&
- getFunctionId(p) == kdifferenceRef) ||
- (getModuleId(p)== batRef &&
- getFunctionId(p) == deleteRef) ) &&
- fm == 2 &&
- (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0){
+
+ if((getModuleId(p) == batRef && getFunctionId(p) == deleteRef)
+ && fm == 2 && (m=isMATalias(getArg(p,fm),mvar,mtop)) >= 0) {
#ifdef DEBUG_OPT_MERGETABLE
- stream_printf(GDKout,"kdifference resolution
fm=%d\n",fm);
+ stream_printf(GDKout,"delete resolution fm=%d\n",fm);
printInstruction(GDKout,mb,p,0);
#endif
-
tpe= getVarType(mb,getArg(p,0));
for(k=1; k< mat[m]->argc; k++){
q= copyInstruction(p);
- if( k == mat[m]->argc-1)
+ if( k == mat[m]->argc-1) {
getArg(q,0)= getArg(p,0);
- else{
+ } else {
getArg(q,0) = newTmpVariable(mb, tpe);
getArg(p,1)= getArg(q,0);
}
@@ -429,32 +468,32 @@
stream_printf(GDKout,"isFragment resolution
fm=%d\n",fm);
printInstruction(GDKout,mb,p,0);
#endif
- if( isaBatType(getVarType(mb,getArg(p,0))) ){
+ if (isaBatType(getVarType(mb,getArg(p,0)))){
r = newInstruction(mb, ASSIGNsymbol);
setModuleId(r,matRef);
setFunctionId(r,newRef);
- getArg(r,0)= getArg(p,0);
- tpe= getArgType(mb,p,0);
+ getArg(r,0) = getArg(p,0);
+ tpe = getArgType(mb,p,0);
} else {
- tpe= TYPE_any;
- r=0;
+ tpe = TYPE_any;
+ r = 0;
}
- for(k=1; k< mat[m]->argc; k++){
- q= copyInstruction(p);
- getArg(q,fm) = getArg(mat[m],k);
+ for(k=1; k<mat[m]->argc; k++) {
+ q = copyInstruction(p);
getArg(q,0) = newTmpVariable(mb, tpe);
+ getArg(q,fm) = getArg(mat[m],k);
pushInstruction(mb,q);
- if( r)
- r= pushArgument(mb,r,getArg(q,0));
+ if (r)
+ r = pushArgument(mb,r,getArg(q,0));
}
freeInstruction(p);
actions++;
if(r){
- mat[mtop]= r;
+ mat[mtop] = r;
mvar[mtop] = getArg(r,0);
mtop++;
- /* pushInstruction(mb,r);*/
+ /* pushInstruction(mb,r); */
}
#ifdef DEBUG_OPT_MERGETABLE
stream_printf(GDKout,"Fragment result \n");
@@ -530,6 +569,7 @@
#endif
pushInstruction(mb,p);
}
+ GDKfree(hr);
GDKfree(old);
@-
As a final optimization, we could remove the mal.new definitions,
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins