Changeset: 059b0977071a for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=059b0977071a
Modified Files:
MonetDB5/src/optimizer/opt_tarantula.mx
Branch: default
Log Message:
Code reshuffling for variable propagation
The source code calls for a more global perspective on the re-use
of variables in the remainder of a plan. Queries compile, and seem
to run, but still incomplete.
diffs (truncated from 385 to 300 lines):
diff -r 134cdc4cc0c4 -r 059b0977071a MonetDB5/src/optimizer/opt_tarantula.mx
--- a/MonetDB5/src/optimizer/opt_tarantula.mx Fri Aug 20 17:04:56 2010 +0200
+++ b/MonetDB5/src/optimizer/opt_tarantula.mx Mon Aug 23 08:20:08 2010 +0200
@@ -391,84 +391,45 @@
Likewise, look for variables that are produced by other legs and will become
input parameters.
@c
static MalBlkPtr
-TARmakeleg(Client cntxt, MalBlkPtr mb, InstrPtr *old, int pc, int idx,
Lifespan span, int *map)
+TARmakeleg(Client cntxt, MalBlkPtr mb, InstrPtr *old, int pc, int idx, int
*input, int *output, InstrPtr *list)
{
MalBlkPtr tm = NULL;
InstrPtr p = NULL, sig;
- int i, j, top=0, fnd, block = 0;
+ int i, top=0, fnd, *alias;
char buf[BUFSIZ];
- InstrPtr *list = (InstrPtr*) GDKzalloc(sizeof(InstrPtr) * mb->ssize);
- char *needed= (char*) GDKzalloc(mb->vtop);
- int *alias= (int*) GDKzalloc(mb->vtop * sizeof(int));
- int itop = 0, *input = ( int*) GDKzalloc(mb->vtop * sizeof(int));
- int otop = 0, *output = ( int*) GDKzalloc(mb->vtop * sizeof(int));
Symbol s;
assert(old[pc]->fcnname == packRef);
- OPTDEBUGtarantula {
+ OPTDEBUGtarantula
mnstr_printf(cntxt->fdout,"#create leg for %d %d %d\n",pc,idx
-old[pc]->retc, getArg(old[pc],idx));
- }
-...@-
-Find the instructions needed. Beware pack operations
-already have been handled and produce their target.
-Build the variable admin table.
-...@c
- needed[getArg(old[pc],idx)]=1;
- output[otop++]= getArg(old[pc],idx);
- for ( j = old[0]->retc; j< old[0]->argc; j++)
- input[itop++]= getArg(old[0],j);
- for (i = pc-1; i > 0; i--){
- p = old[i];
-
- fnd = 0;
- switch( p->barrier ){
- case EXITsymbol: block++; break;
- case CATCHsymbol: block--; break;
- case BARRIERsymbol: block--; break;
- }
- if ( block ){
- for (j = 0; j < p->argc; j++)
- needed[getArg(p,j)]= 1;
- fnd = 1;
- }
- for (j = 0; j < p->retc; j++)
- fnd += needed[getArg(p,j)];
- if ( fnd) { /* instruction is needed */
- for (j = 0; j < p->argc; j++){
- assert(getArg(p,j) >=0);
- needed[getArg(p,j)]= 1;
- if ( getEndLifespan(span, getArg(p,j)) > pc)
- output[otop++]= getArg(p,j);
- if ( map[getArg(p,j)] != getArg(p,j) )
- input[itop++] = getArg(p,j);
- }
- list[top++] = p;
- }
- }
@-
The leg should have enough instructions to warrant a distributed execution.
This should involve
a careful analysis of the instructions assembled. For the time being, we only
allow for a leg
if at least a sql.bind operation belongs to the list and the leg has a
minimimal number of statements.
@c
fnd = 0;
- for( i=0; i<top; i++) {
+ for( i=0; list[i]; i++) {
fnd += getModuleId(list[i]) == sqlRef && getFunctionId(list[i]) ==
bindRef;
fnd += getModuleId(list[i]) == sqlRef && getFunctionId(list[i]) ==
bindidxRef;
fnd += getModuleId(list[i]) == sqlRef && getFunctionId(list[i]) ==
binddbatRef;
}
+ top = i;
- if ( top - fnd <= MINLEGSIZE) {
- GDKfree(list);
- GDKfree(alias);
- GDKfree(needed);
- GDKfree(input);
- GDKfree(output);
+ if ( top - fnd <= MINLEGSIZE)
return 0;
+
+ OPTDEBUGtarantula{
+ mnstr_printf(cntxt->fdout,"#input ");
+ for(i=0; input[i]; i++)
+ mnstr_printf(cntxt->fdout,"%d, ", input[i]);
+ mnstr_printf(cntxt->fdout,"\n#output ");
+ for(i=0; output[i]; i++)
+ mnstr_printf(cntxt->fdout,"%d, ", output[i]);
+ mnstr_printf(cntxt->fdout,"\n");
}
-...@-
-Create the leg function.
-...@c
+ alias= (int*) GDKzalloc(mb->vtop * sizeof(int));
+
snprintf(buf,BUFSIZ,"%s_%d_%d", getFunctionId(getInstrPtr(mb,0)),
getArg(old[pc],0), idx-old[pc]->retc);
putName(buf,strlen(buf));
@@ -478,15 +439,16 @@
sig= getInstrPtr(tm,0);
setVarType(tm,getArg(sig,0), getArgType(mb,old[pc],idx));
setVarUDFtype(tm,getArg(sig,0));
+ alias[output[0]] = getArg(sig,0);
/* add the return variables */
- for ( i = 1; i< otop; i++){
+ for ( i = 1; output[i]>0; i++){
alias[output[i]] = cloneVariable(tm,mb,output[i]);
sig = pushReturn(tm, sig, alias[output[i]]);
setVarUDFtype(tm,getArg(sig,i));
}
/* add the arguments from the query template */
- for ( i = 0; i< itop; i++){
+ for ( i = 0; input[i]> 0; i++){
alias[input[i]] = cloneVariable(tm,mb,input[i]);
sig = pushArgument(tm, sig, alias[input[i]]);
}
@@ -510,7 +472,7 @@
p->argc= 0;
for ( i = 0; i < sig->retc; i++)
p = pushReturn(tm,p, getArg(sig,i));
- for ( i = 0; i < otop; i++)
+ for ( i = 0; output[i]>0; i++)
p = pushArgument(tm,p, alias[output[i]]);
pushEndInstruction(tm);
clrDeclarations(tm);
@@ -518,11 +480,9 @@
if ( tm->errors )
mb->errors++;
- GDKfree(list);
+ OPTDEBUGtarantula
+ printFunction(cntxt->fdout, tm, 0, LIST_MAL_STMT | LIST_MAPI);
GDKfree(alias);
- GDKfree(needed);
- GDKfree(input);
- GDKfree(output);
return tm;
}
@@ -724,12 +684,12 @@
Watch out, the arguments should occupy the head of the stack.
@c
static void
-TARmakeRun(Client cntxt, MalBlkPtr mb, InstrPtr *old, int pc, int nodes, int
legs, MalBlkPtr leg)
+TARmakeRun(Client cntxt, MalBlkPtr mb, InstrPtr *old, int pc, int nodes, int
legs, int *input, int *output)
{
Symbol s;
MalBlkPtr tm;
char buf[BUFSIZ], fcn[BUFSIZ];
- InstrPtr lsig, sig, r, p,q;
+ InstrPtr sig, r, p,q;
int j,k,l,x=0;
snprintf(fcn,BUFSIZ,"exe_%s_%d", getFunctionId(getInstrPtr(mb,0)),
getArg(old[pc],0));
@@ -738,13 +698,11 @@
tm= s->def;
sig = getInstrPtr(tm,0);
- lsig= getInstrPtr(leg,0);
- /* the result is a mat.pack whose type comes from mb */
- setVarType(tm,getArg(sig,0), getArgType(mb,old[pc],0));
+ setVarType(tm,getArg(sig,0), getVarType(mb,getArg(old[pc],0)));
/* include the remaining return variables */
- for ( j=1;j < lsig->retc; j++)
- sig= pushReturn(tm, sig, cloneVariable(tm, leg,
getArg(lsig,j)));
+ for ( j=1;output[j]; j++)
+ sig= pushReturn(tm, sig, cloneVariable(tm, mb, output[j]));
/* add the execution nodes */
for( k=0 ; k<nodes; k++){
@@ -752,19 +710,17 @@
sig= pushArgument(tm, sig, newVariable(tm, GDKstrdup(buf),
TYPE_int));
}
/* input arguments */
- for( j =lsig->retc; j<lsig->argc; j++){
- int a= getArg(lsig,j);
- sig = pushArgument(tm, sig, cloneVariable(tm,leg,a));
- }
+ for ( j=0; input[j]; j++)
+ sig = pushArgument(tm, sig, cloneVariable(tm,mb,input[j]));
/* initialize the return variables */
r= newAssignment(tm);
getArg(r,0)= getArg(sig,0);
pushNil(tm,r,getArgType(tm,sig,0));
- for ( j=1;j < lsig->retc; j++){
+ for ( j=1;j < sig->retc; j++){
r= newAssignment(tm);
- getArg(r,0)= getArg(lsig,j);
- pushNil(tm,r,getArgType(tm,lsig,j));
+ getArg(r,0)= getArg(sig,j);
+ pushNil(tm,r,getArgType(tm,sig,j));
}
if ( tarantulaLocal == 0){
@@ -847,13 +803,17 @@
static int
OPTtarantulaImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci)
{
- InstrPtr q, p, sig, *old;
- int last=0, i, j, k, limit, actions=0;
- int tn=0;
+ InstrPtr q, p, pp, sig, *old;
+ int last=0, i, j, k, l, limit, actions=0, block = 0, fnd;
+ int tn=0, ta =0, vtop=0;
MalBlkPtr *leg;
char fcn[BUFSIZ];
Lifespan span;
- int *map;
+ int *map, top;
+ int itop = 0, *input;
+ int otop = 0, *output;
+ InstrPtr *list;
+ char *needed;
if( cntxt == 0){
/* confuscate, delay for later activation */
@@ -867,27 +827,11 @@
TARdiscover(cntxt);
mal_unset_lock(mal_contextLock,"tarantula.register");
@-
-Test the use of the variables and show them.
-Debugging code
-{
- int *alias= (int*) GDKzalloc(mb->vtop * sizeof(int));
- old = mb->stmt;
- for (i = 1; i < mb->stop; i++){
- p = old[i];
- for( j = p->retc; j< p->argc; j++)
- alias[getArg(p,j)]++;
- }
- for(i=0;i<mb->vtop;i++)
- if ( alias[i]>1)
- mnstr_printf(cntxt->fdout,"%s %d\n",getVarName(mb,i),alias[i]);
- GDKfree(alias);
-}
-...@-
All tarantula leg code is collected in a separate module
to ease future distribution and scheduling.
The optimizer works by looking only to the mat.pack statement.
-It recursively calls the optimizer used to create multiple versions, which,
+It repeatedly calls the optimizer used to create multiple versions, which,
when scheduled correctly, would not lead to duplicate work.
@c
(void) fixModule(cntxt->nspace,tarantulaRef);
@@ -906,7 +850,8 @@
pushInstruction(mb, old[0]);
span = newLifespan(mb);
- map= (int*) GDKzalloc(mb->vtop * sizeof(int));
+ map= (int*) GDKzalloc(2 * mb->vtop * sizeof(int));
+ vtop= mb->vtop;
for ( i = 0; i < mb->vtop; i++)
map[i] = i;
@@ -919,16 +864,72 @@
continue;
}
if( getModuleId(p)== matRef && getFunctionId(p)== packRef) {
- for (tn =0, j = p->retc; j < p->argc; j++) {
- leg[tn] = TARmakeleg(cntxt, mb, old, i,
j,span,map);
- if ( leg[tn] == 0)
+...@-
+The critical part is to determine the input/output variable set
+for this pack function. Some variables may have to be re-used
+in subsequent calls.
+...@c
+ for (tn =0, ta = p->retc; ta < p->argc; ta++) {
+ input = ( int*) GDKzalloc(mb->vtop *
sizeof(int));
+ output = ( int*) GDKzalloc(mb->vtop *
sizeof(int));
+ list = (InstrPtr*) GDKzalloc(sizeof(InstrPtr) *
mb->ssize);
+ needed= (char*) GDKzalloc(mb->vtop*2);
+
+ otop= itop= top = 0;
+
+ needed[getArg(p,ta)] = 1;
+ output[otop++]= getArg(p,ta);
+
+ /* get query arguments */
+ for ( j = old[0]->retc; j< old[0]->argc; j++)
+ input[itop++]= getArg(old[0],j);
+
+ /* find variables used outside leg scope */
+ /* find variables defined before by legs */
+ for (l = i-1; l > 0; l--){
+ pp = old[l];
+ /* find variables needed later and not
mapped already */
+ fnd = 0;
+ for (j = 0; j < pp->retc; j++)
+ fnd += needed[getArg(pp,j)] &&
map[getArg(pp,j)] == getArg(pp,j);
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list