Changeset: e373d30d7ece for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e373d30d7ece
Modified Files:
monetdb5/optimizer/opt_datacell.mx
monetdb5/optimizer/opt_pipes.mx
Branch: default
Log Message:
Introduce datacell pipeline
Should be done differently.
diffs (truncated from 430 to 300 lines):
diff --git a/monetdb5/optimizer/opt_datacell.mx b/monetdb5/optimizer/opt_datacell.mx
--- a/monetdb5/optimizer/opt_datacell.mx
+++ b/monetdb5/optimizer/opt_datacell.mx
@@ -18,111 +18,29 @@
@
@f opt_datacell
-@a M. Kersten, E. Liarou
-@- Datacell factory constructions
-For the DataCell project we need a scheme to transform a SQL basket expression
-into a factory. The adjustment made to the SQL compiler are minimal.
-We assume it places a barrier marker block around the basket expressions and
-within the SQL optimizer keeps the all decisions local.
-Furthermore, the bind operations refer to baskets known in the catalog.
-They are qualified as input or output baskets. [we have
-to differentiate between persistent/temporary as well]
-
-Consider the basket expression plan
-"[select sum(i) from r where r.i>0]" which is expected to be
-translated by the SQL front-end in the following plan:
-[derived from "insert into s select sum(i) from r where r.i>0]
-@verbatim
-function user.s0_0(A0:int):void;
- X7 := A0;
-barrier go:=datacell.basket(); # the marker of the [
- #generate binds for all columns
- X2:bat[:oid,:int] := datacell.bind("sys","r","i",0);
- X8 := algebra.thetauselect(X2,X7,">");
- X10 := calc.oid(0@0);
- X12 := algebra.markT(X8,X10);
- X13 := bat.reverse(X12);
- X14 := algebra.join(X13,X2);
- X17 := algebra.selectNotNil(X14);
- X18:lng := aggr.sum(X17);
- X19 := calc.int(X18);
- #generate binds for all columns
- X3:bat[:oid,:int] := datacell.bind("sys","s","j",1);
- #generate updates for all columns
- X20:= bat.append(X3,X19);
- #generate deletion based on pivot
- X21:= algebra.semijoin(X2,X17modified);
- bat.delete(X2,X21);
-exit go; # the marker of the ]
-end s0_0;
-@end verbatim
-The plan is turned into an basket factory as follows.
-It forms a reference basis for incremental processing
-where the first block initializes the state and
-the factory loop performs incrementals.
-@verbatim
-factory user.bqs0_0(A0:int);
- X7 := A0;
- # lock all baskets
- X2:bat[:oid,:int] := basket.lock("sys","r","i");
- X8 := algebra.thetauselect(X2,X7,">");
- X12 := algebra.markT(X8,0@0);
- X13 := bat.reverse(X12);
- X14 := algebra.join(X13,X2);
- X17 := algebra.selectNotNil(X14);
- X18:lng := aggr.sum(X17);
- Y19 := calc.int(Y18);
- # lock all baskets
- X3:bat[:oid,:int] := datacell.bind("sys","s","j",1);
- #generate updates for all columns
- X20:= bat.append(X3,X19);
- #generate deletion based on pivot
- X21:= algebra.semijoin(X2,X17);
- bat.delete(X2,X21);
- basket.unlock("sys","s","j");
- basket.unlock("sys","r","i");
- yield bqs0_0;
-barrier go:=true;
- X2:bat[:oid,:int] := basket.lock("sys","r","i");
- X8 := algebra.thetauselect(X2,X7,">");
- X12 := algebra.markT(X8,X10);
- X13 := bat.reverse(X12);
- X14 := algebra.join(X13,X2);
- X17 := algebra.selectNotNil(X14);
- # do the differential processing
- X18a := aggr.sum(X17);
- X18 := X18 + X18a;
- X19 := calc.int(X18);
- # lock all baskets
- X3:bat[:oid,:int] := datacell.lock("sys","s","j",1);
- #generate updates for all columns
- X20:= bat.append(X3,X19);
- #generate deletion based on pivot
- X21:= algebra.semijoin(X2,X17);
- bat.delete(X2,X21);
- basket.unlock("sys","s","j");
- basket.unlock("sys","r","i");
- yield bqs0_0;
- redo go:=true;
-exit go;
-end bqs0_0;
-@end verbatim
+@a M. Kersten
+@- Datacell optimizer
+Assume simple queries . Clear out all non-datacell schema related sql statements, except
+for the bare minimum.
@{
@mal
pattern optimizer.datacell():str
address OPTdatacell;
pattern optimizer.datacell(mod:str, fcn:str):str
address OPTdatacell
-comment "Basket expression optimizer";
+comment "Datacell expression optimizer";
@h
#ifndef _OPT_DATACELL_
#define _OPT_DATACELL_
#include "opt_prelude.h"
#include "opt_support.h"
+#include "opt_pipes.h"
#define OPTDEBUGdatacell if ( optDebug & (1 <<DEBUG_OPT_DATACELL) )
+opt_export str OPTdatacell(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
p);
+#endif
@-
We keep a flow dependency table to detect.
@c
@@ -131,195 +49,112 @@
#include "opt_deadcode.h"
#include "mal_interpreter.h" /* for showErrors() */
#include "mal_builder.h"
-
-@-
-Basket expressions can be nested in a SQL query.
-This means we have to take them out one-by-one.
-@c
-static int
-BASKETblock(Client cntxt, MalBlkPtr mb, int *start, int *last)
-{
- int i, var= 0, baskets=0;
- InstrPtr p;
-
- (void) cntxt;
-
- for( i=1; i< mb->stop; i++){
- p= getInstrPtr(mb,i);
- if ( p->barrier== BARRIERsymbol &&
- getModuleId(p) == datacellRef &&
- getFunctionId(p) == basketRef){
- *start = i+1;
- var= getArg(p,0);
- baskets++;
- }
- /* check for closing stuff */
- if ( p->barrier == EXITsymbol &&
- getArg(p,0) == var){
- *last = i;
- return 1;
- }
- }
- return baskets;
-}
+#include "opt_statistics.h"
static int
OPTdatacellImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci)
{
- InstrPtr p, q, sig, *old, *locks;
- int i, j, k, first= 0, last= 0, limit, actions=0, ltop;
- char buf[256];
- int *declared;
- Symbol factory;
- MalBlkPtr mf;
+ int actions = 0;
+ int i, j,limit, vlimit, slimit;
+ InstrPtr p, *old;
+
+ (void) pci;
OPTDEBUGdatacell {
mnstr_printf(cntxt->fdout, "#Datacell optimizer started\n");
- printFunction(cntxt->fdout, mb, 0, LIST_MAL_STMT);
+ printFunction(cntxt->fdout, mb, stk, LIST_MAL_STMT);
+ } else (void) stk;
+
+ old= mb->stmt;
+ limit = mb->stop;
+ slimit = mb->ssize;
+ vlimit = mb->vtop;
+ if ( newMalBlkStmt(mb, slimit) < 0)
+ return 0;
+
+ for (i = 0; i < limit; i++) {
+ p = old[i];
+ if ( getModuleId(p) == sqlRef ){
+ if( getFunctionId(p ) == bindRef && getVarConstant(mb,
getArg(p, p->argc-1)).val.ival == 0) {
+ /* only the primary BAT is used */
+ pushInstruction(mb, p);
+ continue;
+ }
+ /* zap all expression arguments */
+ clrFunction(p);
+ p->argc= p->retc;
+ for ( j=0; j< p->retc; j++ )
+ if ( isaBatType(getArgType(mb,p,j) ) )
+ p = pushEmptyBAT(mb, p, getArgType(mb,p,j));
+ else
+ p = pushNil(mb, p, getArgType(mb,p,j));
+ }
+ pushInstruction(mb, p);
}
- (void) stk;
- (void) pci;
+
+ for (; i < slimit; i++)
+ if (old[i])
+ freeInstruction(old[i]);
- while ( BASKETblock(cntxt, mb, &first, &last) ) {
- actions++;
- OPTDEBUGdatacell
- mnstr_printf(cntxt->fdout, "#Basket expression %d -
%d\n", first,last);
-
- /* bake a factory */
- snprintf(buf,256,"bq%s", getArgName(mb,getInstrPtr(mb,0),0));
- factory= newFunction(putName("datacell",8),
putName(buf,strlen(buf)), FACTORYsymbol);
- insertSymbol(cntxt->nspace,factory);
- /* copy symbol table from source */
- mf= factory->def;
- mf->var = (VarPtr *) GDKrealloc(mf->var, sizeof(VarPtr) *
mb->vsize);
- mf->vsize = mb->vsize;
- mf->vtop = 0;
- for (i = 0; i < mb->vtop; i++) {
- copyVariable(mf, mb, getVar(mb, i));
- mf->vtop++;
- }
- sig= getInstrPtr(mf,0);
-/*
- setFunctionId(sig,putName(buf,strlen(buf)));
- sig->token = FACTORYsymbol;
-*/
-
- limit = mb->stop;
- old = mb->stmt;
- if ( newMalBlkStmt(mb,mb->ssize) < 0)
- return 0;
- /* copy the first part without modification */
- for (i = 0; i < first-1; i++)
- pushInstruction(mb,old[i]);
- q= newStmt(mb,putName("datacell",8),sig->fcnname);
-@-
-Copying the statements should be done with care. In particular,
-all variables not declared within the basket expression should
-be passed as arguments to the factory.
-Furthermore, we need to keep track of the aliases as they
-denote variables in the factory.
-[NEED more defensive work]
-@c
- declared = (int*) GDKzalloc(sizeof(int) * mb->vtop);
- if ( declared == NULL)
- return 0;
- locks = (InstrPtr*) GDKzalloc(sizeof(InstrPtr) * mb->stop);
- if ( locks == NULL){
- GDKfree(declared);
- return 0;
- }
- ltop = 0;
-@-
-Keep a list of bind operations, because they trigger unlocks.
-@c
- for (i = first; i < last; i++) {
- p = old[i];
- if( getModuleId(p) == basketRef &&
- getFunctionId(p) == bindRef){
- locks[ltop++] = p;
- p= newStmt(mf,basketRef,putName("lock",4));
- p= pushArgument(mf,p, getArg(old[i],0));
- }
- }
-
- for (i = first; i < last; i++) {
- p = old[i];
- for (k=p->retc; k<p->argc; k++)
- if( declared[getArg(p,k)]== 0){
- if ( !isVarConstant(mb,getArg(p,k))){
- q= pushArgument(mb,q,getArg(p,k));
- sig= pushArgument(mf,sig,getArg(p,k));
- }
- declared[getArg(p,k)]= TRUE;
- }
-
- for (k=0; k<p->retc; k++)
- declared[getArg(p,k)]= TRUE;
- pushInstruction(mf,p);
- }
-@-
-Time to unlock all baskets in reverse order
-@c
- for ( ltop--; ltop>=0; ltop--){
- p= newStmt(mf,basketRef,putName("unlock",6));
- p= pushArgument(mf,p, getArg(locks[ltop],0));
- }
-@-
-Once we have copied the first block to the factory
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list