Changeset: 6e2060655674 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6e2060655674
Modified Files:
        gdk/gdk_aggr.c
        monetdb5/modules/mal/pcre.c
        monetdb5/modules/mal/pcre.mal
        monetdb5/optimizer/opt_mergetable.c
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        sql/backends/monet5/sql_gencode.c
        sql/server/rel_optimizer.c
        sql/test/BugTracker-2009/Tests/AVG_ReturnsNoLines.SF-2596084.stable.out
        sql/test/BugTracker-2011/Tests/aggregate-in-subquery.Bug-2739.stable.out
        sql/test/BugTracker-2011/Tests/view_avg_incorrect_result.Bug-2790.stable.out
Branch: default
Log Message:

use safe avg implementation


diffs (truncated from 742 to 300 lines):

diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -1316,6 +1316,13 @@ BATgroupavg(BAT **bnp, BAT **cntsp, BAT 
        GDKfree(rems);
        if (cntsp == NULL)
                GDKfree(cnts);
+       else {
+               BATsetcount(*cntsp, ngrp);
+               BATseqbase(*cntsp, min);
+               (*cntsp)->tkey = BATcount(*cntsp) <= 1;
+               (*cntsp)->tsorted = BATcount(*cntsp) <= 1;
+               (*cntsp)->trevsorted = BATcount(*cntsp) <= 1;
+       }
        BATsetcount(bn, ngrp);
        BATseqbase(bn, min);
        bn->tkey = BATcount(bn) <= 1;
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -99,6 +99,7 @@ pcre_export str PCREilike_select_pcre(in
 pcre_export str pcre_init(void);
 pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit 
*caseignore, bit *anti);
 pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str 
*esc, bit *caseignore, bit *anti);
+pcre_export str PCRElikesubselect3(bat *ret, bat *bid, str *pat, str *esc, bit 
*anti);
 
 /* current implementation assumes simple %keyword% [keyw%]* */
 typedef struct RE {
@@ -1709,6 +1710,13 @@ PCRElikesubselect1(bat *ret, bat *bid, s
        return PCRElikesubselect2(ret, bid, NULL, pat, esc, caseignore, anti);
 }
 
+str
+PCRElikesubselect3(bat *ret, bat *bid, str *pat, str *esc, bit *anti)
+{
+       bit f = FALSE;
+       return PCRElikesubselect2(ret, bid, NULL, pat, esc, &f, anti);
+}
+
 static str
 PCRElike_pcre(int *ret, int *b, str *pat, str *esc, bit us, bit ignore)
 {
diff --git a/monetdb5/modules/mal/pcre.mal b/monetdb5/modules/mal/pcre.mal
--- a/monetdb5/modules/mal/pcre.mal
+++ b/monetdb5/modules/mal/pcre.mal
@@ -185,9 +185,19 @@ comment "Select all head values of the f
        the tail the head value of the input BAT for which the
        relationship holds.  The output BAT is sorted on the tail value.";
 
-function likesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit) 
:bat[:oid,:oid];
-       return likesubselect(b, pat, esc, false, anti);
-end likesubselect;
+command algebra.likesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit) 
:bat[:oid,:oid]
+address PCRElikesubselect3
+comment "Select all head values of the first input BAT for which the
+       tail value is \"like\" the given (SQL-style) pattern and for
+       which the head value occurs in the tail of the second input
+       BAT.
+       Input is a dense-headed BAT, output is a dense-headed BAT with in
+       the tail the head value of the input BAT for which the
+       relationship holds.  The output BAT is sorted on the tail value.";
+
+#function likesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit) 
:bat[:oid,:oid];
+       #return likesubselect(b, pat, esc, false, anti);
+#end likesubselect;
 
 function ilikesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit) 
:bat[:oid,:oid];
        return likesubselect(b, pat, esc, true, anti);
diff --git a/monetdb5/optimizer/opt_mergetable.c 
b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -583,9 +583,9 @@ mat_join3(MalBlkPtr mb, InstrPtr p, mat_
 static char *
 aggr_phase2(char *aggr)
 {
-       if (aggr == countRef || aggr == count_no_nilRef)
+       if (aggr == countRef || aggr == count_no_nilRef || aggr == avgRef)
                return sumRef;
-       if (aggr == subcountRef)
+       if (aggr == subcountRef || aggr == subavgRef)
                return subsumRef;
        /* min/max/sum/prod and unique are fine */
        return aggr;
@@ -594,36 +594,122 @@ aggr_phase2(char *aggr)
 static void
 mat_aggr(MalBlkPtr mb, InstrPtr p, mat_t *mat, int m)
 {
-       int tp = getArgType(mb,p,0), k;
-       int battp = (getModuleId(p)==aggrRef)?newBatType(TYPE_oid,tp):tp;
-       int v = newTmpVariable(mb, battp);
-       InstrPtr r = NULL, s = NULL, q = NULL;
+       int tp = getArgType(mb,p,0), k, tp2 = TYPE_lng;
+       int battp = (getModuleId(p)==aggrRef)?newBatType(TYPE_oid,tp):tp, 
battp2 = 0;
+       int isAvg = (getFunctionId(p) == avgRef);
+       InstrPtr r = NULL, s = NULL, q = NULL, u = NULL;
 
        /* we pack the partitial result */
        r = newInstruction(mb,ASSIGNsymbol);
        setModuleId(r, matRef);
        setFunctionId(r, packRef);
-       getArg(r,0) = v;
+       getArg(r,0) = newTmpVariable(mb, battp);
+
+       if (isAvg) { /* counts */
+               battp2 = newBatType(TYPE_oid, tp2);
+               u = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(u,matRef);
+               setFunctionId(u,packRef);
+               getArg(u,0) = newTmpVariable(mb, battp2);
+       }
        for(k=1; k< mat[m].mi->argc; k++) {
                q = newInstruction(mb,ASSIGNsymbol);
                setModuleId(q,getModuleId(p));
+               if (isAvg)
+                       setModuleId(q,batcalcRef);
                setFunctionId(q,getFunctionId(p));
                getArg(q,0) = newTmpVariable(mb, tp);
+               if (isAvg) 
+                       q = pushReturn(mb, q, newTmpVariable(mb, tp2));
                q = pushArgument(mb,q,getArg(mat[m].mi,k));
                pushInstruction(mb,q);
                
                r = pushArgument(mb,r,getArg(q,0));
+               if (isAvg) 
+                       u = pushArgument(mb,u,getArg(q,1));
        }
        pushInstruction(mb,r);
+       if (isAvg)
+               pushInstruction(mb, u);
 
+       /* Filter empty partitions */
        if (getModuleId(p) == aggrRef) {
                s = newInstruction(mb,ASSIGNsymbol);
                setModuleId(s, algebraRef);
                setFunctionId(s, selectNotNilRef);
-               getArg(s,0) = newTmpVariable(mb, newBatType(TYPE_oid,tp));
+               getArg(s,0) = newTmpVariable(mb, battp);
                s = pushArgument(mb, s, getArg(r,0));
                pushInstruction(mb, s);
                r = s;
+
+               if (isAvg) {
+                       s = newInstruction(mb,ASSIGNsymbol);
+                       setModuleId(s, algebraRef);
+                       setFunctionId(s, selectNotNilRef);
+                       getArg(s,0) = newTmpVariable(mb, battp2);
+                       s = pushArgument(mb, s, getArg(u,0));
+                       pushInstruction(mb, s);
+                       u = s;
+               }
+       }
+
+       /* for avg we do sum (avg*(count/sumcount) ) */
+       if (isAvg) {
+               InstrPtr v,w,x,y,cond;
+
+               /* lng w = sum counts */
+               w = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(w, aggrRef);
+               setFunctionId(w, sumRef);
+               getArg(w,0) = newTmpVariable(mb, tp2);
+               w = pushArgument(mb, w, getArg(u, 0));
+               pushInstruction(mb, w);
+
+               /*  y=count = ifthenelse(w=count==0,NULL,w=count)  */
+               cond = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(cond, calcRef);
+               setFunctionId(cond, eqRef); 
+               getArg(cond,0) = newTmpVariable(mb, TYPE_bit);
+               cond = pushArgument(mb, cond, getArg(w, 0));
+               cond = pushWrd(mb, cond, 0);
+               pushInstruction(mb,cond);
+
+               y = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(y, calcRef);
+               setFunctionId(y, ifthenelseRef); 
+               getArg(y,0) = newTmpVariable(mb, tp2);
+               y = pushArgument(mb, y, getArg(cond, 0));
+               y = pushNil(mb, y, tp2);
+               y = pushArgument(mb, y, getArg(w, 0));
+               pushInstruction(mb,y);
+
+               /* dbl v = double(count) */
+               v = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(v, batcalcRef);
+               setFunctionId(v, dblRef); 
+               getArg(v,0) = newTmpVariable(mb, newBatType(TYPE_oid, 
TYPE_dbl));
+               v = pushArgument(mb, v, getArg(u, 0));
+               pushInstruction(mb, v);
+
+               /* dbl x = v / y */
+               x = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(x, batcalcRef);
+               setFunctionId(x, divRef); 
+               getArg(x,0) = newTmpVariable(mb, newBatType(TYPE_oid, 
TYPE_dbl));
+               x = pushArgument(mb, x, getArg(v, 0));
+               x = pushArgument(mb, x, getArg(y, 0));
+               pushInstruction(mb, x);
+
+               /* dbl w = avg * x */
+               w = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(w, batcalcRef);
+               setFunctionId(w, mulRef); 
+               getArg(w,0) = newTmpVariable(mb, battp);
+               w = pushArgument(mb, w, getArg(r, 0));
+               w = pushArgument(mb, w, getArg(x, 0));
+               pushInstruction(mb, w);
+
+               r = w;
        }
 
        s = newInstruction(mb,ASSIGNsymbol);
@@ -667,30 +753,126 @@ group_by_ext(mat_t *mat, int mtop, int g
        return 0;
 }
 
+/* Per partition aggregates are merged and aggregated together. For 
+ * most (handled) aggregates thats relatively simple. AVG is somewhat
+ * more complex. */
 static void
 mat_group_aggr(MalBlkPtr mb, InstrPtr p, mat_t *mat, int b, int g, int e)
 {
-       int tp = getArgType(mb,p,0), k;
+       int tp = getArgType(mb,p,0), k, tp2 = 0;
        char *aggr2 = aggr_phase2(getFunctionId(p));
-       InstrPtr ai1 = newInstruction(mb, ASSIGNsymbol), ai2;
+       int isAvg = (getFunctionId(p) == subavgRef);
+       InstrPtr ai1 = newInstruction(mb, ASSIGNsymbol), ai10, ai2;
 
        setModuleId(ai1,matRef);
        setFunctionId(ai1,packRef);
        getArg(ai1,0) = newTmpVariable(mb, tp);
 
+       if (isAvg) { /* counts */
+               tp2 = newBatType(TYPE_oid, TYPE_wrd);
+               ai10 = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(ai10,matRef);
+               setFunctionId(ai10,packRef);
+               getArg(ai10,0) = newTmpVariable(mb, tp2);
+       }
+
        for(k=1; k<mat[b].mi->argc; k++) {
                InstrPtr q = copyInstruction(p);
+
                getArg(q,0) = newTmpVariable(mb, tp);
-               getArg(q,1) = getArg(mat[b].mi,k);
-               getArg(q,2) = getArg(mat[g].mi,k);
-               getArg(q,3) = getArg(mat[e].mi,k);
+               if (isAvg) {
+                       getArg(q,1) = newTmpVariable(mb, tp2);
+                       q = pushArgument(mb, q, getArg(q,1)); /* push at end, 
create space */
+                       q->retc = 2;
+                       getArg(q,q->argc-1) = getArg(q,q->argc-2);
+                       getArg(q,q->argc-2) = getArg(q,q->argc-3);
+               }
+               getArg(q,1+isAvg) = getArg(mat[b].mi,k);
+               getArg(q,2+isAvg) = getArg(mat[g].mi,k);
+               getArg(q,3+isAvg) = getArg(mat[e].mi,k);
                pushInstruction(mb,q);
 
                /* pack the result into a mat */
                ai1 = pushArgument(mb,ai1,getArg(q,0));
+               if (isAvg)
+                       ai10 = pushArgument(mb,ai10,getArg(q,1));
        }
        pushInstruction(mb, ai1);
+       if (isAvg)
+               pushInstruction(mb, ai10);
 
+       /* for avg we do sum (avg*(count/sumcount) ) */
+       if (isAvg) {
+               InstrPtr r,s,v,w, cond;
+
+               /* wrd s = sum counts */
+               s = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(s, aggrRef);
+               setFunctionId(s, subsumRef);
+               getArg(s,0) = newTmpVariable(mb, tp2);
+               s = pushArgument(mb, s, getArg(ai10, 0));
+               s = pushArgument(mb, s, mat[g].mv);
+               s = pushArgument(mb, s, mat[e].mv);
+               s = pushBit(mb, s, 1); /* skip nils */
+               s = pushBit(mb, s, 1);
+               pushInstruction(mb,s);
+
+               /*  w=count = ifthenelse(s=count==0,NULL,s=count)  */
+               cond = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(cond, batcalcRef);
+               setFunctionId(cond, eqRef); 
+               getArg(cond,0) = newTmpVariable(mb, newBatType(TYPE_oid, 
TYPE_bit));
+               cond = pushArgument(mb, cond, getArg(s, 0));
+               cond = pushWrd(mb, cond, 0);
+               pushInstruction(mb,cond);
+
+               w = newInstruction(mb, ASSIGNsymbol);
+               setModuleId(w, batcalcRef);
+               setFunctionId(w, ifthenelseRef); 
+               getArg(w,0) = newTmpVariable(mb, tp2);
+               w = pushArgument(mb, w, getArg(cond, 0));
+               w = pushNil(mb, w, TYPE_wrd);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to