Changeset: 6e2060655674 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6e2060655674
Modified Files:
gdk/gdk_aggr.c
monetdb5/modules/mal/pcre.c
monetdb5/modules/mal/pcre.mal
monetdb5/optimizer/opt_mergetable.c
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
sql/backends/monet5/sql_gencode.c
sql/server/rel_optimizer.c
sql/test/BugTracker-2009/Tests/AVG_ReturnsNoLines.SF-2596084.stable.out
sql/test/BugTracker-2011/Tests/aggregate-in-subquery.Bug-2739.stable.out
sql/test/BugTracker-2011/Tests/view_avg_incorrect_result.Bug-2790.stable.out
Branch: default
Log Message:
use safe avg implementation
diffs (truncated from 742 to 300 lines):
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -1316,6 +1316,13 @@ BATgroupavg(BAT **bnp, BAT **cntsp, BAT
GDKfree(rems);
if (cntsp == NULL)
GDKfree(cnts);
+ else {
+ BATsetcount(*cntsp, ngrp);
+ BATseqbase(*cntsp, min);
+ (*cntsp)->tkey = BATcount(*cntsp) <= 1;
+ (*cntsp)->tsorted = BATcount(*cntsp) <= 1;
+ (*cntsp)->trevsorted = BATcount(*cntsp) <= 1;
+ }
BATsetcount(bn, ngrp);
BATseqbase(bn, min);
bn->tkey = BATcount(bn) <= 1;
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -99,6 +99,7 @@ pcre_export str PCREilike_select_pcre(in
pcre_export str pcre_init(void);
pcre_export str PCRElikesubselect1(bat *ret, bat *bid, str *pat, str *esc, bit
*caseignore, bit *anti);
pcre_export str PCRElikesubselect2(bat *ret, bat *bid, bat *sid, str *pat, str
*esc, bit *caseignore, bit *anti);
+pcre_export str PCRElikesubselect3(bat *ret, bat *bid, str *pat, str *esc, bit
*anti);
/* current implementation assumes simple %keyword% [keyw%]* */
typedef struct RE {
@@ -1709,6 +1710,13 @@ PCRElikesubselect1(bat *ret, bat *bid, s
return PCRElikesubselect2(ret, bid, NULL, pat, esc, caseignore, anti);
}
+str
+PCRElikesubselect3(bat *ret, bat *bid, str *pat, str *esc, bit *anti)
+{
+ bit f = FALSE;
+ return PCRElikesubselect2(ret, bid, NULL, pat, esc, &f, anti);
+}
+
static str
PCRElike_pcre(int *ret, int *b, str *pat, str *esc, bit us, bit ignore)
{
diff --git a/monetdb5/modules/mal/pcre.mal b/monetdb5/modules/mal/pcre.mal
--- a/monetdb5/modules/mal/pcre.mal
+++ b/monetdb5/modules/mal/pcre.mal
@@ -185,9 +185,19 @@ comment "Select all head values of the f
the tail the head value of the input BAT for which the
relationship holds. The output BAT is sorted on the tail value.";
-function likesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit)
:bat[:oid,:oid];
- return likesubselect(b, pat, esc, false, anti);
-end likesubselect;
+command algebra.likesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit)
:bat[:oid,:oid]
+address PCRElikesubselect3
+comment "Select all head values of the first input BAT for which the
+ tail value is \"like\" the given (SQL-style) pattern and for
+ which the head value occurs in the tail of the second input
+ BAT.
+ Input is a dense-headed BAT, output is a dense-headed BAT with in
+ the tail the head value of the input BAT for which the
+ relationship holds. The output BAT is sorted on the tail value.";
+
+#function likesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit)
:bat[:oid,:oid];
+ #return likesubselect(b, pat, esc, false, anti);
+#end likesubselect;
function ilikesubselect(b:bat[:oid,:str], pat:str, esc:str, anti:bit)
:bat[:oid,:oid];
return likesubselect(b, pat, esc, true, anti);
diff --git a/monetdb5/optimizer/opt_mergetable.c
b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -583,9 +583,9 @@ mat_join3(MalBlkPtr mb, InstrPtr p, mat_
static char *
aggr_phase2(char *aggr)
{
- if (aggr == countRef || aggr == count_no_nilRef)
+ if (aggr == countRef || aggr == count_no_nilRef || aggr == avgRef)
return sumRef;
- if (aggr == subcountRef)
+ if (aggr == subcountRef || aggr == subavgRef)
return subsumRef;
/* min/max/sum/prod and unique are fine */
return aggr;
@@ -594,36 +594,122 @@ aggr_phase2(char *aggr)
static void
mat_aggr(MalBlkPtr mb, InstrPtr p, mat_t *mat, int m)
{
- int tp = getArgType(mb,p,0), k;
- int battp = (getModuleId(p)==aggrRef)?newBatType(TYPE_oid,tp):tp;
- int v = newTmpVariable(mb, battp);
- InstrPtr r = NULL, s = NULL, q = NULL;
+ int tp = getArgType(mb,p,0), k, tp2 = TYPE_lng;
+ int battp = (getModuleId(p)==aggrRef)?newBatType(TYPE_oid,tp):tp,
battp2 = 0;
+ int isAvg = (getFunctionId(p) == avgRef);
+ InstrPtr r = NULL, s = NULL, q = NULL, u = NULL;
/* we pack the partitial result */
r = newInstruction(mb,ASSIGNsymbol);
setModuleId(r, matRef);
setFunctionId(r, packRef);
- getArg(r,0) = v;
+ getArg(r,0) = newTmpVariable(mb, battp);
+
+ if (isAvg) { /* counts */
+ battp2 = newBatType(TYPE_oid, tp2);
+ u = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(u,matRef);
+ setFunctionId(u,packRef);
+ getArg(u,0) = newTmpVariable(mb, battp2);
+ }
for(k=1; k< mat[m].mi->argc; k++) {
q = newInstruction(mb,ASSIGNsymbol);
setModuleId(q,getModuleId(p));
+ if (isAvg)
+ setModuleId(q,batcalcRef);
setFunctionId(q,getFunctionId(p));
getArg(q,0) = newTmpVariable(mb, tp);
+ if (isAvg)
+ q = pushReturn(mb, q, newTmpVariable(mb, tp2));
q = pushArgument(mb,q,getArg(mat[m].mi,k));
pushInstruction(mb,q);
r = pushArgument(mb,r,getArg(q,0));
+ if (isAvg)
+ u = pushArgument(mb,u,getArg(q,1));
}
pushInstruction(mb,r);
+ if (isAvg)
+ pushInstruction(mb, u);
+ /* Filter empty partitions */
if (getModuleId(p) == aggrRef) {
s = newInstruction(mb,ASSIGNsymbol);
setModuleId(s, algebraRef);
setFunctionId(s, selectNotNilRef);
- getArg(s,0) = newTmpVariable(mb, newBatType(TYPE_oid,tp));
+ getArg(s,0) = newTmpVariable(mb, battp);
s = pushArgument(mb, s, getArg(r,0));
pushInstruction(mb, s);
r = s;
+
+ if (isAvg) {
+ s = newInstruction(mb,ASSIGNsymbol);
+ setModuleId(s, algebraRef);
+ setFunctionId(s, selectNotNilRef);
+ getArg(s,0) = newTmpVariable(mb, battp2);
+ s = pushArgument(mb, s, getArg(u,0));
+ pushInstruction(mb, s);
+ u = s;
+ }
+ }
+
+ /* for avg we do sum (avg*(count/sumcount) ) */
+ if (isAvg) {
+ InstrPtr v,w,x,y,cond;
+
+ /* lng w = sum counts */
+ w = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(w, aggrRef);
+ setFunctionId(w, sumRef);
+ getArg(w,0) = newTmpVariable(mb, tp2);
+ w = pushArgument(mb, w, getArg(u, 0));
+ pushInstruction(mb, w);
+
+ /* y=count = ifthenelse(w=count==0,NULL,w=count) */
+ cond = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(cond, calcRef);
+ setFunctionId(cond, eqRef);
+ getArg(cond,0) = newTmpVariable(mb, TYPE_bit);
+ cond = pushArgument(mb, cond, getArg(w, 0));
+ cond = pushWrd(mb, cond, 0);
+ pushInstruction(mb,cond);
+
+ y = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(y, calcRef);
+ setFunctionId(y, ifthenelseRef);
+ getArg(y,0) = newTmpVariable(mb, tp2);
+ y = pushArgument(mb, y, getArg(cond, 0));
+ y = pushNil(mb, y, tp2);
+ y = pushArgument(mb, y, getArg(w, 0));
+ pushInstruction(mb,y);
+
+ /* dbl v = double(count) */
+ v = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(v, batcalcRef);
+ setFunctionId(v, dblRef);
+ getArg(v,0) = newTmpVariable(mb, newBatType(TYPE_oid,
TYPE_dbl));
+ v = pushArgument(mb, v, getArg(u, 0));
+ pushInstruction(mb, v);
+
+ /* dbl x = v / y */
+ x = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(x, batcalcRef);
+ setFunctionId(x, divRef);
+ getArg(x,0) = newTmpVariable(mb, newBatType(TYPE_oid,
TYPE_dbl));
+ x = pushArgument(mb, x, getArg(v, 0));
+ x = pushArgument(mb, x, getArg(y, 0));
+ pushInstruction(mb, x);
+
+ /* dbl w = avg * x */
+ w = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(w, batcalcRef);
+ setFunctionId(w, mulRef);
+ getArg(w,0) = newTmpVariable(mb, battp);
+ w = pushArgument(mb, w, getArg(r, 0));
+ w = pushArgument(mb, w, getArg(x, 0));
+ pushInstruction(mb, w);
+
+ r = w;
}
s = newInstruction(mb,ASSIGNsymbol);
@@ -667,30 +753,126 @@ group_by_ext(mat_t *mat, int mtop, int g
return 0;
}
+/* Per partition aggregates are merged and aggregated together. For
+ * most (handled) aggregates that's relatively simple. AVG is somewhat
+ * more complex. */
static void
mat_group_aggr(MalBlkPtr mb, InstrPtr p, mat_t *mat, int b, int g, int e)
{
- int tp = getArgType(mb,p,0), k;
+ int tp = getArgType(mb,p,0), k, tp2 = 0;
char *aggr2 = aggr_phase2(getFunctionId(p));
- InstrPtr ai1 = newInstruction(mb, ASSIGNsymbol), ai2;
+ int isAvg = (getFunctionId(p) == subavgRef);
+ InstrPtr ai1 = newInstruction(mb, ASSIGNsymbol), ai10, ai2;
setModuleId(ai1,matRef);
setFunctionId(ai1,packRef);
getArg(ai1,0) = newTmpVariable(mb, tp);
+ if (isAvg) { /* counts */
+ tp2 = newBatType(TYPE_oid, TYPE_wrd);
+ ai10 = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(ai10,matRef);
+ setFunctionId(ai10,packRef);
+ getArg(ai10,0) = newTmpVariable(mb, tp2);
+ }
+
for(k=1; k<mat[b].mi->argc; k++) {
InstrPtr q = copyInstruction(p);
+
getArg(q,0) = newTmpVariable(mb, tp);
- getArg(q,1) = getArg(mat[b].mi,k);
- getArg(q,2) = getArg(mat[g].mi,k);
- getArg(q,3) = getArg(mat[e].mi,k);
+ if (isAvg) {
+ getArg(q,1) = newTmpVariable(mb, tp2);
+ q = pushArgument(mb, q, getArg(q,1)); /* push at end,
create space */
+ q->retc = 2;
+ getArg(q,q->argc-1) = getArg(q,q->argc-2);
+ getArg(q,q->argc-2) = getArg(q,q->argc-3);
+ }
+ getArg(q,1+isAvg) = getArg(mat[b].mi,k);
+ getArg(q,2+isAvg) = getArg(mat[g].mi,k);
+ getArg(q,3+isAvg) = getArg(mat[e].mi,k);
pushInstruction(mb,q);
/* pack the result into a mat */
ai1 = pushArgument(mb,ai1,getArg(q,0));
+ if (isAvg)
+ ai10 = pushArgument(mb,ai10,getArg(q,1));
}
pushInstruction(mb, ai1);
+ if (isAvg)
+ pushInstruction(mb, ai10);
+ /* for avg we do sum (avg*(count/sumcount) ) */
+ if (isAvg) {
+ InstrPtr r,s,v,w, cond;
+
+ /* wrd s = sum counts */
+ s = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(s, aggrRef);
+ setFunctionId(s, subsumRef);
+ getArg(s,0) = newTmpVariable(mb, tp2);
+ s = pushArgument(mb, s, getArg(ai10, 0));
+ s = pushArgument(mb, s, mat[g].mv);
+ s = pushArgument(mb, s, mat[e].mv);
+ s = pushBit(mb, s, 1); /* skip nils */
+ s = pushBit(mb, s, 1);
+ pushInstruction(mb,s);
+
+ /* w=count = ifthenelse(s=count==0,NULL,s=count) */
+ cond = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(cond, batcalcRef);
+ setFunctionId(cond, eqRef);
+ getArg(cond,0) = newTmpVariable(mb, newBatType(TYPE_oid,
TYPE_bit));
+ cond = pushArgument(mb, cond, getArg(s, 0));
+ cond = pushWrd(mb, cond, 0);
+ pushInstruction(mb,cond);
+
+ w = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(w, batcalcRef);
+ setFunctionId(w, ifthenelseRef);
+ getArg(w,0) = newTmpVariable(mb, tp2);
+ w = pushArgument(mb, w, getArg(cond, 0));
+ w = pushNil(mb, w, TYPE_wrd);
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list