Changeset: 498738535dfe for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=498738535dfe
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk_aggr.c
        gdk/gdk_calc.h
        monetdb5/modules/kernel/aggr.c
        monetdb5/modules/kernel/aggr.mal
        monetdb5/modules/kernel/aggr.mal.sh
        monetdb5/modules/kernel/algebra.mx
        sql/backends/monet5/sql.mx
        sql/backends/monet5/sql_scenario.c
        sql/scripts/39_analytics.sql
Branch: default
Log Message:

Implemented {stddev,var}_{samp,pop} SQL aggregates using a single scan.
The SQL aggregate stddev is gone and has been replaced with the
standard-compliant stddev_samp.  In addition we now also have
stddev_pop, and the variance aggregates var_samp and var_pop.  All
four aggregates return a DOUBLE value.


diffs (truncated from 1018 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -94,6 +94,8 @@ dbl BATcalcstdev_population(dbl *avgp, B
 dbl BATcalcstdev_sample(dbl *avgp, BAT *b);
 BAT *BATcalcsub(BAT *b1, BAT *b2, BAT *s, int tp, int abort_on_error);
 BAT *BATcalcsubcst(BAT *b, const ValRecord *v, BAT *s, int tp, int 
abort_on_error);
+dbl BATcalcvariance_population(dbl *avgp, BAT *b);
+dbl BATcalcvariance_sample(dbl *avgp, BAT *b);
 BAT *BATcalcxor(BAT *b1, BAT *b2, BAT *s);
 BAT *BATcalcxorcst(BAT *b, const ValRecord *v, BAT *s);
 BAT *BATclear(BAT *b, int force);
@@ -129,6 +131,8 @@ BAT *BATgroupsize(BAT *b, BAT *g, BAT *e
 BAT *BATgroupstdev_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 BAT *BATgroupstdev_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 BAT *BATgroupsum(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int skip_nils, int 
abort_on_error);
+BAT *BATgroupvariance_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
+BAT *BATgroupvariance_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, int 
skip_nils, int abort_on_error);
 BUN BATgrows(BAT *b);
 BAT *BAThash(BAT *b, BUN masksize);
 BAT *BAThashjoin(BAT *l, BAT *r, BUN estimate);
@@ -675,6 +679,10 @@ str AGGRsubsumcand_int(bat *retval, bat 
 str AGGRsubsumcand_lng(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
int *skip_nils, int *abort_on_error);
 str AGGRsubsumcand_sht(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
int *skip_nils, int *abort_on_error);
 str AGGRsubsumcand_wrd(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
int *skip_nils, int *abort_on_error);
+str AGGRsubvariance_dbl(bat *retval, bat *bid, bat *gid, bat *eid, int 
*skip_nils, int *abort_on_error);
+str AGGRsubvariancecand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat 
*sid, int *skip_nils, int *abort_on_error);
+str AGGRsubvariancep_dbl(bat *retval, bat *bid, bat *gid, bat *eid, int 
*skip_nils, int *abort_on_error);
+str AGGRsubvariancepcand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat 
*sid, int *skip_nils, int *abort_on_error);
 str AGGRsubxml(bat *retval, bat *bid, bat *gid, bat *eid, int *skip_nils);
 str AGGRsubxmlcand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, int 
*skip_nils);
 str AGGRsum2_bte(bat *retval, bat *bid, bat *eid);
@@ -691,6 +699,10 @@ str AGGRsum3_int(bat *retval, bat *bid, 
 str AGGRsum3_lng(bat *retval, bat *bid, bat *gid, bat *eid);
 str AGGRsum3_sht(bat *retval, bat *bid, bat *gid, bat *eid);
 str AGGRsum3_wrd(bat *retval, bat *bid, bat *gid, bat *eid);
+str AGGRvariance2_dbl(bat *retval, bat *bid, bat *eid);
+str AGGRvariance3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str AGGRvariancep2_dbl(bat *retval, bat *bid, bat *eid);
+str AGGRvariancep3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
 str AGGRxml(bat *retval, bat *bid, int *skip_nils);
 str ALARMctime(str *res);
 str ALARMepilogue(void);
@@ -844,6 +856,8 @@ str ALGtunique(int *result, int *bid);
 str ALGuselect(int *result, int *bid, ptr low, ptr high);
 str ALGuselect1(int *result, int *bid, ptr value);
 str ALGuselectInclusive(int *result, int *bid, ptr low, ptr high, bit *lin, 
bit *rin);
+str ALGvariance(dbl *res, int *bid);
+str ALGvariancep(dbl *res, int *bid);
 str ARRAYgridBAT_int(int *ret, int *bid, int *groups, int *groupsize, int 
*clustersize, int *offset);
 str ARRAYgridBAT_lng(lng *ret, lng *bid, lng *groups, lng *groupsize, lng 
*clustersize, lng *offset);
 str ARRAYgridBATshift_int(int *ret, int *bid, int *groups, int *groupsize, int 
*clustersize, int *offset, int *shift);
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -2133,7 +2133,7 @@ BATgroupmedian(BAT *b, BAT *g, BAT *e, B
        } while (0)
 
 static dbl
-calcstdev(dbl *avgp, const void *values, BUN cnt, int tp, int issample)
+calcvariance(dbl *avgp, const void *values, BUN cnt, int tp, int issample)
 {
        BUN n = 0, i;
        dbl mean = 0;
@@ -2171,21 +2171,37 @@ calcstdev(dbl *avgp, const void *values,
        }
        if (avgp)
                *avgp = mean;
-       return sqrt(m2 / (n - issample));
+       return m2 / (n - issample);
 }
 
 dbl
 BATcalcstdev_population(dbl *avgp, BAT *b)
 {
-       return calcstdev(avgp, (const void *) Tloc(b, BUNfirst(b)),
-                        BATcount(b), b->ttype, 0);
+       dbl v = calcvariance(avgp, (const void *) Tloc(b, BUNfirst(b)),
+                            BATcount(b), b->ttype, 0);
+       return v == dbl_nil ? dbl_nil : sqrt(v);
 }
 
 dbl
 BATcalcstdev_sample(dbl *avgp, BAT *b)
 {
-       return calcstdev(avgp, (const void *) Tloc(b, BUNfirst(b)),
-                        BATcount(b), b->ttype, 1);
+       dbl v = calcvariance(avgp, (const void *) Tloc(b, BUNfirst(b)),
+                            BATcount(b), b->ttype, 1);
+       return v == dbl_nil ? dbl_nil : sqrt(v);
+}
+
+dbl
+BATcalcvariance_population(dbl *avgp, BAT *b)
+{
+       return calcvariance(avgp, (const void *) Tloc(b, BUNfirst(b)),
+                           BATcount(b), b->ttype, 0);
+}
+
+dbl
+BATcalcvariance_sample(dbl *avgp, BAT *b)
+{
+       return calcvariance(avgp, (const void *) Tloc(b, BUNfirst(b)),
+                           BATcount(b), b->ttype, 1);
 }
 
 #define AGGR_STDEV(TYPE)                                               \
@@ -2224,9 +2240,12 @@ BATcalcstdev_sample(dbl *avgp, BAT *b)
                                mean[i] = dbl_nil;                      \
                                nils++;                                 \
                        } else if (cnts[i] == 1) {                      \
-                               dbls[i] = 0;                            \
+                               dbls[i] = issample ? dbl_nil : 0;       \
+                               nils2++;                                \
+                       } else if (variance) {                          \
+                               dbls[i] = m2[i] / (cnts[i] - issample); \
                        } else {                                        \
-                               dbls[i] = m2[i] / (cnts[i] - issample); \
+                               dbls[i] = sqrt(m2[i] / (cnts[i] - issample)); \
                        }                                               \
                }                                                       \
        } while (0)
@@ -2241,13 +2260,13 @@ BATcalcstdev_sample(dbl *avgp, BAT *b)
  * aggregates. */
 static BAT *
 dogroupstdev(BAT **avgb, BAT *b, BAT *g, BAT *e, BAT *s, int tp,
-            int skip_nils, int issample, const char *func)
+            int skip_nils, int issample, int variance, const char *func)
 {
        const oid *gids;
        oid gid;
        oid min, max;
        BUN i, ngrp;
-       BUN nils = 0;
+       BUN nils = 0, nils2 = 0;
        BUN *cnts = NULL;
        dbl *dbls, *mean, *delta, *m2;
        BAT *bn = NULL;
@@ -2362,6 +2381,7 @@ dogroupstdev(BAT **avgb, BAT *b, BAT *g,
        } else {
                GDKfree(mean);
        }
+       nils += nils2;
        GDKfree(delta);
        GDKfree(m2);
        GDKfree(cnts);
@@ -2392,7 +2412,7 @@ BATgroupstdev_sample(BAT *b, BAT *g, BAT
                     int skip_nils, int abort_on_error)
 {
        (void) abort_on_error;
-       return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, 1,
+       return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, 1, 0,
                            "BATgroupstdev_sample");
 }
 
@@ -2401,6 +2421,24 @@ BATgroupstdev_population(BAT *b, BAT *g,
                         int skip_nils, int abort_on_error)
 {
        (void) abort_on_error;
-       return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, 0,
+       return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, 0, 0,
                            "BATgroupstdev_population");
 }
+
+BAT *
+BATgroupvariance_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp,
+                    int skip_nils, int abort_on_error)
+{
+       (void) abort_on_error;
+       return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, 1, 1,
+                           "BATgroupvariance_sample");
+}
+
+BAT *
+BATgroupvariance_population(BAT *b, BAT *g, BAT *e, BAT *s, int tp,
+                        int skip_nils, int abort_on_error)
+{
+       (void) abort_on_error;
+       return dogroupstdev(NULL, b, g, e, s, tp, skip_nils, 0, 1,
+                           "BATgroupvariance_population");
+}
diff --git a/gdk/gdk_calc.h b/gdk/gdk_calc.h
--- a/gdk/gdk_calc.h
+++ b/gdk/gdk_calc.h
@@ -140,3 +140,7 @@ gdk_export dbl BATcalcstdev_population(d
 gdk_export dbl BATcalcstdev_sample(dbl *avgp, BAT *b);
 gdk_export BAT *BATgroupstdev_sample(BAT *b, BAT *g, BAT *e, BAT *s, int tp, 
int skip_nils, int abort_on_error);
 gdk_export BAT *BATgroupstdev_population(BAT *b, BAT *g, BAT *e, BAT *s, int 
tp, int skip_nils, int abort_on_error);
+gdk_export dbl BATcalcvariance_population(dbl *avgp, BAT *b);
+gdk_export dbl BATcalcvariance_sample(dbl *avgp, BAT *b);
+gdk_export BAT *BATgroupvariance_sample(BAT *b, BAT *g, BAT *e, BAT *s, int 
tp, int skip_nils, int abort_on_error);
+gdk_export BAT *BATgroupvariance_population(BAT *b, BAT *g, BAT *e, BAT *s, 
int tp, int skip_nils, int abort_on_error);
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -434,6 +434,36 @@ AGGRstdevp2_dbl(bat *retval, bat *bid, b
        return AGGRgrouped2(retval, bid, eid, TYPE_dbl, 
BATgroupstdev_population, 1, "aggr.stdevp");
 }
 
+aggr_export str AGGRvariance3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str
+AGGRvariance3_dbl(bat *retval, bat *bid, bat *gid, bat *eid)
+{
+       return AGGRgrouped3(retval, bid, gid, eid, TYPE_dbl,
+                                               BATgroupvariance_sample, 1, 
"aggr.variance");
+}
+
+aggr_export str AGGRvariance2_dbl(bat *retval, bat *bid, bat *eid);
+str
+AGGRvariance2_dbl(bat *retval, bat *bid, bat *eid)
+{
+       return AGGRgrouped2(retval, bid, eid, TYPE_dbl, 
BATgroupvariance_sample, 1, "aggr.variance");
+}
+
+aggr_export str AGGRvariancep3_dbl(bat *retval, bat *bid, bat *gid, bat *eid);
+str
+AGGRvariancep3_dbl(bat *retval, bat *bid, bat *gid, bat *eid)
+{
+       return AGGRgrouped3(retval, bid, gid, eid, TYPE_dbl,
+                                               BATgroupvariance_population, 1, 
"aggr.variancep");
+}
+
+aggr_export str AGGRvariancep2_dbl(bat *retval, bat *bid, bat *eid);
+str
+AGGRvariancep2_dbl(bat *retval, bat *bid, bat *eid)
+{
+       return AGGRgrouped2(retval, bid, eid, TYPE_dbl, 
BATgroupvariance_population, 1, "aggr.variancep");
+}
+
 aggr_export str AGGRcount3(bat *retval, bat *bid, bat *gid, bat *eid, bit 
*ignorenils);
 str
 AGGRcount3(bat *retval, bat *bid, bat *gid, bat *eid, bit *ignorenils)
@@ -868,6 +898,38 @@ AGGRsubstdevpcand_dbl(bat *retval, bat *
                                                  *abort_on_error, TYPE_dbl, 
BATgroupstdev_population, "aggr.substdevp");
 }
 
+aggr_export str AGGRsubvariance_dbl(bat *retval, bat *bid, bat *gid, bat *eid, 
int *skip_nils, int *abort_on_error);
+str
+AGGRsubvariance_dbl(bat *retval, bat *bid, bat *gid, bat *eid, int *skip_nils, 
int *abort_on_error)
+{
+       return AGGRsubgrouped(retval, bid, gid, eid, NULL, *skip_nils,
+                                                 *abort_on_error, TYPE_dbl, 
BATgroupvariance_sample, "aggr.subvariance");
+}
+
+aggr_export str AGGRsubvariancecand_dbl(bat *retval, bat *bid, bat *gid, bat 
*eid, bat *sid, int *skip_nils, int *abort_on_error);
+str
+AGGRsubvariancecand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
int *skip_nils, int *abort_on_error)
+{
+       return AGGRsubgrouped(retval, bid, gid, eid, sid, *skip_nils,
+                                                 *abort_on_error, TYPE_dbl, 
BATgroupvariance_sample, "aggr.subvariance");
+}
+
+aggr_export str AGGRsubvariancep_dbl(bat *retval, bat *bid, bat *gid, bat 
*eid, int *skip_nils, int *abort_on_error);
+str
+AGGRsubvariancep_dbl(bat *retval, bat *bid, bat *gid, bat *eid, int 
*skip_nils, int *abort_on_error)
+{
+       return AGGRsubgrouped(retval, bid, gid, eid, NULL, *skip_nils,
+                                                 *abort_on_error, TYPE_dbl, 
BATgroupvariance_population, "aggr.subvariancep");
+}
+
+aggr_export str AGGRsubvariancepcand_dbl(bat *retval, bat *bid, bat *gid, bat 
*eid, bat *sid, int *skip_nils, int *abort_on_error);
+str
+AGGRsubvariancepcand_dbl(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
int *skip_nils, int *abort_on_error)
+{
+       return AGGRsubgrouped(retval, bid, gid, eid, sid, *skip_nils,
+                                                 *abort_on_error, TYPE_dbl, 
BATgroupvariance_population, "aggr.subvariancep");
+}
+
 aggr_export str AGGRsubcount(bat *retval, bat *bid, bat *gid, bat *eid, int 
*skip_nils);
 str
 AGGRsubcount(bat *retval, bat *bid, bat *gid, bat *eid, int *skip_nils)
diff --git a/monetdb5/modules/kernel/aggr.mal b/monetdb5/modules/kernel/aggr.mal
--- a/monetdb5/modules/kernel/aggr.mal
+++ b/monetdb5/modules/kernel/aggr.mal
@@ -724,6 +724,38 @@ command substdevp(b:bat[:oid,:bte],g:bat
 address AGGRsubstdevpcand_dbl
 comment "Grouped stdevp aggregate with candidates list";
 
+command variance(b:bat[:oid,:bte], e:bat[:oid,:any_1]) :bat[:oid,:dbl]
+address AGGRvariance2_dbl
+comment "Grouped tail average on bte";
+
+command variance(b:bat[:oid,:bte], g:bat[:oid,:oid], 
e:bat[:oid,:any_1]):bat[:oid,:dbl]
+address AGGRvariance3_dbl
+comment "Grouped tail average on bte";
+
+command 
subvariance(b:bat[:oid,:bte],g:bat[:oid,:oid],e:bat[:oid,:any_1],skip_nils:int,abort_on_error:int)
 :bat[:oid,:dbl]
+address AGGRsubvariance_dbl
+comment "Grouped variance aggregate";
+
+command 
subvariance(b:bat[:oid,:bte],g:bat[:oid,:oid],e:bat[:oid,:any_1],s:bat[:oid,:oid],skip_nils:int,abort_on_error:int)
 :bat[:oid,:dbl]
+address AGGRsubvariancecand_dbl
+comment "Grouped variance aggregate with candidates list";
+
+command variancep(b:bat[:oid,:bte], e:bat[:oid,:any_1]) :bat[:oid,:dbl]
+address AGGRvariancep2_dbl
+comment "Grouped tail average on bte";
+
+command variancep(b:bat[:oid,:bte], g:bat[:oid,:oid], 
e:bat[:oid,:any_1]):bat[:oid,:dbl]
+address AGGRvariancep3_dbl
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to