Changeset: af165b5bfc37 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=af165b5bfc37
Modified Files:
        gdk/gdk_analytic.h
        gdk/gdk_analytic_func.c
        sql/backends/monet5/sql_gencode.c
        sql/backends/monet5/sql_rank.c
        sql/backends/monet5/sql_rank.h
        sql/backends/monet5/sql_rank.mal
        sql/backends/monet5/sql_rank.mal.sh
        sql/scripts/39_analytics.sql
        sql/server/rel_select.c
Branch: statistics-analytics
Log Message:

Implementing stddev_samp window function


diffs (truncated from 392 to 300 lines):

diff --git a/gdk/gdk_analytic.h b/gdk/gdk_analytic.h
--- a/gdk/gdk_analytic.h
+++ b/gdk/gdk_analytic.h
@@ -30,6 +30,7 @@ gdk_export gdk_return GDKanalyticalwindo
 gdk_export gdk_return GDKanalyticalfirst(BAT *r, BAT *b, BAT *s, BAT *e, int 
tpe);
 gdk_export gdk_return GDKanalyticallast(BAT *r, BAT *b, BAT *s, BAT *e, int 
tpe);
 gdk_export gdk_return GDKanalyticalnthvalue(BAT *r, BAT *b, BAT *s, BAT *e, 
BAT *l, const void *restrict bound, int tp1, int tp2);
+
 gdk_export gdk_return GDKanalyticalmin(BAT *r, BAT *b, BAT *s, BAT *e, int 
tpe);
 gdk_export gdk_return GDKanalyticalmax(BAT *r, BAT *b, BAT *s, BAT *e, int 
tpe);
 gdk_export gdk_return GDKanalyticalcount(BAT *r, BAT *b, BAT *s, BAT *e, const 
bit *restrict ignore_nils, int tpe);
@@ -37,4 +38,6 @@ gdk_export gdk_return GDKanalyticalsum(B
 gdk_export gdk_return GDKanalyticalprod(BAT *r, BAT *b, BAT *s, BAT *e, int 
tp1, int tp2);
 gdk_export gdk_return GDKanalyticalavg(BAT *r, BAT *b, BAT *s, BAT *e, int 
tpe);
 
+gdk_export gdk_return GDKanalyticalstddev_samp(BAT *r, BAT *b, BAT *s, BAT *e, 
int tpe);
+
 #endif //_GDK_ANALYTIC_H_
diff --git a/gdk/gdk_analytic_func.c b/gdk/gdk_analytic_func.c
--- a/gdk/gdk_analytic_func.c
+++ b/gdk/gdk_analytic_func.c
@@ -1720,3 +1720,76 @@ GDKanalyticalavg(BAT *r, BAT *b, BAT *s,
        r->tnil = has_nils;
        return GDK_SUCCEED;
 }
+
+/*
+ * Sample standard deviation of the TPE values in each row's window
+ * [start[i], end[i]) of b, written to *rb as a dbl.
+ *
+ * Uses a single-pass running update: mean tracks the running average and m2
+ * the running sum of squared deviations, so the result is sqrt(m2 / (n - 1)).
+ * Nil inputs are skipped; a window with fewer than two non-nil values yields
+ * dbl_nil and sets has_nils.
+ *
+ * Expands in a scope that provides b, i, cnt, rb, start, end, n, mean, m2,
+ * delta and has_nils; n, mean and m2 are zeroed again after every row.
+ */
+#define ANALYTICAL_STDEV_CALC(TPE)                                     \
+       do {                                                            \
+               TPE *col = (TPE *) Tloc(b, 0);                          \
+               while (i < cnt) {                                       \
+                       TPE *cur = col + start[i];                      \
+                       TPE *stop = col + end[i];                       \
+                       while (cur < stop) {                            \
+                               TPE val = *cur;                         \
+                               if (!is_##TPE##_nil(val)) {             \
+                                       n++;                            \
+                                       delta = (dbl) val - mean;       \
+                                       mean += delta / n;              \
+                                       m2 += delta * ((dbl) val - mean); \
+                               }                                       \
+                               cur++;                                  \
+                       }                                               \
+                       if (n <= 1) {                                   \
+                               /* fewer than two samples: undefined */ \
+                               *rb = dbl_nil;                          \
+                               has_nils = true;                        \
+                       } else {                                        \
+                               *rb = sqrt(m2 / (n - 1));               \
+                       }                                               \
+                       /* reset the accumulators for the next row */   \
+                       n = 0;                                          \
+                       mean = 0;                                       \
+                       m2 = 0;                                         \
+                       i++;                                            \
+                       rb++;                                           \
+               }                                                       \
+       } while (0)
+
+/*
+ * GDKanalyticalstddev_samp - sample standard deviation as a window function.
+ *
+ * r    result BAT; one dbl per row of b is written to its tail
+ * b    input BAT whose tail holds values of type tpe
+ * s, e lng BATs holding, per row of b, the start (inclusive) and end
+ *      (exclusive) offsets into b of that row's window; both are required
+ * tpe  tail type of b: bte, sht, int, lng, hge (when HAVE_HGE), flt or dbl
+ *
+ * Returns GDK_SUCCEED after setting r's count and nil properties, or GDK_FAIL
+ * (after reporting through GDKerror) when tpe is not one of the types above.
+ */
+gdk_return
+GDKanalyticalstddev_samp(BAT *r, BAT *b, BAT *s, BAT *e, int tpe)
+{
+       bool has_nils = false;
+       BUN i = 0, cnt = BATcount(b), n = 0;
+       lng *restrict start, *restrict end;
+       dbl *restrict rb = (dbl *) Tloc(r, 0), mean = 0, m2 = 0, delta;
+
+       assert(s && e);
+       start = (lng *) Tloc(s, 0);
+       end = (lng *) Tloc(e, 0);
+
+       /* the macro consumes and updates i, rb, n, mean, m2, delta, has_nils */
+       switch (tpe) {
+       case TYPE_bte:
+               ANALYTICAL_STDEV_CALC(bte);
+               break;
+       case TYPE_sht:
+               ANALYTICAL_STDEV_CALC(sht);
+               break;
+       case TYPE_int:
+               ANALYTICAL_STDEV_CALC(int);
+               break;
+       case TYPE_lng:
+               ANALYTICAL_STDEV_CALC(lng);
+               break;
+#ifdef HAVE_HGE
+       case TYPE_hge:
+               ANALYTICAL_STDEV_CALC(hge);
+               break;
+#endif
+       case TYPE_flt:
+               ANALYTICAL_STDEV_CALC(flt);
+               break;
+       case TYPE_dbl:
+               ANALYTICAL_STDEV_CALC(dbl);
+               break;
+       default:
+               /* was "average of type ...": a leftover from GDKanalyticalavg */
+               GDKerror("%s: standard deviation of type %s unsupported.\n", __func__, ATOMname(tpe));
+               return GDK_FAIL;
+       }
+       BATsetcount(r, cnt);
+       r->tnonil = !has_nils;
+       r->tnil = has_nils;
+       return GDK_SUCCEED;
+}
diff --git a/sql/backends/monet5/sql_gencode.c 
b/sql/backends/monet5/sql_gencode.c
--- a/sql/backends/monet5/sql_gencode.c
+++ b/sql/backends/monet5/sql_gencode.c
@@ -912,13 +912,12 @@ monet5_resolve_function(ptr M, sql_func 
        if (!mname || !fname)
                return 0;
 
-       /* Some SQL functions MAL mapping such as count(*) aggregate, the 
number or arguments don't match */
+       /* Some SQL functions MAL mapping such as count(*) aggregate, the 
number of arguments don't match */
        if (mname == calcRef && fname == getName("="))
                return 1;
        if (mname == aggrRef && fname == countRef)
                return 1;
-       if (mname == sqlRef && (fname == first_valueRef || fname == lagRef || 
fname == leadRef || fname == nth_valueRef || fname == ntileRef ||
-               fname ==  minRef || fname == maxRef || fname == countRef || 
fname == prodRef || fname == sumRef || fname == avgRef))
+       if (f->type == F_ANALYTIC)
                return 1;
 
        c = MCgetClient(sql->clientid);
diff --git a/sql/backends/monet5/sql_rank.c b/sql/backends/monet5/sql_rank.c
--- a/sql/backends/monet5/sql_rank.c
+++ b/sql/backends/monet5/sql_rank.c
@@ -1486,7 +1486,7 @@ SQLavg(Client cntxt, MalBlkPtr mb, MalSt
                        case TYPE_flt: {
                                flt fp = *((flt*)in);
                                dbl *db = (dbl*)res;
-                               if(is_flt_nil(fp))
+                               if (is_flt_nil(fp))
                                        *db = dbl_nil;
                                else
                                        *db = (dbl) fp;
@@ -1495,8 +1495,59 @@ SQLavg(Client cntxt, MalBlkPtr mb, MalSt
                                *res = *in;
                                break;
                        default:
-                               throw(SQL, "sql.avg", SQLSTATE(42000) "average 
not available for %s", ATOMname(tpe));
+                               throw(SQL, "sql.avg", SQLSTATE(42000) "sql.avg 
not available for %s", ATOMname(tpe));
                }
        }
        return msg;
 }
+
+/*
+ * SQLstddev_samp - MAL entry point behind the sql.stddev / batsql.stddev
+ * patterns.
+ *
+ * Argument 1 is the input (scalar or BAT); argument 0 receives the result.
+ * For a BAT input the work is delegated to GDKanalyticalstddev_samp and the
+ * result BAT is handed to the interpreter.  For a scalar input the result is
+ * always dbl_nil for the accepted types (a single value has fewer than two
+ * samples, matching the n <= 1 case of the BAT implementation).
+ *
+ * Returns MAL_SUCCEED-style NULL on success (the value left in msg by
+ * SQLanalytics_args), or an exception string on bad arguments, an
+ * unsupported scalar type, or a GDK failure.
+ */
+str
+SQLstddev_samp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       BAT *r, *b, *s, *e;
+       str msg = SQLanalytics_args(&r, &b, &s, &e, cntxt, mb, stk, pci, TYPE_dbl, "sql.stddev",
+                                                               SQLSTATE(42000) "stddev(:any_1,:lng,:lng)");
+       int tpe = getArgType(mb, pci, 1);
+       gdk_return gdk_res;
+
+       if (msg)
+               return msg;
+       if (isaBatType(tpe))
+               tpe = getBatType(tpe);
+
+       if (b) {
+               bat *res = getArgReference_bat(stk, pci, 0);
+
+               gdk_res = GDKanalyticalstddev_samp(r, b, s, e, tpe);
+               BBPunfix(b->batCacheid);
+               if (s) BBPunfix(s->batCacheid);
+               if (e) BBPunfix(e->batCacheid);
+               if (gdk_res != GDK_SUCCEED) {
+                       /* Do not leak the result BAT on failure.
+                        * NOTE(review): assumes SQLanalytics_args hands us a
+                        * freshly created r that we own - confirm there. */
+                       BBPunfix(r->batCacheid);
+                       throw(SQL, "sql.stddev", GDK_EXCEPTION);
+               }
+               BBPkeepref(*res = r->batCacheid);
+       } else {
+               dbl *res = getArgReference(stk, pci, 0);
+
+               switch (tpe) {
+                       case TYPE_bte:
+                       case TYPE_sht:
+                       case TYPE_int:
+                       case TYPE_lng:
+#ifdef HAVE_HGE
+                       case TYPE_hge:
+#endif
+                       case TYPE_flt:
+                       case TYPE_dbl:
+                               *res = dbl_nil;
+                               break;
+                       default: {
+                               /* the temporal type ids are compared here
+                                * rather than listed as case labels */
+                               if (tpe == TYPE_daytime || tpe == TYPE_date || tpe == TYPE_timestamp)
+                                       *res = dbl_nil;
+                               else
+                                       throw(SQL, "sql.stddev", SQLSTATE(42000) "sql.stddev not available for %s", ATOMname(tpe));
+                       }
+               }
+       }
+       return msg;
+}
diff --git a/sql/backends/monet5/sql_rank.h b/sql/backends/monet5/sql_rank.h
--- a/sql/backends/monet5/sql_rank.h
+++ b/sql/backends/monet5/sql_rank.h
@@ -13,6 +13,8 @@
 
 sql5_export str SQLdiff(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 sql5_export str SQLwindow_bound(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+
+/* rank functions */
 sql5_export str SQLrow_number(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 sql5_export str SQLrank(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 sql5_export str SQLdense_rank(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
@@ -24,6 +26,8 @@ sql5_export str SQLlast_value(Client cnt
 sql5_export str SQLnth_value(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 sql5_export str SQLlag(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 sql5_export str SQLlead(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
+
+/* aggregates */
 sql5_export str SQLmin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 sql5_export str SQLmax(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 sql5_export str SQLcount(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
@@ -31,4 +35,7 @@ sql5_export str SQLsum(Client cntxt, Mal
 sql5_export str SQLprod(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 sql5_export str SQLavg(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 
+/* statistical functions */
+sql5_export str SQLstddev_samp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+
 #endif /* _SQL_RANK_H */
diff --git a/sql/backends/monet5/sql_rank.mal b/sql/backends/monet5/sql_rank.mal
--- a/sql/backends/monet5/sql_rank.mal
+++ b/sql/backends/monet5/sql_rank.mal
@@ -572,3 +572,88 @@ comment "return the average of groups";
 pattern batsql.avg(b:bat[:dbl], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
 address SQLavg
 comment "return the average of groups";
+
+pattern sql.stddev(b:bte, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:bte], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:sht, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:sht], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:int, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:int], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:lng, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:lng], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:flt, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:flt], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:dbl, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:dbl], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:daytime, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:daytime], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:date, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:date], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:timestamp, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:timestamp], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+command aggr.exist(b:bat[:any_2], h:any_1):bit
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to