Changeset: af165b5bfc37 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=af165b5bfc37
Modified Files:
gdk/gdk_analytic.h
gdk/gdk_analytic_func.c
sql/backends/monet5/sql_gencode.c
sql/backends/monet5/sql_rank.c
sql/backends/monet5/sql_rank.h
sql/backends/monet5/sql_rank.mal
sql/backends/monet5/sql_rank.mal.sh
sql/scripts/39_analytics.sql
sql/server/rel_select.c
Branch: statistics-analytics
Log Message:
Implementing stddev_samp window function
diffs (truncated from 392 to 300 lines):
diff --git a/gdk/gdk_analytic.h b/gdk/gdk_analytic.h
--- a/gdk/gdk_analytic.h
+++ b/gdk/gdk_analytic.h
@@ -30,6 +30,7 @@ gdk_export gdk_return GDKanalyticalwindo
gdk_export gdk_return GDKanalyticalfirst(BAT *r, BAT *b, BAT *s, BAT *e, int
tpe);
gdk_export gdk_return GDKanalyticallast(BAT *r, BAT *b, BAT *s, BAT *e, int
tpe);
gdk_export gdk_return GDKanalyticalnthvalue(BAT *r, BAT *b, BAT *s, BAT *e,
BAT *l, const void *restrict bound, int tp1, int tp2);
+
gdk_export gdk_return GDKanalyticalmin(BAT *r, BAT *b, BAT *s, BAT *e, int
tpe);
gdk_export gdk_return GDKanalyticalmax(BAT *r, BAT *b, BAT *s, BAT *e, int
tpe);
gdk_export gdk_return GDKanalyticalcount(BAT *r, BAT *b, BAT *s, BAT *e, const
bit *restrict ignore_nils, int tpe);
@@ -37,4 +38,6 @@ gdk_export gdk_return GDKanalyticalsum(B
gdk_export gdk_return GDKanalyticalprod(BAT *r, BAT *b, BAT *s, BAT *e, int
tp1, int tp2);
gdk_export gdk_return GDKanalyticalavg(BAT *r, BAT *b, BAT *s, BAT *e, int
tpe);
+gdk_export gdk_return GDKanalyticalstddev_samp(BAT *r, BAT *b, BAT *s, BAT *e,
int tpe);
+
#endif //_GDK_ANALYTIC_H_
diff --git a/gdk/gdk_analytic_func.c b/gdk/gdk_analytic_func.c
--- a/gdk/gdk_analytic_func.c
+++ b/gdk/gdk_analytic_func.c
@@ -1720,3 +1720,76 @@ GDKanalyticalavg(BAT *r, BAT *b, BAT *s,
r->tnil = has_nils;
return GDK_SUCCEED;
}
+
+/*
+ * Per-type body of GDKanalyticalstddev_samp.
+ *
+ * This macro is not self-contained: it expands inside
+ * GDKanalyticalstddev_samp and reads/writes that function's locals
+ * b, i, cnt, rb, start, end, n, mean, m2, delta and has_nils.
+ *
+ * For every row i the window frame is the half-open range
+ * [start[i], end[i]) of b's values.  Nil values are skipped.  The mean and
+ * the sum of squared deviations (m2) are maintained with Welford's running
+ * update, so each frame is traversed only once.  A frame with fewer than
+ * two non-nil values produces dbl_nil.
+ */
+#define ANALYTICAL_STDEV_CALC(TPE) \
+	do { \
+		TPE *bp = (TPE*)Tloc(b, 0), *bs, *be, v; \
+		for (; i < cnt; i++, rb++) { \
+			bs = bp + start[i]; \
+			be = bp + end[i]; \
+			for (; bs < be; bs++) { \
+				v = *bs; \
+				if (is_##TPE##_nil(v)) \
+					continue; \
+				n++; \
+				delta = (dbl) v - mean; \
+				mean += delta / n; \
+				/* uses the already updated mean on purpose */ \
+				m2 += delta * ((dbl) v - mean); \
+			} \
+			if (n > 1) { \
+				*rb = sqrt(m2 / (n - 1)); \
+			} else { \
+				*rb = dbl_nil; \
+				has_nils = true; \
+			} \
+			/* reset the accumulators for the next row's frame */ \
+			n = 0; \
+			mean = 0; \
+			m2 = 0; \
+		} \
+	} while (0)
+
+/*
+ * Sample standard deviation (stddev_samp) as a window function.
+ *
+ * r    result BAT; its tail is written as dbl, one value per row of b,
+ *      and its count and nil properties are set on success
+ * b    input BAT holding values of type tpe
+ * s,e  BATs of lng frame boundaries, one start/end pair per row of b;
+ *      both are required (asserted)
+ * tpe  element type of b: bte, sht, int, lng, hge (when HAVE_HGE is
+ *      defined), flt or dbl
+ *
+ * Returns GDK_SUCCEED, or GDK_FAIL after reporting through GDKerror when
+ * tpe is not one of the supported types.
+ */
+gdk_return
+GDKanalyticalstddev_samp(BAT *r, BAT *b, BAT *s, BAT *e, int tpe)
+{
+	bool has_nils = false;
+	BUN i = 0, cnt = BATcount(b), n = 0;
+	lng *restrict start, *restrict end;
+	dbl *restrict rb = (dbl *) Tloc(r, 0), mean = 0, m2 = 0, delta;
+
+	assert(s && e);
+	start = (lng *) Tloc(s, 0);
+	end = (lng *) Tloc(e, 0);
+
+	switch (tpe) {
+	case TYPE_bte:
+		ANALYTICAL_STDEV_CALC(bte);
+		break;
+	case TYPE_sht:
+		ANALYTICAL_STDEV_CALC(sht);
+		break;
+	case TYPE_int:
+		ANALYTICAL_STDEV_CALC(int);
+		break;
+	case TYPE_lng:
+		ANALYTICAL_STDEV_CALC(lng);
+		break;
+#ifdef HAVE_HGE
+	case TYPE_hge:
+		ANALYTICAL_STDEV_CALC(hge);
+		break;
+#endif
+	case TYPE_flt:
+		ANALYTICAL_STDEV_CALC(flt);
+		break;
+	case TYPE_dbl:
+		ANALYTICAL_STDEV_CALC(dbl);
+		break;
+	default:
+		/* name the operation that actually failed, not "average" */
+		GDKerror("%s: standard deviation of type %s unsupported.\n", __func__, ATOMname(tpe));
+		return GDK_FAIL;
+	}
+	BATsetcount(r, cnt);
+	r->tnonil = !has_nils;
+	r->tnil = has_nils;
+	return GDK_SUCCEED;
+}
diff --git a/sql/backends/monet5/sql_gencode.c
b/sql/backends/monet5/sql_gencode.c
--- a/sql/backends/monet5/sql_gencode.c
+++ b/sql/backends/monet5/sql_gencode.c
@@ -912,13 +912,12 @@ monet5_resolve_function(ptr M, sql_func
if (!mname || !fname)
return 0;
- /* Some SQL functions MAL mapping such as count(*) aggregate, the
number or arguments don't match */
+ /* For some SQL-to-MAL function mappings, such as the count(*) aggregate, the
number of arguments doesn't match */
if (mname == calcRef && fname == getName("="))
return 1;
if (mname == aggrRef && fname == countRef)
return 1;
- if (mname == sqlRef && (fname == first_valueRef || fname == lagRef ||
fname == leadRef || fname == nth_valueRef || fname == ntileRef ||
- fname == minRef || fname == maxRef || fname == countRef ||
fname == prodRef || fname == sumRef || fname == avgRef))
+ if (f->type == F_ANALYTIC)
return 1;
c = MCgetClient(sql->clientid);
diff --git a/sql/backends/monet5/sql_rank.c b/sql/backends/monet5/sql_rank.c
--- a/sql/backends/monet5/sql_rank.c
+++ b/sql/backends/monet5/sql_rank.c
@@ -1486,7 +1486,7 @@ SQLavg(Client cntxt, MalBlkPtr mb, MalSt
case TYPE_flt: {
flt fp = *((flt*)in);
dbl *db = (dbl*)res;
- if(is_flt_nil(fp))
+ if (is_flt_nil(fp))
*db = dbl_nil;
else
*db = (dbl) fp;
@@ -1495,8 +1495,59 @@ SQLavg(Client cntxt, MalBlkPtr mb, MalSt
*res = *in;
break;
default:
- throw(SQL, "sql.avg", SQLSTATE(42000) "average
not available for %s", ATOMname(tpe));
+ throw(SQL, "sql.avg", SQLSTATE(42000) "sql.avg
not available for %s", ATOMname(tpe));
}
}
return msg;
}
+
+/*
+ * MAL pattern implementation behind sql.stddev and batsql.stddev.
+ *
+ * Argument 1 of the instruction is the input (scalar or BAT) and argument 0
+ * receives the dbl result.  SQLanalytics_args resolves the result, input and
+ * frame BATs; a non-NULL message from it is returned unchanged.
+ *
+ * BAT input:    the work is delegated to GDKanalyticalstddev_samp.  The
+ *               input and frame BATs are released in every case; the result
+ *               BAT is handed to the caller on success and released on
+ *               failure.
+ * Scalar input: the result is always dbl_nil for the accepted types
+ *               (presumably because a sample standard deviation needs at
+ *               least two values); any other type raises an SQL exception.
+ */
+str
+SQLstddev_samp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+	BAT *r, *b, *s, *e;
+	str msg = SQLanalytics_args(&r, &b, &s, &e, cntxt, mb, stk, pci, TYPE_dbl, "sql.stddev",
+				    SQLSTATE(42000) "stddev(:any_1,:lng,:lng)");
+	int tpe = getArgType(mb, pci, 1);
+	gdk_return gdk_res;
+
+	if (msg)
+		return msg;
+	if (isaBatType(tpe))
+		tpe = getBatType(tpe);
+
+	if (b) {
+		bat *res = getArgReference_bat(stk, pci, 0);
+
+		gdk_res = GDKanalyticalstddev_samp(r, b, s, e, tpe);
+		BBPunfix(b->batCacheid);
+		if (s) BBPunfix(s->batCacheid);
+		if (e) BBPunfix(e->batCacheid);
+		if (gdk_res != GDK_SUCCEED) {
+			/* the result BAT is not handed to the caller on
+			 * failure, so drop our reference instead of leaking it */
+			BBPunfix(r->batCacheid);
+			throw(SQL, "sql.stddev", GDK_EXCEPTION);
+		}
+		BBPkeepref(*res = r->batCacheid);
+	} else {
+		dbl *res = getArgReference(stk, pci, 0);
+
+		switch (tpe) {
+		case TYPE_bte:
+		case TYPE_sht:
+		case TYPE_int:
+		case TYPE_lng:
+#ifdef HAVE_HGE
+		case TYPE_hge:
+#endif
+		case TYPE_flt:
+		case TYPE_dbl:
+			*res = dbl_nil;
+			break;
+		default: {
+			/* the temporal types are tested with if rather than
+			 * listed as case labels */
+			if (tpe == TYPE_daytime || tpe == TYPE_date || tpe == TYPE_timestamp)
+				*res = dbl_nil;
+			else
+				throw(SQL, "sql.stddev", SQLSTATE(42000) "sql.stddev not available for %s", ATOMname(tpe));
+		}
+		}
+	}
+	return msg;
+}
diff --git a/sql/backends/monet5/sql_rank.h b/sql/backends/monet5/sql_rank.h
--- a/sql/backends/monet5/sql_rank.h
+++ b/sql/backends/monet5/sql_rank.h
@@ -13,6 +13,8 @@
sql5_export str SQLdiff(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
sql5_export str SQLwindow_bound(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
+
+/* rank functions */
sql5_export str SQLrow_number(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
sql5_export str SQLrank(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
sql5_export str SQLdense_rank(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
@@ -24,6 +26,8 @@ sql5_export str SQLlast_value(Client cnt
sql5_export str SQLnth_value(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
sql5_export str SQLlag(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
sql5_export str SQLlead(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
+
+/* aggregates */
sql5_export str SQLmin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
sql5_export str SQLmax(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
sql5_export str SQLcount(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
@@ -31,4 +35,7 @@ sql5_export str SQLsum(Client cntxt, Mal
sql5_export str SQLprod(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
sql5_export str SQLavg(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
+/* statistical functions */
+sql5_export str SQLstddev_samp(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
InstrPtr pci);
+
#endif /* _SQL_RANK_H */
diff --git a/sql/backends/monet5/sql_rank.mal b/sql/backends/monet5/sql_rank.mal
--- a/sql/backends/monet5/sql_rank.mal
+++ b/sql/backends/monet5/sql_rank.mal
@@ -572,3 +572,88 @@ comment "return the average of groups";
pattern batsql.avg(b:bat[:dbl], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
address SQLavg
comment "return the average of groups";
+
+pattern sql.stddev(b:bte, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:bte], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:sht, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:sht], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:int, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:int], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:lng, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:lng], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:flt, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:flt], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:dbl, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:dbl], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:daytime, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:daytime], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:date, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:date], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern sql.stddev(b:timestamp, s:lng, e:lng) :dbl
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+pattern batsql.stddev(b:bat[:timestamp], s:bat[:lng], e:bat[:lng]) :bat[:dbl]
+address SQLstddev_samp
+comment "standard deviation of groups";
+
+command aggr.exist(b:bat[:any_2], h:any_1):bit
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list