Changeset: 9c297e31d281 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9c297e31d281
Modified Files:
	gdk/gdk_aggr.c
	gdk/gdk_analytic.c
	gdk/gdk_calc_private.h
	sql/common/sql_types.c
	sql/test/analytics/Tests/analytics00.sql
	sql/test/analytics/Tests/analytics00.stable.out
Branch: analytics
Log Message:
Implemented analytical sum in a floating-point window. diffs (truncated from 367 to 300 lines): diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c --- a/gdk/gdk_aggr.c +++ b/gdk/gdk_aggr.c @@ -178,7 +178,7 @@ exchange(double *x, double *y) } /* this function was adapted from https://bugs.python.org/file10357/msum4.py */ -static BUN +BUN dofsum(const void *restrict values, oid seqb, BUN start, BUN end, void *restrict results, BUN ngrp, int tp1, int tp2, const oid *restrict cand, const oid *candend, const oid *restrict gids, diff --git a/gdk/gdk_analytic.c b/gdk/gdk_analytic.c --- a/gdk/gdk_analytic.c +++ b/gdk/gdk_analytic.c @@ -9,6 +9,7 @@ #include "monetdb_config.h" #include "gdk.h" #include "gdk_analytic.h" +#include "gdk_calc_private.h" #define ANALYTICAL_LIMIT_IMP(TPE, OP) \ do { \ @@ -242,20 +243,20 @@ ANALYTICAL_LIMIT(max, MAX, <) #define ANALYTICAL_SUM_IMP(TPE1, TPE2) \ do { \ TPE1 *bp; \ - TPE2 *rp, *rb, curval; \ + TPE2 *rp, *rb, curval = TPE2##_nil; \ bp = (TPE1*)Tloc(b, 0); \ rb = rp = (TPE2*)Tloc(r, 0); \ - curval = TPE2##_nil; \ if (p) { \ if (o) { \ np = (bit*)Tloc(p, 0); \ for(i=0; i<cnt; i++, np++, rp++, bp++) { \ if (*np) { \ + for (;rb < rp; rb++) \ + *rb = curval; \ if(is_##TPE2##_nil(curval)) \ has_nils = true; \ - for (;rb < rp; rb++) \ - *rb = curval; \ - curval = TPE2##_nil; \ + else \ + curval = TPE2##_nil; \ } \ if (!is_##TPE1##_nil(*bp)) { \ if(is_##TPE2##_nil(curval)) \ @@ -275,11 +276,12 @@ ANALYTICAL_LIMIT(max, MAX, <) np = (bit*)Tloc(p, 0); \ for(i=0; i<cnt; i++, np++, rp++, bp++) { \ if (*np) { \ + for (;rb < rp; rb++) \ + *rb = curval; \ if(is_##TPE2##_nil(curval)) \ has_nils = true; \ - for (;rb < rp; rb++) \ - *rb = curval; \ - curval = TPE2##_nil; \ + else \ + curval = TPE2##_nil; \ } \ if (!is_##TPE1##_nil(*bp)) { \ if(is_##TPE2##_nil(curval)) \ @@ -308,24 +310,98 @@ ANALYTICAL_LIMIT(max, MAX, <) goto calc_overflow); \ } \ } \ - if(is_##TPE2##_nil(curval)) \ - has_nils = true; \ for(;rb < rp; rb++) \ *rb = curval; \ + 
if(is_##TPE2##_nil(curval)) \ + has_nils = true; \ } else { /* single value, ie no ordering */ \ + for(i=0; i<cnt; i++, rp++, bp++) \ + *rp = *bp; \ if(is_##TPE1##_nil(*bp)) \ has_nils = true; \ - for(i=0; i<cnt; i++, rp++, bp++) \ - *rp = *bp; \ } \ goto finish; \ } while(0); +#define ANALYTICAL_SUM_FP_IMP(TPE1, TPE2) \ + do { \ + TPE1 *bp, *bprev; \ + TPE2 *rp, *rb, curval = TPE2##_nil; \ + bp = bprev = (TPE1*)Tloc(b, 0); \ + rb = rp = (TPE2*)Tloc(r, 0); \ + if (p) { \ + if (o) { \ + np = (bit*)Tloc(p, 0); \ + for(i=0,j=0; i<cnt; i++, np++, rp++, bp++) { \ + if (*np) { \ + dofsum(bprev, 0, 0, i - j, rb, 1, TYPE_##TPE1, \ + TYPE_##TPE2, NULL, NULL, NULL, 0, 0, \ + true, false, true, "GDKanalyticalsum"); \ + curval = *rb; \ + bprev = bp; \ + j = i; \ + for (;rb < rp; rb++) \ + *rb = curval; \ + if(is_##TPE2##_nil(curval)) \ + has_nils = true; \ + } \ + } \ + dofsum(bprev, 0, 0, i - j, rb, 1, TYPE_##TPE1, \ + TYPE_##TPE2, NULL, NULL, NULL, 0, 0, \ + true, false, true, "GDKanalyticalsum"); \ + curval = *rb; \ + if(is_##TPE2##_nil(curval)) \ + has_nils = true; \ + for (;rb < rp; rb++) \ + *rb = curval; \ + } else { /* single value, ie no ordering */ \ + np = (bit*)Tloc(p, 0); \ + for(i=0,j=0; i<cnt; i++, np++, rp++, bp++) { \ + if (*np) { \ + dofsum(bprev, 0, 0, i - j, rb, 1, TYPE_##TPE1, \ + TYPE_##TPE2, NULL, NULL, NULL, 0, 0, \ + true, false, true, "GDKanalyticalsum"); \ + curval = *rb; \ + bprev = bp; \ + j = i; \ + for (;rb < rp; rb++) \ + *rb = curval; \ + if(is_##TPE2##_nil(curval)) \ + has_nils = true; \ + } \ + } \ + dofsum(bprev, 0, 0, i - j, rb, 1, TYPE_##TPE1, \ + TYPE_##TPE2, NULL, NULL, NULL, 0, 0, \ + true, false, true, "GDKanalyticalsum"); \ + curval = *rb; \ + if(is_##TPE2##_nil(curval)) \ + has_nils = true; \ + for (;rb < rp; rb++) \ + *rb = curval; \ + } \ + } else if (o) { /* single value, ie no partitions */ \ + dofsum(bp, 0, 0, cnt, rb, 1, TYPE_##TPE1, TYPE_##TPE2, \ + NULL, NULL, NULL, 0, 0, true, false, true, \ + "GDKanalyticalsum"); \ + curval 
= *rb; \ + for(i=0; i<cnt; i++, rb++) \ + *rb = curval; \ + if(is_##TPE2##_nil(curval)) \ + has_nils = true; \ + } else { /* single value, ie no ordering */ \ + for(i=0; i<cnt; i++, rp++, bp++) \ + *rp = *bp; \ + if(is_##TPE1##_nil(*bp)) \ + has_nils = true; \ + } \ + goto finish; \ + } while(0); + gdk_return GDKanalyticalsum(BAT *r, BAT *b, BAT *p, BAT *o, int tp1, int tp2) { bool has_nils = false; - BUN i, cnt = BATcount(b); + BUN i, j, cnt = BATcount(b); bit *np; switch (tp2) { @@ -411,18 +487,29 @@ GDKanalyticalsum(BAT *r, BAT *b, BAT *p, break; } #endif - case TYPE_flt: - if (tp1 != TYPE_flt) { - goto nosupport; - break; + case TYPE_flt: { + switch (tp1) { + case TYPE_flt: + ANALYTICAL_SUM_FP_IMP(flt, flt); + break; + default: + goto nosupport; + break; } - /* fall through */ - case TYPE_dbl: - if (tp1 != TYPE_flt && tp1 != TYPE_dbl) { - goto nosupport; - break; + } + case TYPE_dbl: { + switch (tp1) { + case TYPE_flt: + ANALYTICAL_SUM_FP_IMP(flt, dbl); + break; + case TYPE_dbl: + ANALYTICAL_SUM_FP_IMP(dbl, dbl); + break; + default: + goto nosupport; + break; } - goto nosupport; + } default: goto nosupport; } @@ -438,4 +525,5 @@ finish: } #undef ANALYTICAL_SUM_IMP +#undef ANALYTICAL_SUM_FP_IMP #undef ANALYTICAL_ADD_WITH_CHECK diff --git a/gdk/gdk_calc_private.h b/gdk/gdk_calc_private.h --- a/gdk/gdk_calc_private.h +++ b/gdk/gdk_calc_private.h @@ -250,3 +250,8 @@ } while (0) #endif /* HAVE___BUILTIN_ADD_OVERFLOW */ #endif /* HAVE_HGE */ + +BUN +dofsum(const void *restrict values, oid seqb, BUN start, BUN end, void *restrict results, BUN ngrp, int tp1, int tp2, + const oid *restrict cand, const oid *candend, const oid *restrict gids, oid min, oid max, bool skip_nils, + bool abort_on_error, bool nil_if_empty, const char *func); diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c --- a/sql/common/sql_types.c +++ b/sql/common/sql_types.c @@ -1573,11 +1573,43 @@ sqltypeinit( sql_allocator *sa) //sql_create_analytic(sa, "lead", "sql", "lead", ANY, BIT, BIT, 
ANY, SCALE_NONE); //sql_create_analytic(sa, "first_value", "sql", "first_value", ANY, BIT, BIT, ANY, SCALE_NONE); //sql_create_analytic(sa, "last_value", "sql", "last_value", ANY, BIT, BIT, ANY, SCALE_NONE); - sql_create_analytic(sa, "sum", "sql", "sum", ANY, BIT, BIT, ANY, SCALE_NONE); + //sql_create_analytic(sa, "avg", "sql", "avg", ANY, BIT, BIT, ANY, SCALE_NONE); + //sql_create_analytic(sa, "count", "sql", "count", ANY, BIT, BIT, ANY, SCALE_NONE); sql_create_analytic(sa, "min", "sql", "min", ANY, BIT, BIT, ANY, SCALE_NONE); sql_create_analytic(sa, "max", "sql", "max", ANY, BIT, BIT, ANY, SCALE_NONE); - //sql_create_analytic(sa, "avg", "sql", "avg", ANY, BIT, BIT, ANY, SCALE_NONE); - //sql_create_analytic(sa, "count", "sql", "count", ANY, BIT, BIT, ANY, SCALE_NONE); + sql_create_analytic(sa, "sum", "sql", "sum", ANY, BIT, BIT, ANY, SCALE_NONE); + + /* //analytical sum for numerical and decimals + sql_create_analytic(sa, "sum", "sql", "sum", BTE, BIT, BIT, LargestINT, SCALE_NONE); + sql_create_analytic(sa, "sum", "sql", "sum", SHT, BIT, BIT, LargestINT, SCALE_NONE); + sql_create_analytic(sa, "sum", "sql", "sum", INT, BIT, BIT, LargestINT, SCALE_NONE); + sql_create_analytic(sa, "sum", "sql", "sum", LNG, BIT, BIT, LargestINT, SCALE_NONE); +#ifdef HAVE_HGE + if (have_hge) + sql_create_analytic(sa, "sum", "sql", "sum", HGE, BIT, BIT, LargestINT, SCALE_NONE); +#endif + sql_create_analytic(sa, "sum", "sql", "sum", LNG, BIT, BIT, LNG, SCALE_NONE); + + t = decimals; // BTE + sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, LargestDEC, SCALE_NONE); + t++; // SHT + sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, LargestDEC, SCALE_NONE); + t++; // INT + sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, LargestDEC, SCALE_NONE); + t++; // LNG + sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, LargestDEC, SCALE_NONE); +#ifdef HAVE_HGE + if (have_hge) { + t++; // HGE + sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, 
LargestDEC, SCALE_NONE); + } +#endif + for (t = floats; t < dates; t++) { + sql_create_analytic(sa, "sum", "sql", "sum", *t, BIT, BIT, *t, SCALE_NONE); + //sql_create_analytic(sa, "prod", "sql", "prod", *t, BIT, BIT, *t, SCALE_NONE); maybe adding a prod analytic function + }*/ + sql_create_analytic(sa, "sum", "sql", "sum", MONINT, BIT, BIT, MONINT, SCALE_NONE); + sql_create_analytic(sa, "sum", "sql", "sum", SECINT, BIT, BIT, SECINT, SCALE_NONE); sql_create_func(sa, "and", "calc", "and", BIT, BIT, BIT, SCALE_FIX); sql_create_func(sa, "or", "calc", "or", BIT, BIT, BIT, SCALE_FIX); diff --git a/sql/test/analytics/Tests/analytics00.sql b/sql/test/analytics/Tests/analytics00.sql --- a/sql/test/analytics/Tests/analytics00.sql +++ b/sql/test/analytics/Tests/analytics00.sql @@ -44,4 +44,13 @@ select max(aa) over (partition by bb) fr select max(aa) over (partition by bb order by bb asc) from stressme; select max(aa) over (partition by bb order by bb desc) from stressme; select max(aa) over (order by bb desc) from stressme; + +create table debugme (aa real, bb int); +insert into debugme values (15, 3), (3, 1), (2, 1), (5, 3), (NULL, 2), (3, 2), (4, 1), (6, 3), (8, 2), (NULL, 4); + +select sum(aa) over (partition by bb) from debugme; +select sum(aa) over (partition by bb order by bb asc) from debugme; +select sum(aa) over (partition by bb order by bb desc) from debugme; +select sum(aa) over (order by bb desc) from debugme; + rollback; diff --git a/sql/test/analytics/Tests/analytics00.stable.out b/sql/test/analytics/Tests/analytics00.stable.out --- a/sql/test/analytics/Tests/analytics00.stable.out +++ b/sql/test/analytics/Tests/analytics00.stable.out @@ -543,6 +543,69 @@ Ready. [ "stress" ] [ "stress" ] _______________________________________________ checkin-list mailing list [email protected] https://www.monetdb.org/mailman/listinfo/checkin-list
