Changeset: 05928b61bf26 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=05928b61bf26
Modified Files:
        gdk/gdk_analytic.h
        gdk/gdk_analytic_func.c
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_rank.c
        sql/backends/monet5/sql_rank.h
Branch: window-tunning
Log Message:

Updated stdev and variance window functions


diffs (truncated from 843 to 300 lines):

diff --git a/gdk/gdk_analytic.h b/gdk/gdk_analytic.h
--- a/gdk/gdk_analytic.h
+++ b/gdk/gdk_analytic.h
@@ -37,10 +37,10 @@ gdk_export gdk_return GDKanalyticalprod(
 gdk_export gdk_return GDKanalyticalavg(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, 
BAT *e, int tpe, int frame_type);
 gdk_export gdk_return GDKanalyticalavginteger(BAT *r, BAT *p, BAT *o, BAT *b, 
BAT *s, BAT *e, int tpe, int frame_type);
 
-gdk_export gdk_return GDKanalytical_stddev_samp(BAT *r, BAT *b, BAT *s, BAT 
*e, int tpe);
-gdk_export gdk_return GDKanalytical_stddev_pop(BAT *r, BAT *b, BAT *s, BAT *e, 
int tpe);
-gdk_export gdk_return GDKanalytical_variance_samp(BAT *r, BAT *b, BAT *s, BAT 
*e, int tpe);
-gdk_export gdk_return GDKanalytical_variance_pop(BAT *r, BAT *b, BAT *s, BAT 
*e, int tpe);
+gdk_export gdk_return GDKanalytical_stddev_samp(BAT *r, BAT *p, BAT *o, BAT 
*b, BAT *s, BAT *e, int tpe, int frame_type);
+gdk_export gdk_return GDKanalytical_stddev_pop(BAT *r, BAT *p, BAT *o, BAT *b, 
BAT *s, BAT *e, int tpe, int frame_type);
+gdk_export gdk_return GDKanalytical_variance_samp(BAT *r, BAT *p, BAT *o, BAT 
*b, BAT *s, BAT *e, int tpe, int frame_type);
+gdk_export gdk_return GDKanalytical_variance_pop(BAT *r, BAT *p, BAT *o, BAT 
*b, BAT *s, BAT *e, int tpe, int frame_type);
 gdk_export gdk_return GDKanalytical_covariance_pop(BAT *r, BAT *b1, BAT *b2, 
BAT *s, BAT *e, int tpe);
 gdk_export gdk_return GDKanalytical_covariance_samp(BAT *r, BAT *b1, BAT *b2, 
BAT *s, BAT *e, int tpe);
 gdk_export gdk_return GDKanalytical_correlation(BAT *r, BAT *b1, BAT *b2, BAT 
*s, BAT *e, int tpe);
diff --git a/gdk/gdk_analytic_func.c b/gdk/gdk_analytic_func.c
--- a/gdk/gdk_analytic_func.c
+++ b/gdk/gdk_analytic_func.c
@@ -2500,11 +2500,13 @@ GDKanalyticalavg(BAT *r, BAT *p, BAT *o,
                                AVERAGE_ITER(TPE, v, avg, rem, ncnt); \
                } \
                if (ncnt == 0) {        \
+                       for (; k < i; k++)      \
+                               rb[k] = TPE##_nil; \
                        has_nils = true; \
-                       rb[k] = TPE##_nil; \
                } else { \
                        ANALYTICAL_AVERAGE_INT_CALC_FINALIZE(avg, rem, ncnt); \
-                       rb[k] = avg; \
+                       for (; k < i; k++)      \
+                               rb[k] = avg; \
                } \
                rem = 0; \
                ncnt = 0; \
@@ -2619,14 +2621,109 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
        return GDK_FAIL;
 }
 
-#define ANALYTICAL_STDEV_VARIANCE_CALC(TPE, SAMPLE, OP)        \
+#define ANALYTICAL_STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP)  
\
+       do { \
+               for (; k < i;) { \
+                       j = k; \
+                       do {    \
+                               TPE v = bp[k]; \
+                               if (!is_##TPE##_nil(v))  {      \
+                                       n++;                            \
+                                       delta = (dbl) v - mean;         \
+                                       mean += delta / n;              \
+                                       m2 += delta * ((dbl) v - mean); \
+                               }       \
+                               k++; \
+                       } while (k < i && !op[k]);      \
+                       if (isinf(m2)) {        \
+                               goto overflow;          \
+                       } else if (n > SAMPLE) { \
+                               for (; j < k; j++) \
+                                       rb[j] = OP; \
+                       } else { \
+                               for (; j < k; j++) \
+                                       rb[j] = dbl_nil; \
+                               has_nils = true; \
+                       } \
+               } \
+               n = 0;  \
+               mean = 0;       \
+               m2 = 0; \
+       } while (0)
+
+#define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP)  
\
+       do { \
+               l = i - 1; \
+               for (j = l; ; j--) { \
+                       TPE v = bp[j]; \
+                       if (!is_##TPE##_nil(bp[j]))     {       \
+                               n++;                            \
+                               delta = (dbl) v - mean;         \
+                               mean += delta / n;              \
+                               m2 += delta * ((dbl) v - mean); \
+                       }       \
+                       if (op[j] || j == k) {  \
+                               if (isinf(m2)) {        \
+                                       goto overflow;          \
+                               } else if (n > SAMPLE) { \
+                                       for (; l >= j; l--) \
+                                               rb[l] = OP; \
+                               } else { \
+                                       for (; l >= j; l--) \
+                                               rb[l] = dbl_nil; \
+                                       has_nils = true; \
+                               } \
+                               if (j == k)     \
+                                       break;  \
+                               l = j - 1;      \
+                       }       \
+               }       \
+               n = 0;  \
+               mean = 0;       \
+               m2 = 0; \
+               k = i; \
+       } while (0)
+
+#define ANALYTICAL_STDEV_VARIANCE_ALL_ROWS(TPE, SAMPLE, OP)    \
+       do { \
+               for (; j < i; j++) { \
+                       TPE v = bp[j]; \
+                       if (is_##TPE##_nil(v))          \
+                               continue;               \
+                       n++;                            \
+                       delta = (dbl) v - mean;         \
+                       mean += delta / n;              \
+                       m2 += delta * ((dbl) v - mean); \
+               } \
+               if (isinf(m2)) {        \
+                       goto overflow;          \
+               } else if (n > SAMPLE) { \
+                       for (; k < i; k++) \
+                               rb[k] = OP; \
+               } else { \
+                       for (; k < i; k++) \
+                               rb[k] = dbl_nil; \
+                       has_nils = true; \
+               } \
+               n = 0;  \
+               mean = 0;       \
+               m2 = 0; \
+       } while (0)
+
+#define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
        do {                                                            \
-               TPE *bp = (TPE*)Tloc(b, 0), *bs, *be, v;                \
-               for (; i < cnt; i++, rb++) {                            \
-                       bs = bp + start[i];                             \
-                       be = bp + end[i];                               \
+               (void) bp;      \
+               for (; k < i; k++) \
+                       rb[k] = SAMPLE == 1 ? dbl_nil : 0;      \
+               has_nils = is_dbl_nil(rb[k - 1]); \
+       } while (0)
+
+#define ANALYTICAL_STDEV_VARIANCE_OTHERS(TPE, SAMPLE, OP)      \
+       do {                                                            \
+               for (; k < i; k++) {                    \
+                       TPE *bs = bp + start[k], *be = bp + end[k];             
\
                        for (; bs < be; bs++) {                         \
-                               v = *bs;                                \
+                               TPE v = *bs;                            \
                                if (is_##TPE##_nil(v))          \
                                        continue;               \
                                n++;                            \
@@ -2637,10 +2734,10 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
                        if (isinf(m2)) {        \
                                goto overflow;          \
                        } else if (n > SAMPLE) { \
-                               *rb = OP; \
+                               rb[k] = OP; \
                        } else { \
-                               *rb = dbl_nil; \
-                               nils++; \
+                               rb[k] = dbl_nil; \
+                               has_nils = true; \
                        } \
                        n = 0;  \
                        mean = 0;       \
@@ -2648,55 +2745,85 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
                }       \
        } while (0)
 
+#define ANALYTICAL_STDEV_VARIANCE_PARTITIONS(TPE, SAMPLE, OP, IMP)             
\
+       do {                                            \
+               TPE *bp = (TPE*)Tloc(b, 0); \
+               if (p) {                                        \
+                       for (; i < cnt; i++) {          \
+                               if (np[i])                      \
+                                       IMP(TPE, SAMPLE, OP);   \
+                       }                                               \
+               }       \
+               i = cnt;                        \
+               IMP(TPE, SAMPLE, OP);   \
+       } while (0)
+
 #ifdef HAVE_HGE
-#define ANALYTICAL_STDEV_VARIANCE_LIMIT(SAMPLE, OP) \
+#define ANALYTICAL_STDEV_VARIANCE_LIMIT(IMP, SAMPLE, OP) \
        case TYPE_hge: \
-               ANALYTICAL_STDEV_VARIANCE_CALC(hge, SAMPLE, OP); \
+               ANALYTICAL_STDEV_VARIANCE_PARTITIONS(hge, SAMPLE, OP, 
ANALYTICAL_STDEV_VARIANCE_##IMP); \
        break;
 #else
-#define ANALYTICAL_STDEV_VARIANCE_LIMIT(SAMPLE, OP)
+#define ANALYTICAL_STDEV_VARIANCE_LIMIT(IMP, SAMPLE, OP)
 #endif
 
+#define ANALYTICAL_STDEV_VARIANCE_BRANCHES(IMP, SAMPLE, OP)            \
+       do { \
+               switch (tpe) {  \
+               case TYPE_bte:  \
+                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(bte, SAMPLE, OP, 
ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       break;  \
+               case TYPE_sht:  \
+                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(sht, SAMPLE, OP, 
ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       break;  \
+               case TYPE_int:  \
+                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(int, SAMPLE, OP, 
ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       break;  \
+               case TYPE_lng:  \
+                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(lng, SAMPLE, OP, 
ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       break;  \
+               ANALYTICAL_STDEV_VARIANCE_LIMIT(IMP, SAMPLE, OP)        \
+               default:        \
+                       goto nosupport; \
+               }       \
+       } while (0)
+
+
 #define GDK_ANALYTICAL_STDEV_VARIANCE(NAME, SAMPLE, OP, DESC) \
 gdk_return \
-GDKanalytical_##NAME(BAT *r, BAT *b, BAT *s, BAT *e, int tpe) \
+GDKanalytical_##NAME(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, 
int frame_type) \
 { \
-       BUN i = 0, cnt = BATcount(b), n = 0, nils = 0; \
-       lng *restrict start, *restrict end; \
+       bool has_nils = false;  \
+       lng i = 0, j = 0, k = 0, l = 0, cnt = (lng) BATcount(b), n = 0; \
+       lng *restrict start = s ? (lng*)Tloc(s, 0) : NULL, *restrict end = e ? 
(lng*)Tloc(e, 0) : NULL; \
+       bit *np = p ? Tloc(p, 0) : NULL, *op = o ? Tloc(o, 0) : NULL;   \
        dbl *restrict rb = (dbl *) Tloc(r, 0), mean = 0, m2 = 0, delta; \
- \
-       assert(s && e); \
-       start = (lng *) Tloc(s, 0); \
-       end = (lng *) Tloc(e, 0); \
- \
-       switch (tpe) { \
-       case TYPE_bte: \
-               ANALYTICAL_STDEV_VARIANCE_CALC(bte, SAMPLE, OP); \
-               break; \
-       case TYPE_sht: \
-               ANALYTICAL_STDEV_VARIANCE_CALC(sht, SAMPLE, OP); \
-               break; \
-       case TYPE_int: \
-               ANALYTICAL_STDEV_VARIANCE_CALC(int, SAMPLE, OP); \
-               break; \
-       case TYPE_lng: \
-               ANALYTICAL_STDEV_VARIANCE_CALC(lng, SAMPLE, OP); \
-               break; \
-       ANALYTICAL_STDEV_VARIANCE_LIMIT(SAMPLE, OP) \
-       case TYPE_flt:\
-               ANALYTICAL_STDEV_VARIANCE_CALC(flt, SAMPLE, OP); \
-               break; \
-       case TYPE_dbl: \
-               ANALYTICAL_STDEV_VARIANCE_CALC(dbl, SAMPLE, OP); \
-               break; \
-       default: \
-               GDKerror("42000!%s of type %s unsupported.\n", DESC, 
ATOMname(tpe)); \
-               return GDK_FAIL; \
-       } \
+       \
+       switch (frame_type) {   \
+       case 3: /* unbounded until current row */       {       \
+               ANALYTICAL_STDEV_VARIANCE_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW, 
SAMPLE, OP);     \
+       } break;        \
+       case 4: /* current row until unbounded */       {       \
+               ANALYTICAL_STDEV_VARIANCE_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED, 
SAMPLE, OP);     \
+       } break;        \
+       case 5: /* all rows */  {       \
+               ANALYTICAL_STDEV_VARIANCE_BRANCHES(ALL_ROWS, SAMPLE, OP);       
\
+       } break;        \
+       case 6: /* current row */ {     \
+               ANALYTICAL_STDEV_VARIANCE_BRANCHES(CURRENT_ROW, SAMPLE, OP);    
\
+       } break;        \
+       default: {      \
+               ANALYTICAL_STDEV_VARIANCE_BRANCHES(OTHERS, SAMPLE, OP); \
+       }       \
+       }       \
+       \
        BATsetcount(r, cnt); \
-       r->tnonil = nils == 0; \
-       r->tnil = nils > 0; \
+       r->tnonil = !has_nils;  \
+       r->tnil = has_nils;     \
        return GDK_SUCCEED; \
+  nosupport:   \
+       GDKerror("42000!%s of type %s unsupported.\n", DESC, ATOMname(tpe)); \
+       return GDK_FAIL; \
   overflow: \
        GDKerror("22003!overflow in calculation.\n"); \
        return GDK_FAIL; \
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -6385,14 +6385,295 @@ static mel_func sql_init_funcs[] = {
  pattern("batsql", "avg", SQLavginteger_global, false, "return the average of 
groups", args(1,4, 
batarg("",lng),batarg("b",lng),arg("t",int),batarg("o",bit))),
  pattern("sql", "avg", SQLavginteger_global, false, "return the average of 
groups", args(1,5, 
arg("",lng),arg("b",lng),arg("t",int),arg("o",bit),arg("p",bit))),
  pattern("batsql", "avg", SQLavginteger_global, false, "return the average of 
groups", args(1,5, 
batarg("",lng),batarg("b",lng),arg("t",int),batarg("o",bit),batarg("p",bit))),
- pattern("sql", "stdev", SQLstddev_samp, false, "return the standard deviation 
sample of groups", args(1,4, 
arg("",dbl),arg("b",bte),arg("s",lng),arg("e",lng))),
- pattern("batsql", "stdev", SQLstddev_samp, false, "return the standard 
deviation sample of groups", args(1,4, 
batarg("",dbl),batarg("b",bte),batarg("s",lng),batarg("e",lng))),
- pattern("sql", "stdevp", SQLstddev_pop, false, "return the standard deviation 
population of groups", args(1,4, 
arg("",dbl),arg("b",bte),arg("s",lng),arg("e",lng))),
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to