Changeset: 9c297e31d281 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9c297e31d281
Modified Files:
        gdk/gdk_aggr.c
        gdk/gdk_analytic.c
        gdk/gdk_calc_private.h
        sql/common/sql_types.c
        sql/test/analytics/Tests/analytics00.sql
        sql/test/analytics/Tests/analytics00.stable.out
Branch: analytics
Log Message:

Implemented the analytical (window) sum for floating-point types.


diffs (truncated from 367 to 300 lines):

diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -178,7 +178,7 @@ exchange(double *x, double *y)
 }
 
 /* this function was adapted from https://bugs.python.org/file10357/msum4.py */
-static BUN
+BUN
 dofsum(const void *restrict values, oid seqb, BUN start, BUN end,
        void *restrict results, BUN ngrp, int tp1, int tp2,
        const oid *restrict cand, const oid *candend, const oid *restrict gids,
diff --git a/gdk/gdk_analytic.c b/gdk/gdk_analytic.c
--- a/gdk/gdk_analytic.c
+++ b/gdk/gdk_analytic.c
@@ -9,6 +9,7 @@
 #include "monetdb_config.h"
 #include "gdk.h"
 #include "gdk_analytic.h"
+#include "gdk_calc_private.h"
 
 #define ANALYTICAL_LIMIT_IMP(TPE, OP)                        \
        do {                                                     \
@@ -242,20 +243,20 @@ ANALYTICAL_LIMIT(max, MAX, <)
 #define ANALYTICAL_SUM_IMP(TPE1, TPE2)                              \
        do {                                                            \
                TPE1 *bp;                                                   \
-               TPE2 *rp, *rb, curval;                                      \
+               TPE2 *rp, *rb, curval = TPE2##_nil;                         \
                bp = (TPE1*)Tloc(b, 0);                                     \
                rb = rp = (TPE2*)Tloc(r, 0);                                \
-               curval = TPE2##_nil;                                        \
                if (p) {                                                    \
                        if (o) {                                                
\
                                np = (bit*)Tloc(p, 0);                          
    \
                                for(i=0; i<cnt; i++, np++, rp++, bp++) {        
    \
                                        if (*np) {                              
        \
+                                               for (;rb < rp; rb++)            
            \
+                                                       *rb = curval;           
                \
                                                if(is_##TPE2##_nil(curval))     
            \
                                                        has_nils = true;        
                \
-                                               for (;rb < rp; rb++)            
            \
-                                                       *rb = curval;           
                \
-                                               curval = TPE2##_nil;            
            \
+                                               else                            
            \
+                                                       curval = TPE2##_nil;    
                \
                                        }                                       
        \
                                        if (!is_##TPE1##_nil(*bp)) {            
        \
                                                if(is_##TPE2##_nil(curval))     
            \
@@ -275,11 +276,12 @@ ANALYTICAL_LIMIT(max, MAX, <)
                                np = (bit*)Tloc(p, 0);                          
    \
                                for(i=0; i<cnt; i++, np++, rp++, bp++) {        
    \
                                        if (*np) {                              
        \
+                                               for (;rb < rp; rb++)            
            \
+                                                       *rb = curval;           
                \
                                                if(is_##TPE2##_nil(curval))     
            \
                                                        has_nils = true;        
                \
-                                               for (;rb < rp; rb++)            
            \
-                                                       *rb = curval;           
                \
-                                               curval = TPE2##_nil;            
            \
+                                               else                            
            \
+                                                       curval = TPE2##_nil;    
                \
                                        }                                       
        \
                                        if (!is_##TPE1##_nil(*bp)) {            
        \
                                                if(is_##TPE2##_nil(curval))     
            \
@@ -308,24 +310,98 @@ ANALYTICAL_LIMIT(max, MAX, <)
                                                                           goto 
calc_overflow);         \
                                }                                               
    \
                        }                                                       
\
-                       if(is_##TPE2##_nil(curval))                             
\
-                               has_nils = true;                                
    \
                        for(;rb < rp; rb++)                                     
\
                                *rb = curval;                                   
    \
+                       if(is_##TPE2##_nil(curval))                             
\
+                               has_nils = true;                                
    \
                } else { /* single value, ie no ordering */                 \
+                       for(i=0; i<cnt; i++, rp++, bp++)                        
\
+                               *rp = *bp;                                      
    \
                        if(is_##TPE1##_nil(*bp))                                
\
                                has_nils = true;                                
    \
-                       for(i=0; i<cnt; i++, rp++, bp++)                        
\
-                               *rp = *bp;                                      
    \
                }                                                           \
                goto finish;                                                \
        } while(0);
 
+#define ANALYTICAL_SUM_FP_IMP(TPE1, TPE2)                              \
+       do {                                                               \
+               TPE1 *bp, *bprev;                                              \
+               TPE2 *rp, *rb, curval = TPE2##_nil;                            \
+               bp = bprev = (TPE1*)Tloc(b, 0);                                \
+               rb = rp = (TPE2*)Tloc(r, 0);                                   \
+               if (p) {                                                       \
+                       if (o) {                                                
   \
+                               np = (bit*)Tloc(p, 0);                          
       \
+                               for(i=0,j=0; i<cnt; i++, np++, rp++, bp++) {    
       \
+                                       if (*np) {                              
           \
+                                               dofsum(bprev, 0, 0, i - j, rb, 
1, TYPE_##TPE1, \
+                                                          TYPE_##TPE2, NULL, 
NULL, NULL, 0, 0,    \
+                                                          true, false, true, 
"GDKanalyticalsum"); \
+                                               curval = *rb;                   
               \
+                                               bprev = bp;                     
               \
+                                               j = i;                          
               \
+                                               for (;rb < rp; rb++)            
               \
+                                                       *rb = curval;           
                   \
+                                               if(is_##TPE2##_nil(curval))     
               \
+                                                       has_nils = true;        
                   \
+                                       }                                       
           \
+                               }                                               
       \
+                               dofsum(bprev, 0, 0, i - j, rb, 1, TYPE_##TPE1,  
       \
+                                          TYPE_##TPE2, NULL, NULL, NULL, 0, 0, 
           \
+                                          true, false, true, 
"GDKanalyticalsum");         \
+                               curval = *rb;                                   
       \
+                               if(is_##TPE2##_nil(curval))                     
       \
+                                       has_nils = true;                        
           \
+                               for (;rb < rp; rb++)                            
       \
+                                       *rb = curval;                           
           \
+                       } else { /* single value, ie no ordering */             
   \
+                               np = (bit*)Tloc(p, 0);                          
       \
+                               for(i=0,j=0; i<cnt; i++, np++, rp++, bp++) {    
       \
+                                       if (*np) {                              
           \
+                                               dofsum(bprev, 0, 0, i - j, rb, 
1, TYPE_##TPE1, \
+                                                          TYPE_##TPE2, NULL, 
NULL, NULL, 0, 0,    \
+                                                          true, false, true, 
"GDKanalyticalsum"); \
+                                               curval = *rb;                   
               \
+                                               bprev = bp;                     
               \
+                                               j = i;                          
               \
+                                               for (;rb < rp; rb++)            
               \
+                                                       *rb = curval;           
                   \
+                                               if(is_##TPE2##_nil(curval))     
               \
+                                                       has_nils = true;        
                   \
+                                       }                                       
           \
+                               }                                               
       \
+                               dofsum(bprev, 0, 0, i - j, rb, 1, TYPE_##TPE1,  
       \
+                                          TYPE_##TPE2, NULL, NULL, NULL, 0, 0, 
           \
+                                          true, false, true, 
"GDKanalyticalsum");         \
+                               curval = *rb;                                   
       \
+                               if(is_##TPE2##_nil(curval))                     
       \
+                                       has_nils = true;                        
           \
+                               for (;rb < rp; rb++)                            
       \
+                                       *rb = curval;                           
           \
+                       }                                                       
   \
+               } else if (o) { /* single value, ie no partitions */           \
+                       dofsum(bp, 0, 0, cnt, rb, 1, TYPE_##TPE1, TYPE_##TPE2,  
   \
+                                  NULL, NULL, NULL, 0, 0, true, false, true,   
       \
+                                  "GDKanalyticalsum");                         
       \
+                       curval = *rb;                                           
   \
+                       for(i=0; i<cnt; i++, rb++)                              
   \
+                               *rb = curval;                                   
       \
+                       if(is_##TPE2##_nil(curval))                             
   \
+                               has_nils = true;                                
       \
+               } else { /* single value, ie no ordering */                    \
+                       for(i=0; i<cnt; i++, rp++, bp++)                        
   \
+                               *rp = *bp;                                      
       \
+                       if(is_##TPE1##_nil(*bp))                                
   \
+                               has_nils = true;                                
       \
+               }                                                              \
+               goto finish;                                                   \
+       } while(0);
+
 gdk_return
 GDKanalyticalsum(BAT *r, BAT *b, BAT *p, BAT *o, int tp1, int tp2)
 {
        bool has_nils = false;
-       BUN i, cnt = BATcount(b);
+       BUN i, j, cnt = BATcount(b);
        bit *np;
 
        switch (tp2) {
@@ -411,18 +487,29 @@ GDKanalyticalsum(BAT *r, BAT *b, BAT *p,
                        break;
                }
 #endif
-               case TYPE_flt:
-                       if (tp1 != TYPE_flt) {
-                               goto nosupport;
-                               break;
+               case TYPE_flt: {
+                       switch (tp1) {
+                               case TYPE_flt:
+                                       ANALYTICAL_SUM_FP_IMP(flt, flt);
+                                       break;
+                               default:
+                                       goto nosupport;
+                                       break;
                        }
-                       /* fall through */
-               case TYPE_dbl:
-                       if (tp1 != TYPE_flt && tp1 != TYPE_dbl) {
-                               goto nosupport;
-                               break;
+               }
+               case TYPE_dbl: {
+                       switch (tp1) {
+                               case TYPE_flt:
+                                       ANALYTICAL_SUM_FP_IMP(flt, dbl);
+                                       break;
+                               case TYPE_dbl:
+                                       ANALYTICAL_SUM_FP_IMP(dbl, dbl);
+                                       break;
+                               default:
+                                       goto nosupport;
+                                       break;
                        }
-                       goto nosupport;
+               }
                default:
                        goto nosupport;
        }
@@ -438,4 +525,5 @@ finish:
 }
 
 #undef ANALYTICAL_SUM_IMP
+#undef ANALYTICAL_SUM_FP_IMP
 #undef ANALYTICAL_ADD_WITH_CHECK
diff --git a/gdk/gdk_calc_private.h b/gdk/gdk_calc_private.h
--- a/gdk/gdk_calc_private.h
+++ b/gdk/gdk_calc_private.h
@@ -250,3 +250,8 @@
        } while (0)
 #endif /* HAVE___BUILTIN_ADD_OVERFLOW */
 #endif /* HAVE_HGE */
+
+BUN
+dofsum(const void *restrict values, oid seqb, BUN start, BUN end, void 
*restrict results, BUN ngrp, int tp1, int tp2,
+          const oid *restrict cand, const oid *candend, const oid *restrict 
gids, oid min, oid max, bool skip_nils,
+          bool abort_on_error, bool nil_if_empty, const char *func);
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -1573,11 +1573,43 @@ sqltypeinit( sql_allocator *sa)
        //sql_create_analytic(sa, "lead", "sql", "lead", ANY, BIT, BIT, ANY, 
SCALE_NONE);
        //sql_create_analytic(sa, "first_value", "sql", "first_value", ANY, 
BIT, BIT, ANY, SCALE_NONE);
        //sql_create_analytic(sa, "last_value", "sql", "last_value", ANY, BIT, 
BIT, ANY, SCALE_NONE);
-       sql_create_analytic(sa, "sum", "sql", "sum", ANY, BIT, BIT, ANY, 
SCALE_NONE);
+       //sql_create_analytic(sa, "avg", "sql", "avg", ANY, BIT, BIT, ANY, 
SCALE_NONE);
+       //sql_create_analytic(sa, "count", "sql", "count", ANY, BIT, BIT, ANY, 
SCALE_NONE);
        sql_create_analytic(sa, "min", "sql", "min", ANY, BIT, BIT, ANY, 
SCALE_NONE);
        sql_create_analytic(sa, "max", "sql", "max", ANY, BIT, BIT, ANY, 
SCALE_NONE);
-       //sql_create_analytic(sa, "avg", "sql", "avg", ANY, BIT, BIT, ANY, 
SCALE_NONE);
-       //sql_create_analytic(sa, "count", "sql", "count", ANY, BIT, BIT, ANY, 
SCALE_NONE);
+       sql_create_analytic(sa, "sum", "sql", "sum", ANY, BIT, BIT, ANY, 
SCALE_NONE);
+
+       /* //analytical sum for numerical and decimals
+       sql_create_analytic(sa, "sum", "sql", "sum", BTE, BIT, BIT, LargestINT, 
SCALE_NONE);
+       sql_create_analytic(sa, "sum", "sql", "sum", SHT, BIT, BIT, LargestINT, 
SCALE_NONE);
+       sql_create_analytic(sa, "sum", "sql", "sum", INT, BIT, BIT, LargestINT, 
SCALE_NONE);
+       sql_create_analytic(sa, "sum", "sql", "sum", LNG, BIT, BIT, LargestINT, 
SCALE_NONE);
+#ifdef HAVE_HGE
+       if (have_hge)
+               sql_create_analytic(sa, "sum", "sql", "sum", HGE, BIT, BIT, 
LargestINT, SCALE_NONE);
+#endif
+       sql_create_analytic(sa, "sum", "sql", "sum", LNG, BIT, BIT, LNG, 
SCALE_NONE);
+
+       t = decimals; // BTE
+       sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, 
LargestDEC, SCALE_NONE);
+       t++; // SHT
+       sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, 
LargestDEC, SCALE_NONE);
+       t++; // INT
+       sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, 
LargestDEC, SCALE_NONE);
+       t++; // LNG
+       sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, 
LargestDEC, SCALE_NONE);
+#ifdef HAVE_HGE
+       if (have_hge) {
+               t++; // HGE
+               sql_create_analytic(sa, "sum", "sql", "sum", *(t), BIT, BIT, 
LargestDEC, SCALE_NONE);
+       }
+#endif
+       for (t = floats; t < dates; t++) {
+               sql_create_analytic(sa, "sum", "sql", "sum", *t, BIT, BIT, *t, 
SCALE_NONE);
+               //sql_create_analytic(sa, "prod", "sql", "prod", *t, BIT, BIT, 
*t, SCALE_NONE); maybe adding a prod analytic function
+       }*/
+       sql_create_analytic(sa, "sum", "sql", "sum", MONINT, BIT, BIT, MONINT, 
SCALE_NONE);
+       sql_create_analytic(sa, "sum", "sql", "sum", SECINT, BIT, BIT, SECINT, 
SCALE_NONE);
 
        sql_create_func(sa, "and", "calc", "and", BIT, BIT, BIT, SCALE_FIX);
        sql_create_func(sa, "or",  "calc",  "or", BIT, BIT, BIT, SCALE_FIX);
diff --git a/sql/test/analytics/Tests/analytics00.sql 
b/sql/test/analytics/Tests/analytics00.sql
--- a/sql/test/analytics/Tests/analytics00.sql
+++ b/sql/test/analytics/Tests/analytics00.sql
@@ -44,4 +44,13 @@ select max(aa) over (partition by bb) fr
 select max(aa) over (partition by bb order by bb asc) from stressme;
 select max(aa) over (partition by bb order by bb desc) from stressme;
 select max(aa) over (order by bb desc) from stressme;
+
+create table debugme (aa real, bb int);
+insert into debugme values (15, 3), (3, 1), (2, 1), (5, 3), (NULL, 2), (3, 2), 
(4, 1), (6, 3), (8, 2), (NULL, 4);
+
+select sum(aa) over (partition by bb) from debugme;
+select sum(aa) over (partition by bb order by bb asc) from debugme;
+select sum(aa) over (partition by bb order by bb desc) from debugme;
+select sum(aa) over (order by bb desc) from debugme;
+
 rollback;
diff --git a/sql/test/analytics/Tests/analytics00.stable.out 
b/sql/test/analytics/Tests/analytics00.stable.out
--- a/sql/test/analytics/Tests/analytics00.stable.out
+++ b/sql/test/analytics/Tests/analytics00.stable.out
@@ -543,6 +543,69 @@ Ready.
 [ "stress"     ]
 [ "stress"     ]
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to