Changeset: 1633b2631a38 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1633b2631a38
Modified Files:
        gdk/gdk_analytic.c
        gdk/gdk_analytic.h
        sql/backends/monet5/sql_rank.c
        sql/backends/monet5/sql_rank.h
        sql/backends/monet5/sql_rank.mal
        sql/backends/monet5/sql_rank.mal.sh
        sql/backends/monet5/sql_rank_hge.mal
        sql/backends/monet5/sql_rank_hge.mal.sh
        sql/common/sql_types.c
        sql/test/analytics/Tests/analytics00.sql
Branch: analytics
Log Message:

Implemented analytical count function.


diffs (truncated from 762 to 300 lines):

diff --git a/gdk/gdk_analytic.c b/gdk/gdk_analytic.c
--- a/gdk/gdk_analytic.c
+++ b/gdk/gdk_analytic.c
@@ -13,14 +13,15 @@
 
 #define ANALYTICAL_LIMIT_IMP(TPE, OP)                        \
        do {                                                     \
-               TPE *rp, *rb, *bp, curval;                           \
+               TPE *rp, *rb, *restrict bp, *end, curval;            \
                rb = rp = (TPE*)Tloc(r, 0);                          \
                bp = (TPE*)Tloc(b, 0);                               \
                curval = *bp;                                        \
+               end = rp + cnt;                                      \
                if (p) {                                             \
                        if (o) {                                         \
                                np = (bit*)Tloc(p, 0);                       \
-                               for(i=0; i<cnt; i++, np++, rp++, bp++) {     \
+                               for(; rp<end; np++, rp++, bp++) {            \
                                        if (*np) {                              
 \
                                                if(is_##TPE##_nil(curval))      
     \
                                                        has_nils = true;        
         \
@@ -41,7 +42,7 @@
                                        *rb = curval;                           
 \
                        } else { /* single value, ie no ordering */      \
                                np = (bit*)Tloc(p, 0);                       \
-                               for(i=0; i<cnt; i++, np++, rp++, bp++) {     \
+                               for(; rp<end; np++, rp++, bp++) {            \
                                        if (*np) {                              
 \
                                                if(is_##TPE##_nil(curval))      
     \
                                                        has_nils = true;        
         \
@@ -62,7 +63,7 @@
                                        *rb = curval;                           
 \
                        }                                                \
                } else if (o) { /* single value, ie no partitions */ \
-                       for(i=0; i<cnt; i++, rp++, bp++) {               \
+                       for(; rp<end; rp++, bp++) {                      \
                                if(!is_##TPE##_nil(*bp)) {                   \
                                        if(is_##TPE##_nil(curval))              
 \
                                                curval = *bp;                   
     \
@@ -77,7 +78,7 @@
                } else { /* single value, ie no ordering */          \
                        if(is_##TPE##_nil(*bp))                          \
                                has_nils = true;                             \
-                       for(i=0; i<cnt; i++, rp++, bp++)                 \
+                       for(; rp<end; rp++, bp++)                        \
                                *rp = *bp;                                   \
                }                                                    \
        } while(0);
@@ -99,7 +100,7 @@ GDKanalytical##OP(BAT *r, BAT *b, BAT *p
        const void *nil;                                                        
                \
        bool has_nils = false;                                                  
                \
        BUN i, j, cnt = BATcount(b);                                            
                \
-       bit *np;                                                                
                \
+       bit *restrict np;                                                       
                \
        gdk_return gdk_res = GDK_SUCCEED;                                       
                \
                                                                                
             \
        switch(ATOMstorage(tpe)) {                                              
                \
@@ -127,7 +128,7 @@ GDKanalytical##OP(BAT *r, BAT *b, BAT *p
                        break;                                                  
                        \
                default: {                                                      
                    \
                        BATiter bpi = bat_iterator(b);                          
                        \
-                       void *curval = BUNtail(bpi, 0);                         
                        \
+                       void *restrict curval = BUNtail(bpi, 0);                
                        \
                        nil = ATOMnilptr(tpe);                                  
                        \
                        atomcmp = ATOMcompare(tpe);                             
                        \
                        if (p) {                                                
                        \
@@ -223,6 +224,196 @@ ANALYTICAL_LIMIT(max, MAX, <)
 #undef ANALYTICAL_LIMIT_IMP_HUGE
 #undef ANALYTICAL_LIMIT_IMP
 
+#define ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(TPE) /* per-partition count of non-nil TPE values of b, stored as lng in every row of r; relies on b, r, p, np and cnt being in scope at the expansion site */ \
+       do {                                              \
+               TPE *restrict bp = (TPE*)Tloc(b, 0);          \
+               lng *rp, *rb, *end, curval = 0; /* rb trails rp: rows in [rb, rp) have not received their count yet */ \
+               rb = rp = (lng*)Tloc(r, 0);                   \
+               end = rp + cnt;                               \
+               if (p) {                                      \
+                       np = (bit*)Tloc(p, 0);                    \
+                       for(; rp<end; np++, rp++, bp++) {         \
+                               if (*np) { /* nonzero bit in p is treated as a partition start: flush the running count to the pending rows, then restart */ \
+                                       for (;rb < rp; rb++)              \
+                                               *rb = curval;                 \
+                                       curval = 0;                       \
+                               }                                     \
+                               curval += !is_##TPE##_nil(*bp); /* adds 1 only when the value is not nil */ \
+                       }                                         \
+                       for (;rb < rp; rb++) /* flush the count of the last partition */ \
+                               *rb = curval;                         \
+               } else { /* single value, ie no partitions */ \
+                       for(; rp<end; rp++, bp++) /* first pass: count the non-nil values of the whole column */ \
+                               curval += !is_##TPE##_nil(*bp);       \
+                       for(;rb < rp; rb++) /* second pass: every row receives that same total */ \
+                               *rb = curval;                         \
+               }                                             \
+       } while(0);
+
+#define ANALYTICAL_COUNT_WITH_NIL_STR_IMP(TPE_CAST, OFFSET) /* string variant of the per-partition count: a row counts when the first byte found in base at its offset differs from '\200' (presumably the str nil marker -- confirm); TPE_CAST/OFFSET select the offset width and bias; relies on b, r, p, np, i, cnt and base from the expansion site */ \
+       do { /* NOTE(review): unlike the FIXED_SIZE variant, no #undef for this macro is visible after GDKanalyticalcount -- confirm whether that is intended */ \
+               const void *restrict bp = Tloc(b, 0);                           
  \
+               lng *rp, *rb, curval = 0;                                       
  \
+               rb = rp = (lng*)Tloc(r, 0);                                     
  \
+               if (p) {                                                        
  \
+                       np = (bit*)Tloc(p, 0);                                  
      \
+                       for(i = 0; i < cnt; i++, np++, rp++) {                  
      \
+                               if (*np) {                                      
          \
+                                       for (;rb < rp; rb++)                    
              \
+                                               *rb = curval;                   
                  \
+                                       curval = 0;                             
              \
+                               }                                               
          \
+                               curval += base[(var_t) ((TPE_CAST) bp) OFFSET] 
!= '\200'; \
+                       }                                                       
      \
+                       for (;rb < rp; rb++)                                    
      \
+                               *rb = curval;                                   
          \
+               } else { /* single value, ie no partitions */                   
  \
+                       for(i = 0; i < cnt; i++)                                
      \
+                               curval += base[(var_t) ((TPE_CAST) bp) OFFSET] 
!= '\200'; \
+                       rp += cnt;                                              
      \
+                       for(;rb < rp; rb++)                                     
      \
+                               *rb = curval;                                   
          \
+               }                                                               
  \
+       } while(0);
+
+gdk_return /* Fills r (written as lng) with one count per row of b, computed over that row's partition; p supplies partition-start bits or is NULL for a single partition; o is unused; the function always returns GDK_SUCCEED. */
+GDKanalyticalcount(BAT *r, BAT *b, BAT *p, BAT *o, const bit *ignore_nils, int 
tpe)
+{
+       BUN i, cnt = BATcount(b);
+       gdk_return gdk_res = GDK_SUCCEED; /* never reassigned below */
+       (void) o; /* the ordering BAT is accepted but not consulted here */
+
+       if(*ignore_nils || b->T.nonil) { /* no per-value nil test on this path: each row gets its partition's row count. NOTE(review): this path is taken when *ignore_nils is set; if that flag means nils must be left out of the count, the test looks inverted -- confirm against the callers */
+               bit *np, *pnp;
+               lng *rp, *rb, curval = 0;
+               rb = rp = (lng*)Tloc(r, 0);
+               if (p) {
+                       np = pnp = (bit*)Tloc(p, 0); /* pnp remembers where the current partition began */
+                       bit* end = np + cnt;
+                       for(; np < end; np++, rp++) {
+                               if (*np) {
+                                       curval = np - pnp; /* number of rows since the previous partition start */
+                                       pnp = np;
+                                       for (;rb < rp; rb++)
+                                               *rb = curval;
+                               }
+                       }
+                       curval = np - pnp; /* size of the final partition */
+                       for (;rb < rp; rb++)
+                               *rb = curval;
+               } else { /* single value */
+                       lng* end = rp + cnt;
+                       for(; rp < end; rp++)
+                               *rp = cnt; /* every row receives the total row count */
+               }
+       } else { /* nils are tested value by value; the test used depends on tpe */
+               bit *restrict np;
+               switch (tpe) {
+                       case TYPE_bit:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(bit)
+                               break;
+                       case TYPE_bte:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(bte)
+                               break;
+                       case TYPE_sht:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(sht)
+                               break;
+                       case TYPE_int:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(int)
+                               break;
+                       case TYPE_lng:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(lng)
+                               break;
+#ifdef HAVE_HGE
+                       case TYPE_hge:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(hge)
+                               break;
+#endif
+                       case TYPE_flt:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(flt)
+                               break;
+                       case TYPE_dbl:
+                               ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP(dbl)
+                               break;
+                       case TYPE_str: { /* the cast and bias handed to the macro are chosen from b->twidth */
+                               const char *restrict base = b->tvheap->base;
+                               switch (b->twidth) {
+                                       case 1:
+                                               
ANALYTICAL_COUNT_WITH_NIL_STR_IMP(const unsigned char *, [i] + GDK_VAROFFSET)
+                                               break;
+                                       case 2:
+                                               
ANALYTICAL_COUNT_WITH_NIL_STR_IMP(const unsigned short *, [i] + GDK_VAROFFSET)
+                                               break;
+#if SIZEOF_VAR_T != SIZEOF_INT
+                                       case 4:
+                                               
ANALYTICAL_COUNT_WITH_NIL_STR_IMP(const unsigned int *, [i])
+                                               break;
+#endif
+                                       default:
+                                               
ANALYTICAL_COUNT_WITH_NIL_STR_IMP(const var_t *, [i])
+                                               break;
+                               }
+                               break;
+                       }
+                       default: { /* any other type: a value counts when the atom compare against the type's nil returns nonzero */
+                               const void *restrict nil = ATOMnilptr(tpe);
+                               int (*cmp)(const void *, const void *) = 
ATOMcompare(tpe);
+                               lng *rp, *rb, curval = 0;
+                               rb = rp = (lng*)Tloc(r, 0);
+                               if (b->tvarsized) { /* values are reached as base + offset; NOTE(review): offsets are always read as var_t here, regardless of b->twidth -- TODO confirm that holds for every var-sized type reaching this path */
+                                       const char *restrict base = 
b->tvheap->base;
+                                       const void *restrict bp = Tloc(b, 0);
+                                       if (p) {
+                                               np = (bit*)Tloc(p, 0);
+                                               for(i = 0; i < cnt; i++, np++, 
rp++) {
+                                                       if (*np) {
+                                                               for (;rb < rp; 
rb++)
+                                                                       *rb = 
curval;
+                                                               curval = 0;
+                                                       }
+                                                       curval += (*cmp)(nil, 
base + ((const var_t *) bp)[i]) != 0;
+                                               }
+                                               for (;rb < rp; rb++)
+                                                       *rb = curval;
+                                       } else { /* single value, ie no 
partitions */
+                                               for(i = 0; i < cnt; i++)
+                                                       curval += (*cmp)(nil, 
base + ((const var_t *) bp)[i]) != 0;
+                                               rp += cnt;
+                                               for(;rb < rp; rb++)
+                                                       *rb = curval;
+                                       }
+                               } else { /* fixed-size values are addressed directly with Tloc(b, i) */
+                                       if (p) {
+                                               np = (bit*)Tloc(p, 0);
+                                               for(i = 0; i < cnt; i++, np++, 
rp++) {
+                                                       if (*np) {
+                                                               for (;rb < rp; 
rb++)
+                                                                       *rb = 
curval;
+                                                               curval = 0;
+                                                       }
+                                                       curval += 
(*cmp)(Tloc(b, i), nil) != 0;
+                                               }
+                                               for (;rb < rp; rb++)
+                                                       *rb = curval;
+                                       } else { /* single value, ie no 
partitions */
+                                               for(i = 0; i < cnt; i++)
+                                                       curval += 
(*cmp)(Tloc(b, i), nil) != 0;
+                                               rp += cnt;
+                                               for(;rb < rp; rb++)
+                                                       *rb = curval;
+                                       }
+                               }
+                       }
+               }
+       }
+       BATsetcount(r, cnt); /* r ends up with exactly as many rows as b */
+       r->tnonil = true; /* every value written above is a plain count, so r is flagged as nil-free */
+       r->tnil = false;
+       return gdk_res;
+}
+
+#undef ANALYTICAL_COUNT_WITH_NIL_FIXED_SIZE_IMP
+
 #define ANALYTICAL_ADD_WITH_CHECK(lft, rgt, TPE2, dst, max, on_overflow) \
        do {                                                            \
                if ((rgt) < 1) {                                        \
@@ -242,14 +433,14 @@ ANALYTICAL_LIMIT(max, MAX, <)
 
 #define ANALYTICAL_SUM_IMP(TPE1, TPE2)                              \
        do {                                                            \
-               TPE1 *bp;                                                   \
-               TPE2 *rp, *rb, curval = TPE2##_nil;                         \
-               bp = (TPE1*)Tloc(b, 0);                                     \
+               TPE1 *restrict bp = (TPE1*)Tloc(b, 0);                      \
+               TPE2 *rp, *rb, *end, curval = TPE2##_nil;                   \
                rb = rp = (TPE2*)Tloc(r, 0);                                \
+               end = rp + cnt;                                             \
                if (p) {                                                    \
                        if (o) {                                                
\
                                np = (bit*)Tloc(p, 0);                          
    \
-                               for(i=0; i<cnt; i++, np++, rp++, bp++) {        
    \
+                               for(; rp<end; np++, rp++, bp++) {               
    \
                                        if (*np) {                              
        \
                                                for (;rb < rp; rb++)            
            \
                                                        *rb = curval;           
                \
@@ -274,7 +465,7 @@ ANALYTICAL_LIMIT(max, MAX, <)
                                        *rb = curval;                           
        \
                        } else { /* single value, ie no ordering */             
\
                                np = (bit*)Tloc(p, 0);                          
    \
-                               for(i=0; i<cnt; i++, np++, rp++, bp++) {        
    \
+                               for(; rp<end; np++, rp++, bp++) {               
    \
                                        if (*np) {                              
        \
                                                for (;rb < rp; rb++)            
            \
                                                        *rb = curval;           
                \
@@ -299,7 +490,7 @@ ANALYTICAL_LIMIT(max, MAX, <)
                                        *rb = curval;                           
        \
                        }                                                       
\
                } else if (o) { /* single value, ie no partitions */        \
-                       for(i=0; i<cnt; i++, rp++, bp++) {                      
\
+                       for(; rp<end; rp++, bp++) {                             
\
                                if(!is_##TPE1##_nil(*bp)) {                     
    \
                                        if(is_##TPE2##_nil(curval))             
        \
                                                curval = (TPE2) *bp;            
            \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to