Changeset: d263423703ba for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d263423703ba
Modified Files:
        gdk/gdk_analytic.c
        gdk/gdk_analytic.h
        sql/backends/monet5/sql_rank.c
        sql/backends/monet5/sql_rank.h
        sql/backends/monet5/sql_rank.mal
        sql/backends/monet5/sql_rank.mal.sh
        sql/backends/monet5/sql_rank_hge.mal
        sql/backends/monet5/sql_rank_hge.mal.sh
        sql/common/sql_types.c
        sql/server/rel_select.c
        sql/server/sql_scan.c
        sql/test/analytics/Tests/analytics01.sql
        sql/test/analytics/Tests/analytics01.stable.out
Branch: analytics
Log Message:

Implemented nth_value on a window, plus ntile cleanup.


diffs (truncated from 1009 to 300 lines):

diff --git a/gdk/gdk_analytic.c b/gdk/gdk_analytic.c
--- a/gdk/gdk_analytic.c
+++ b/gdk/gdk_analytic.c
@@ -103,7 +103,6 @@ GDKanalyticaldiff(BAT *r, BAT *b, BAT *c
 
 #define NTILE_CALC(TPE)               \
        do {                              \
-               TPE val =  *(TPE *)ntile;     \
                if((BUN)val >= cnt) {         \
                        i = 1;                    \
                        for(; rb<rp; i++, rb++)   \
@@ -132,8 +131,14 @@ GDKanalyticaldiff(BAT *r, BAT *b, BAT *c
 #define ANALYTICAL_NTILE_IMP(TPE)            \
        do {                                     \
                TPE i = 0, j = 1, *rp, *rb, buckets; \
+               TPE val =  *(TPE *)ntile;            \
                rb = rp = (TPE*)Tloc(r, 0);          \
-               if(p) {                              \
+               if(is_##TPE##_nil(val)) {            \
+                       TPE *end = rp + cnt;             \
+                       has_nils = true;                 \
+                       for(; rp<end; rp++)              \
+                               *rp = TPE##_nil;             \
+               } else if(p) {                       \
                        pnp = np = (bit*)Tloc(p, 0);     \
                        TPE *end = rp + cnt;             \
                        for(; rp<end; np++, rp++) {      \
@@ -161,6 +166,7 @@ GDKanalyticalntile(BAT *r, BAT *b, BAT *
 {
        BUN cnt = BATcount(b);
        bit *np, *pnp;
+       bool has_nils = false;
        gdk_return gdk_res = GDK_SUCCEED;
 
        switch (tpe) {
@@ -190,8 +196,8 @@ nosupport:
        return GDK_FAIL;
 finish:
        BATsetcount(r, cnt);
-       r->tnonil = true;
-       r->tnil = false;
+       r->tnonil = !has_nils;
+       r->tnil = has_nils;
        if(o) {
                r->tsorted = o->tsorted;
                r->trevsorted = o->trevsorted;
@@ -430,6 +436,154 @@ finish:
 
 #undef ANALYTICAL_LAST_IMP
 
+/*
+ * Fixed-width implementation of nth_value for atom type TPE.
+ *
+ * Expands inside GDKanalyticalnthvalue and uses its locals r, b, p, cnt,
+ * nth, np and has_nils, then jumps to its `finish` label.
+ *
+ *  - nth is nil: every output row becomes nil.
+ *  - p is given: a set bit in p marks the first row of a new partition.
+ *    Every row of a partition receives the value at 0-based offset nth
+ *    from the partition start, or nil when nth is not smaller than the
+ *    partition size.
+ *  - p is absent: the whole column is treated as one partition.
+ *
+ * The partition size is compared as lng (not TPE) so large partitions are
+ * not truncated for narrow types, and an empty partition never reads the
+ * input nor raises has_nils.
+ */
+#define ANALYTICAL_NTHVALUE_IMP(TPE) \
+       do { \
+               TPE *rp, *rb, *pbp, *bp, *end, curval; \
+               pbp = bp = (TPE*)Tloc(b, 0); \
+               rb = rp = (TPE*)Tloc(r, 0); \
+               end = rp + cnt; \
+               if(is_lng_nil(nth)) { \
+                       has_nils = true; \
+                       for(; rp<end; rp++) \
+                               *rp = TPE##_nil; \
+               } else if(p) { \
+                       np = (bit*)Tloc(p, 0); \
+                       for(; rp<end; np++, rp++, bp++) { \
+                               if (*np) { \
+                                       /* input rows [pbp, bp) are the partition that just ended */ \
+                                       if(nth >= (lng) (bp - pbp)) { \
+                                               curval = TPE##_nil; \
+                                       } else { \
+                                               curval = *(pbp + nth); \
+                                       } \
+                                       if(rb < rp && is_##TPE##_nil(curval)) \
+                                               has_nils = true; \
+                                       for(; rb<rp; rb++) \
+                                               *rb = curval; \
+                                       pbp = bp; \
+                               } \
+                       } \
+                       /* flush the last partition, which no set bit terminates */ \
+                       if(nth >= (lng) (bp - pbp)) { \
+                               curval = TPE##_nil; \
+                       } else { \
+                               curval = *(pbp + nth); \
+                       } \
+                       if(rb < rp && is_##TPE##_nil(curval)) \
+                               has_nils = true; \
+                       for(; rb<rp; rb++) \
+                               *rb = curval; \
+               } else { \
+                       /* no partitioning: one value for the whole column */ \
+                       if(nth >= (lng) cnt) { \
+                               curval = TPE##_nil; \
+                       } else { \
+                               curval = *(bp + nth); \
+                       } \
+                       if(rp < end && is_##TPE##_nil(curval)) \
+                               has_nils = true; \
+                       for(; rp<end; rp++) \
+                               *rp = curval; \
+               } \
+               goto finish; \
+       } while(0);
+
+/*
+ * GDKanalyticalnthvalue: kernel of the nth_value() window function.
+ *
+ *   r   - result BAT; receives one value per row of b
+ *   b   - input column
+ *   p   - optional bit BAT; a set bit marks the first row of a new partition
+ *   o   - ordering BAT, not used by this function
+ *   nth - 0-based offset of the wanted row inside each partition, or lng_nil
+ *   tpe - atom type of the values in b
+ *
+ * Every row of a partition gets the value found at offset nth from the
+ * start of that partition, or nil when the partition has no such row or
+ * nth itself is nil.  Fixed-width types are written straight into r's tail
+ * (NOTE(review): this presumes r was allocated with room for BATcount(b)
+ * values - confirm at the caller); all other atom types are appended with
+ * BUNappend.
+ *
+ * Returns GDK_SUCCEED, or the status of the first failing BUNappend.
+ */
+gdk_return
+GDKanalyticalnthvalue(BAT *r, BAT *b, BAT *p, BAT *o, lng nth, int tpe)
+{
+       int (*atomcmp)(const void *, const void *);
+       const void *nil;
+       BUN i, j, cnt = BATcount(b);
+       bit *np;
+       gdk_return gdk_res = GDK_SUCCEED;
+       bool has_nils = false;
+
+       (void) o;
+       switch (tpe) {
+               case TYPE_bte:
+                       ANALYTICAL_NTHVALUE_IMP(bte)
+                       break;
+               case TYPE_sht:
+                       ANALYTICAL_NTHVALUE_IMP(sht)
+                       break;
+               case TYPE_int:
+                       ANALYTICAL_NTHVALUE_IMP(int)
+                       break;
+               case TYPE_lng:
+                       ANALYTICAL_NTHVALUE_IMP(lng)
+                       break;
+#ifdef HAVE_HGE
+               case TYPE_hge:
+                       ANALYTICAL_NTHVALUE_IMP(hge)
+                       break;
+#endif
+               case TYPE_flt:
+                       ANALYTICAL_NTHVALUE_IMP(flt)
+                       break;
+               case TYPE_dbl:
+                       ANALYTICAL_NTHVALUE_IMP(dbl)
+                       break;
+               default: {
+                       /* generic atoms: compare and append through the atom interface */
+                       BATiter bpi = bat_iterator(b);
+                       const void *restrict curval;
+                       nil = ATOMnilptr(tpe);
+                       atomcmp = ATOMcompare(tpe);
+                       if(is_lng_nil(nth)) {
+                               has_nils = true;
+                               for(i=0; i<cnt; i++) {
+                                       if ((gdk_res = BUNappend(r, nil, false)) != GDK_SUCCEED)
+                                               goto finish;
+                               }
+                       } else if (p) {
+                               np = (bit*)Tloc(p, 0);
+                               for(i=0,j=0; i<cnt; i++, np++) {
+                                       if (*np) {
+                                               /* rows [j, i) are the partition that just ended;
+                                                * nth is relative to its first row j */
+                                               if(nth >= (lng)(i - j)) {
+                                                       curval = nil;
+                                               } else {
+                                                       curval = BUNtail(bpi, j + (BUN) nth);
+                                               }
+                                               if(j < i && (*atomcmp)(curval, nil) == 0)
+                                                       has_nils = true;
+                                               for (;j < i; j++) {
+                                                       if ((gdk_res = BUNappend(r, curval, false)) != GDK_SUCCEED)
+                                                               goto finish;
+                                               }
+                                       }
+                               }
+                               /* flush the last partition, which no set bit terminates */
+                               if(nth >= (lng)(i - j)) {
+                                       curval = nil;
+                               } else {
+                                       curval = BUNtail(bpi, j + (BUN) nth);
+                               }
+                               if(j < i && (*atomcmp)(curval, nil) == 0)
+                                       has_nils = true;
+                               for (;j < i; j++) {
+                                       if ((gdk_res = BUNappend(r, curval, false)) != GDK_SUCCEED)
+                                               goto finish;
+                               }
+                       } else { /* single value, ie no ordering */
+                               if(nth >= (lng)cnt) {
+                                       curval = nil;
+                               } else {
+                                       curval = BUNtail(bpi, (BUN) nth);
+                               }
+                               if(cnt > 0 && (*atomcmp)(curval, nil) == 0)
+                                       has_nils = true;
+                               for(i=0; i<cnt; i++) {
+                                       if ((gdk_res = BUNappend(r, curval, false)) != GDK_SUCCEED)
+                                               goto finish;
+                               }
+                       }
+               }
+       }
+finish:
+       BATsetcount(r, cnt);
+       r->tnonil = !has_nils;
+       r->tnil = has_nils;
+       return gdk_res;
+}
+
+#undef ANALYTICAL_NTHVALUE_IMP
+
 #define ANALYTICAL_LIMIT_IMP(TPE, OP)                        \
        do {                                                     \
                TPE *rp, *rb, *restrict bp, *end, curval;            \
diff --git a/gdk/gdk_analytic.h b/gdk/gdk_analytic.h
--- a/gdk/gdk_analytic.h
+++ b/gdk/gdk_analytic.h
@@ -20,6 +20,7 @@ gdk_export gdk_return GDKanalyticaldiff(
 gdk_export gdk_return GDKanalyticalntile(BAT *r, BAT *b, BAT *p, BAT *o, int 
tpe, ptr ntile);
 gdk_export gdk_return GDKanalyticalfirst(BAT *r, BAT *b, BAT *p, BAT *o, int 
tpe);
 gdk_export gdk_return GDKanalyticallast(BAT *r, BAT *b, BAT *p, BAT *o, int 
tpe);
+gdk_export gdk_return GDKanalyticalnthvalue(BAT *r, BAT *b, BAT *p, BAT *o, 
lng nth, int tpe);
 gdk_export gdk_return GDKanalyticalmin(BAT *r, BAT *b, BAT *p, BAT *o, int 
tpe);
 gdk_export gdk_return GDKanalyticalmax(BAT *r, BAT *b, BAT *p, BAT *o, int 
tpe);
 gdk_export gdk_return GDKanalyticalcount(BAT *r, BAT *b, BAT *p, BAT *o, const 
bit *ignore_nils, int tpe);
diff --git a/sql/backends/monet5/sql_rank.c b/sql/backends/monet5/sql_rank.c
--- a/sql/backends/monet5/sql_rank.c
+++ b/sql/backends/monet5/sql_rank.c
@@ -453,7 +453,7 @@ SQLcume_dist(Client cntxt, MalBlkPtr mb,
 #define NTILE_IMP(TPE)                                                         
             \
        do {                                                                    
                \
                TPE *ntile = getArgReference_##TPE(stk, pci, 2);                
                    \
-               if(*ntile < 1) {                                                
                    \
+               if(!is_##TPE##_nil(*ntile) && *ntile < 1) {                     
                    \
                        BBPunfix(b->batCacheid);                                
                        \
                        throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile must be 
greater than zero");     \
                }                                                               
                    \
@@ -487,13 +487,18 @@ SQLntile(Client cntxt, MalBlkPtr mb, Mal
        if (isaBatType(getArgType(mb, pci, 1))) {
                BUN cnt;
                bat *res = getArgReference_bat(stk, pci, 0);
+               int tp2 = getArgType(mb, pci, 2);
                BAT *b = BATdescriptor(*getArgReference_bat(stk, pci, 1)), *p = 
NULL, *o = NULL, *r;
                if (!b)
                        throw(SQL, "sql.ntile", SQLSTATE(HY005) "Cannot access 
column descriptor");
                cnt = BATcount(b);
                gdk_return gdk_code;
 
-               switch (getArgType(mb, pci, 2)) {
+               if (isaBatType(tp2)) {
+                       BBPunfix(b->batCacheid);
+                       throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile second 
argument must a single atom");
+               }
+               switch (tp2) {
                        case TYPE_bte:
                                NTILE_IMP(bte)
                                break;
@@ -513,7 +518,7 @@ SQLntile(Client cntxt, MalBlkPtr mb, Mal
 #endif
                        default: {
                                BBPunfix(b->batCacheid);
-                               throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile 
not available for %s", ATOMname(getArgType(mb, pci, 2)));
+                               throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile 
not available for %s", ATOMname(tp2));
                        }
                }
 
@@ -631,6 +636,97 @@ SQLlast_value(Client cntxt, MalBlkPtr mb
        return SQLanalytical_func(cntxt, mb, stk, pci, "sql.last_value", 
SQLSTATE(42000) "last_value(:any_1,:bit,:bit)", GDKanalyticallast);
 }
 
+#define NTH_VALUE_IMP(TPE)                                                     
                 \
+       do {                                                                    
                    \
+               TPE *nthvalue = getArgReference_##TPE(stk, pci, 2);             
                        \
+               lng cast_value;                                                 
                        \
+               if(!is_##TPE##_nil(*nthvalue) && *nthvalue < 1) {               
                        \
+                       BBPunfix(b->batCacheid);                                
                            \
+                       throw(SQL, "sql.nth_value", SQLSTATE(42000) "nth_value 
must be greater than zero"); \
+               }                                                               
                        \
+               voidresultBAT(r, tp1, cnt, b, "sql.nth_value");                 
                        \
+               if (isaBatType(getArgType(mb, pci, 3))) {                       
                        \
+                       p = BATdescriptor(*getArgReference_bat(stk, pci, 3));   
                            \
+                       if (!p) {                                               
                            \
+                               BBPunfix(b->batCacheid);                        
                                \
+                               throw(SQL, "sql.nth_value", SQLSTATE(HY005) 
"Cannot access column descriptor"); \
+                       }                                                       
                            \
+               }                                                               
                        \
+               if (isaBatType(getArgType(mb, pci, 4))) {                       
                        \
+                       o = BATdescriptor(*getArgReference_bat(stk, pci, 4));   
                            \
+                       if (!o) {                                               
                            \
+                               BBPunfix(b->batCacheid);                        
                                \
+                               BBPunfix(p->batCacheid);                        
                                \
+                               throw(SQL, "sql.nth_value", SQLSTATE(HY005) 
"Cannot access column descriptor"); \
+                       }                                                       
                            \
+               }                                                               
                        \
+               cast_value = is_##TPE##_nil(*nthvalue) ? lng_nil : 
(lng)(((TPE)*nthvalue) - 1);         \
+               gdk_code = GDKanalyticalnthvalue(r, b, p, o, cast_value, tp1);  
                        \
+       } while(0);
+
+str
+SQLnth_value(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void)cntxt;
+       if (pci->argc != 9 || (getArgType(mb, pci, 3) != TYPE_bit && 
getBatType(getArgType(mb, pci, 3)) != TYPE_bit) ||
+               (getArgType(mb, pci, 4) != TYPE_bit && 
getBatType(getArgType(mb, pci, 4)) != TYPE_bit)) {
+               throw(SQL, "sql.nth_value", SQLSTATE(42000) 
"nth_value(:any_1,:number,:bit,:bit)");
+       }
+       if (isaBatType(getArgType(mb, pci, 1))) {
+               BUN cnt;
+               int tp1 = getBatType(getArgType(mb, pci, 1)), tp2 = 
getArgType(mb, pci, 2);
+               bat *res = getArgReference_bat(stk, pci, 0);
+               BAT *b = BATdescriptor(*getArgReference_bat(stk, pci, 1)), *p = 
NULL, *o = NULL, *r;
+               if (!b)
+                       throw(SQL, "sql.nth_value", SQLSTATE(HY005) "Cannot 
access column descriptor");
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to