Changeset: d263423703ba for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d263423703ba
Modified Files:
	gdk/gdk_analytic.c
	gdk/gdk_analytic.h
	sql/backends/monet5/sql_rank.c
	sql/backends/monet5/sql_rank.h
	sql/backends/monet5/sql_rank.mal
	sql/backends/monet5/sql_rank.mal.sh
	sql/backends/monet5/sql_rank_hge.mal
	sql/backends/monet5/sql_rank_hge.mal.sh
	sql/common/sql_types.c
	sql/server/rel_select.c
	sql/server/sql_scan.c
	sql/test/analytics/Tests/analytics01.sql
	sql/test/analytics/Tests/analytics01.stable.out
Branch: analytics
Log Message:
Implemented nth_value on a window, plus ntile cleanup. diffs (truncated from 1009 to 300 lines): diff --git a/gdk/gdk_analytic.c b/gdk/gdk_analytic.c --- a/gdk/gdk_analytic.c +++ b/gdk/gdk_analytic.c @@ -103,7 +103,6 @@ GDKanalyticaldiff(BAT *r, BAT *b, BAT *c #define NTILE_CALC(TPE) \ do { \ - TPE val = *(TPE *)ntile; \ if((BUN)val >= cnt) { \ i = 1; \ for(; rb<rp; i++, rb++) \ @@ -132,8 +131,14 @@ GDKanalyticaldiff(BAT *r, BAT *b, BAT *c #define ANALYTICAL_NTILE_IMP(TPE) \ do { \ TPE i = 0, j = 1, *rp, *rb, buckets; \ + TPE val = *(TPE *)ntile; \ rb = rp = (TPE*)Tloc(r, 0); \ - if(p) { \ + if(is_##TPE##_nil(val)) { \ + TPE *end = rp + cnt; \ + has_nils = true; \ + for(; rp<end; rp++) \ + *rp = TPE##_nil; \ + } else if(p) { \ pnp = np = (bit*)Tloc(p, 0); \ TPE *end = rp + cnt; \ for(; rp<end; np++, rp++) { \ @@ -161,6 +166,7 @@ GDKanalyticalntile(BAT *r, BAT *b, BAT * { BUN cnt = BATcount(b); bit *np, *pnp; + bool has_nils = false; gdk_return gdk_res = GDK_SUCCEED; switch (tpe) { @@ -190,8 +196,8 @@ nosupport: return GDK_FAIL; finish: BATsetcount(r, cnt); - r->tnonil = true; - r->tnil = false; + r->tnonil = !has_nils; + r->tnil = has_nils; if(o) { r->tsorted = o->tsorted; r->trevsorted = o->trevsorted; @@ -430,6 +436,154 @@ finish: #undef ANALYTICAL_LAST_IMP +#define ANALYTICAL_NTHVALUE_IMP(TPE) \ + do { \ + TPE *rp, *rb, *pbp, *bp, *end, curval; \ + pbp = bp = (TPE*)Tloc(b, 0); \ + rb = rp = (TPE*)Tloc(r, 0); \ + end = rp + cnt; \ + if(is_lng_nil(nth)) { \ + has_nils = true; \ + for(; rp<end; rp++) \ + *rp = TPE##_nil; \ + } else if(p) { \ + np = (bit*)Tloc(p, 0); \ + for(; rp<end; np++, rp++, bp++) { \ + if (*np) { \ + if(nth > (TPE) (bp - pbp)) { \ + curval = TPE##_nil; \ + } else { \ + curval = *(pbp + nth); \ + } \ + if(is_##TPE##_nil(curval)) \ + has_nils = true; \ + for(; rb<rp; rb++) \ + *rb = curval; \ + pbp = bp; \ + } \ + } \ + if(nth > (TPE) (bp - pbp)) { \ + curval = TPE##_nil; \ + } else { \ + curval = *(pbp + nth); \ + } \ + 
if(is_##TPE##_nil(curval)) \ + has_nils = true; \ + for(; rb<rp; rb++) \ + *rb = curval; \ + } else { \ + TPE* end = rp + cnt; \ + if(nth > (TPE) cnt) { \ + curval = TPE##_nil; \ + } else { \ + curval = *(bp + nth); \ + } \ + if(is_##TPE##_nil(curval)) \ + has_nils = true; \ + for(; rp<end; rp++) \ + *rp = curval; \ + } \ + goto finish; \ + } while(0); + +gdk_return +GDKanalyticalnthvalue(BAT *r, BAT *b, BAT *p, BAT *o, lng nth, int tpe) +{ + int (*atomcmp)(const void *, const void *); + const void *nil; + BUN i, j, cnt = BATcount(b); + bit *np; + gdk_return gdk_res = GDK_SUCCEED; + bool has_nils = false; + + (void) o; + switch (tpe) { + case TYPE_bte: + ANALYTICAL_NTHVALUE_IMP(bte) + break; + case TYPE_sht: + ANALYTICAL_NTHVALUE_IMP(sht) + break; + case TYPE_int: + ANALYTICAL_NTHVALUE_IMP(int) + break; + case TYPE_lng: + ANALYTICAL_NTHVALUE_IMP(lng) + break; +#ifdef HAVE_HGE + case TYPE_hge: + ANALYTICAL_NTHVALUE_IMP(hge) + break; +#endif + case TYPE_flt: + ANALYTICAL_NTHVALUE_IMP(flt) + break; + case TYPE_dbl: + ANALYTICAL_NTHVALUE_IMP(dbl) + break; + default: { + BATiter bpi = bat_iterator(b); + const void *restrict curval; + nil = ATOMnilptr(tpe); + atomcmp = ATOMcompare(tpe); + if(is_lng_nil(nth)) { + has_nils = true; + for(i=0; i<cnt; i++) { + if ((gdk_res = BUNappend(r, nil, false)) != GDK_SUCCEED) + goto finish; + } + } else if (p) { + np = (bit*)Tloc(p, 0); + for(i=0,j=0; i<cnt; i++, np++) { + if (*np) { + if(nth > (lng)(i - j)) { + curval = nil; + } else { + curval = BUNtail(bpi, nth); + } + if((*atomcmp)(curval, nil) == 0) + has_nils = true; + for (;j < i; j++) { + if ((gdk_res = BUNappend(r, curval, false)) != GDK_SUCCEED) + goto finish; + } + } + } + if(nth > (lng)(i - j)) { + curval = nil; + } else { + curval = BUNtail(bpi, nth); + } + if((*atomcmp)(curval, nil) == 0) + has_nils = true; + for (;j < i; j++) { + if ((gdk_res = BUNappend(r, curval, false)) != GDK_SUCCEED) + goto finish; + } + } else { /* single value, ie no ordering */ + if(nth > 
(lng)cnt) { + curval = nil; + } else { + curval = BUNtail(bpi, nth); + } + if((*atomcmp)(curval, nil) == 0) + has_nils = true; + for(i=0; i<cnt; i++) { + if ((gdk_res = BUNappend(r, curval, false)) != GDK_SUCCEED) + goto finish; + } + } + } + } +finish: + BATsetcount(r, cnt); + r->tnonil = !has_nils; + r->tnil = has_nils; + return gdk_res; +} + +#undef ANALYTICAL_NTHVALUE_IMP + #define ANALYTICAL_LIMIT_IMP(TPE, OP) \ do { \ TPE *rp, *rb, *restrict bp, *end, curval; \ diff --git a/gdk/gdk_analytic.h b/gdk/gdk_analytic.h --- a/gdk/gdk_analytic.h +++ b/gdk/gdk_analytic.h @@ -20,6 +20,7 @@ gdk_export gdk_return GDKanalyticaldiff( gdk_export gdk_return GDKanalyticalntile(BAT *r, BAT *b, BAT *p, BAT *o, int tpe, ptr ntile); gdk_export gdk_return GDKanalyticalfirst(BAT *r, BAT *b, BAT *p, BAT *o, int tpe); gdk_export gdk_return GDKanalyticallast(BAT *r, BAT *b, BAT *p, BAT *o, int tpe); +gdk_export gdk_return GDKanalyticalnthvalue(BAT *r, BAT *b, BAT *p, BAT *o, lng nth, int tpe); gdk_export gdk_return GDKanalyticalmin(BAT *r, BAT *b, BAT *p, BAT *o, int tpe); gdk_export gdk_return GDKanalyticalmax(BAT *r, BAT *b, BAT *p, BAT *o, int tpe); gdk_export gdk_return GDKanalyticalcount(BAT *r, BAT *b, BAT *p, BAT *o, const bit *ignore_nils, int tpe); diff --git a/sql/backends/monet5/sql_rank.c b/sql/backends/monet5/sql_rank.c --- a/sql/backends/monet5/sql_rank.c +++ b/sql/backends/monet5/sql_rank.c @@ -453,7 +453,7 @@ SQLcume_dist(Client cntxt, MalBlkPtr mb, #define NTILE_IMP(TPE) \ do { \ TPE *ntile = getArgReference_##TPE(stk, pci, 2); \ - if(*ntile < 1) { \ + if(!is_##TPE##_nil(*ntile) && *ntile < 1) { \ BBPunfix(b->batCacheid); \ throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile must be greater than zero"); \ } \ @@ -487,13 +487,18 @@ SQLntile(Client cntxt, MalBlkPtr mb, Mal if (isaBatType(getArgType(mb, pci, 1))) { BUN cnt; bat *res = getArgReference_bat(stk, pci, 0); + int tp2 = getArgType(mb, pci, 2); BAT *b = BATdescriptor(*getArgReference_bat(stk, pci, 1)), *p = NULL, *o 
= NULL, *r; if (!b) throw(SQL, "sql.ntile", SQLSTATE(HY005) "Cannot access column descriptor"); cnt = BATcount(b); gdk_return gdk_code; - switch (getArgType(mb, pci, 2)) { + if (isaBatType(tp2)) { + BBPunfix(b->batCacheid); + throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile second argument must a single atom"); + } + switch (tp2) { case TYPE_bte: NTILE_IMP(bte) break; @@ -513,7 +518,7 @@ SQLntile(Client cntxt, MalBlkPtr mb, Mal #endif default: { BBPunfix(b->batCacheid); - throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile not available for %s", ATOMname(getArgType(mb, pci, 2))); + throw(SQL, "sql.ntile", SQLSTATE(42000) "ntile not available for %s", ATOMname(tp2)); } } @@ -631,6 +636,97 @@ SQLlast_value(Client cntxt, MalBlkPtr mb return SQLanalytical_func(cntxt, mb, stk, pci, "sql.last_value", SQLSTATE(42000) "last_value(:any_1,:bit,:bit)", GDKanalyticallast); } +#define NTH_VALUE_IMP(TPE) \ + do { \ + TPE *nthvalue = getArgReference_##TPE(stk, pci, 2); \ + lng cast_value; \ + if(!is_##TPE##_nil(*nthvalue) && *nthvalue < 1) { \ + BBPunfix(b->batCacheid); \ + throw(SQL, "sql.nth_value", SQLSTATE(42000) "nth_value must be greater than zero"); \ + } \ + voidresultBAT(r, tp1, cnt, b, "sql.nth_value"); \ + if (isaBatType(getArgType(mb, pci, 3))) { \ + p = BATdescriptor(*getArgReference_bat(stk, pci, 3)); \ + if (!p) { \ + BBPunfix(b->batCacheid); \ + throw(SQL, "sql.nth_value", SQLSTATE(HY005) "Cannot access column descriptor"); \ + } \ + } \ + if (isaBatType(getArgType(mb, pci, 4))) { \ + o = BATdescriptor(*getArgReference_bat(stk, pci, 4)); \ + if (!o) { \ + BBPunfix(b->batCacheid); \ + BBPunfix(p->batCacheid); \ + throw(SQL, "sql.nth_value", SQLSTATE(HY005) "Cannot access column descriptor"); \ + } \ + } \ + cast_value = is_##TPE##_nil(*nthvalue) ? 
lng_nil : (lng)(((TPE)*nthvalue) - 1); \ + gdk_code = GDKanalyticalnthvalue(r, b, p, o, cast_value, tp1); \ + } while(0); + +str +SQLnth_value(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + (void)cntxt; + if (pci->argc != 9 || (getArgType(mb, pci, 3) != TYPE_bit && getBatType(getArgType(mb, pci, 3)) != TYPE_bit) || + (getArgType(mb, pci, 4) != TYPE_bit && getBatType(getArgType(mb, pci, 4)) != TYPE_bit)) { + throw(SQL, "sql.nth_value", SQLSTATE(42000) "nth_value(:any_1,:number,:bit,:bit)"); + } + if (isaBatType(getArgType(mb, pci, 1))) { + BUN cnt; + int tp1 = getBatType(getArgType(mb, pci, 1)), tp2 = getArgType(mb, pci, 2); + bat *res = getArgReference_bat(stk, pci, 0); + BAT *b = BATdescriptor(*getArgReference_bat(stk, pci, 1)), *p = NULL, *o = NULL, *r; + if (!b) + throw(SQL, "sql.nth_value", SQLSTATE(HY005) "Cannot access column descriptor"); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list