Changeset: 090ec7275a06 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=090ec7275a06
Added Files:
sql/backends/monet5/sql_rank.c
sql/backends/monet5/sql_rank.h
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/SQL-dump.stable.out.int128
clients/Tests/exports.stable.out
gdk/gdk_batop.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
monetdb5/optimizer/opt_support.c
sql/backends/monet5/Makefile.ag
sql/backends/monet5/rel_bin.c
sql/backends/monet5/sql.mal
sql/backends/monet5/sql_rank.mal
sql/common/sql_types.c
sql/include/sql_catalog.h
sql/server/rel_exp.c
sql/server/rel_optimizer.c
sql/server/rel_select.c
sql/test/BugTracker-2009/Tests/orderby_with_row_number.SF-2895791.stable.out
sql/test/BugTracker-2010/Tests/crash_on_complex_join_exp.Bug-2353.stable.out
sql/test/BugTracker-2010/Tests/rank-over-crash.SF-2926454.stable.out.int128
sql/test/BugTracker-2010/Tests/with_row_number_crash.Bug-2631.stable.out
sql/test/BugTracker-2012/Tests/row_number_does_not_work_in_complex_query.Bug-2805.stable.out
sql/test/BugTracker-2013/Tests/sort_void_crash.Bug-3341.stable.out
sql/test/BugTracker/Tests/with_row_number.SF-1898089.stable.out
sql/test/Tests/rank.stable.out
sql/test/bugs/Tests/crash_order_by.stable.out
sql/test/leaks/Tests/check1.stable.out.int128
sql/test/leaks/Tests/check2.stable.out.int128
sql/test/leaks/Tests/check3.stable.out.int128
sql/test/leaks/Tests/check4.stable.out.int128
sql/test/leaks/Tests/check5.stable.out.int128
Branch: default
Log Message:
Re-implemented the bare analytic window function functionality
(i.e. rank() over (..)).
This introduces a new type of function (F_ANALYTIC), which is order dependent.
Currently only rank, dense_rank and row_number are supported.
Additional functions and window frame support will take some more time.
diffs (truncated from 14550 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -2364,14 +2364,6 @@ command algebra.leftjoin(left:bat[:oid,:
address ALGleftjoinestimate;
command
algebra.leftjoin(left:bat[:oid,:any_2],right:bat[:any_2,:any_3]):bat[:oid,:any_3]
address ALGleftjoin;
-command
algebra.mark_grp(b:bat[:any_1,:oid],g:bat[:oid,:any_2],s:oid):bat[:any_1,:oid]
-address ALGmark_grp_2;
-comment "grouped mark": Produces a new BAT with per group a locally unique
dense ascending sequense of OIDs in the tail. The tail of the first BAT (b)
identifies the group that each BUN of b belongs to. The second BAT (g)
represents the group extent, i.e., the head is the unique list of group IDs
from b's tail. The third argument (s) gives the base value for the new OID
sequence of each group.
-
-command algebra.mark_grp(b:bat[:any_1,:oid],g:bat[:oid,:oid]):bat[:any_1,:oid]
-address ALGmark_grp_1;
-comment "grouped mark": Produces a new BAT with per group a locally unique
dense ascending sequence of OIDs in the tail. The tail of the first BAT (b)
identifies the group that each BUN of b belongs to. The second BAT (g)
represents the group extent, i.e., the head is the unique list of group IDs
from b's tail. The tail of g gives for each group the base value for the new
OID sequence.
-
command
algebra.mark(b:bat[:any_1,:any_2],nr_parts:int,part_nr:int):bat[:any_1,:oid]
address ALGtmarkp;
comment Produces a BAT with fresh unique dense sequense of OIDs in the
tail that starts at base (i.e. [base,..base+b.count()-1] ). The base is
uniquely defined by the part_nr (ie we set the highest bits based on the
part_nr/nr_parts)
@@ -2675,6 +2667,18 @@ command batsql.alpha(dec:bat[:oid,:dbl],
address SQLbat_alpha_cst;
comment BAT implementation of astronomy alpha function
+pattern
batsql.dense_rank{orderdependend}(b:bat[:oid,:any_1],p:any_2,o:any_3):bat[:oid,:int]
+address SQLdense_rank;
+comment return the densely ranked groups
+
+pattern
batsql.diff{orderdependend}(p:bat[:oid,:bit],b:bat[:oid,:any_1]):bat[:oid,:bit]
+address SQLdiff;
+comment return true if cur != prev row
+
+pattern batsql.diff{orderdependend}(b:bat[:oid,:any_1]):bat[:oid,:bit]
+address SQLdiff;
+comment return true if cur != prev row
+
command batsql.dec_round(v:bat[:oid,:dbl],r:dbl):bat[:oid,:dbl]
address dbl_bat_dec_round_wrap;
comment round off the value v to nearests multiple of r
@@ -2707,6 +2711,14 @@ pattern batsql.next_value(sname:bat[:oid
address mvc_bat_next_value;
comment return the next value of the sequence
+pattern
batsql.rank{orderdependend}(b:bat[:oid,:any_1],p:any_2,o:any_3):bat[:oid,:int]
+address SQLrank;
+comment return the ranked groups
+
+pattern
batsql.row_number{orderdependend}(b:bat[:oid,:any_1],p:any_2,o:any_3):bat[:oid,:int]
+address SQLrow_number;
+comment return the row_numer-ed groups
+
command batsql.round(v:bat[:oid,:dbl],r:bte):bat[:oid,:dbl]
address dbl_bat_round_wrap;
comment round off the floating point v to r digits behind the dot (if r < 0,
before the dot)
@@ -22841,9 +22853,6 @@ pattern batcalc.cmp(b1:bat[:oid,:bit],b2
address CMDbatCMP;
comment Return -1/0/1 if B1 </==/> B2
-function batcalc.dense_rank_grp{inline}(b:bat[:oid,:any_1]):bat[:oid,:int];
-function
batcalc.dense_rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],orig:bat[:oid,:any_2]):bat[:oid,:int];
-function
batcalc.dense_rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],grp:bat[:oid,:oid],ext:bat[:oid,:oid]):bat[:oid,:int];
command batcalc.dbl(s1:int,v:bat[:oid,:lng],d2:int,s2:int):bat[:oid,:dbl]
address batlng_dec2dec_dbl;
comment cast decimal(lng) to decimal(dbl) and check for overflow
@@ -25712,9 +25721,6 @@ pattern batcalc.lsh_noerror(b1:bat[:oid,
address CMDbatLSH;
comment Return B1 << B2, out of range second operand causes NIL value
-function batcalc.mark_grp{inline}(b:bat[:oid,:any_1]):bat[:oid,:int];
-function
batcalc.mark_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],orig:bat[:oid,:any_2]):bat[:oid,:int];
-function
batcalc.mark_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],grp:bat[:oid,:oid],ext:bat[:oid,:oid]):bat[:oid,:int];
pattern
batcalc.mod_noerror(v:dbl,b:bat[:oid,:dbl],s:bat[:oid,:oid]):bat[:oid,:dbl]
address CMDbatMOD;
comment Return V % B with candidates list, divide by zero causes NIL value
@@ -28451,9 +28457,6 @@ pattern batcalc.or(b1:bat[:oid,:bit],b2:
address CMDbatOR;
comment Return B1 OR B2
-function batcalc.rank_grp{inline}(b:bat[:oid,:any_1]):bat[:oid,:int];
-function
batcalc.rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],orig:bat[:oid,:any_2]):bat[:oid,:int];
-function
batcalc.rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],grp:bat[:oid,:oid],ext:bat[:oid,:oid]):bat[:oid,:int];
command
batcalc.rotate_xor_hash(h:bat[:oid,:wrd],nbits:int,b:bat[:oid,:any_1]):bat[:oid,:int]
address MKEYbulk_rotate_xor_hash;
pattern
batcalc.rsh_noerror(v:lng,b:bat[:oid,:lng],s:bat[:oid,:oid]):bat[:oid,:lng]
@@ -40579,14 +40582,18 @@ pattern sql.commit():void
address SQLcommit;
comment Trigger the commit operation for a MAL block
-command sql.dense_rank(b:bat[:oid,:any_1]):bat[:oid,:int]
-address sql_dense_rank;
-comment return the densely ranked bat
-
-command
sql.dense_rank_grp(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid]):bat[:oid,:int]
-address sql_dense_rank_grp;
+pattern sql.dense_rank(b:any_1,p:bit,o:bit):int
+address SQLdense_rank;
comment return the densely ranked groups
+pattern sql.diff(p:bit,b:any_1):bit
+address SQLdiff;
+comment return true if cur != prev row
+
+pattern sql.diff(b:any_1):bit
+address SQLdiff;
+comment return true if cur != prev row
+
pattern sql.drop_hash(sch:str,tbl:str):void
address SQLdrop_hash;
comment Drop hash indices for the given table
@@ -40799,14 +40806,14 @@ pattern sql.querylog_catalog() (id:bat[:
address sql_querylog_catalog;
comment Obtain the query log catalog
-command sql.rank(b:bat[:oid,:any_1]):bat[:oid,:int]
-address sql_rank;
-comment return the rank bat
-
-command
sql.rank_grp(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid]):bat[:oid,:int]
-address sql_rank_grp;
+pattern sql.rank(b:any_1,p:bit,o:bit):int
+address SQLrank;
comment return the ranked groups
+pattern sql.row_number(b:any_1,p:bit,o:bit):int
+address SQLrow_number;
+comment return the row_numer-ed groups
+
pattern sql.reuse(sch:str,tbl:str):void
address SQLreuse;
comment Consolidate the deletion table over all columns reusing deleted slots
@@ -40856,7 +40863,6 @@ address mvc_restart_seq;
comment restart the sequence with value start
function sql.sql_environment() (name:bat[:oid,:str],value:bat[:oid,:str]);
-function
sql.subzero_or_one{inline}(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid],no_nil:bit):bat[:oid,:any_1];
pattern sql.shrink(sch:str,tbl:str):void
address SQLshrink;
comment Consolidate the deletion table over all columns using shrinking
@@ -40901,6 +40907,7 @@ pattern sql.sql_variables():bat[:oid,:st
address sql_variables;
comment return the table with session variables
+function
sql.subzero_or_one{inline}(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid],no_nil:bit):bat[:oid,:any_1];
pattern sql.single(x:any_2):bat[:oid,:any_2]
address CMDBATsingle;
command
sql.subdelta(col:bat[:oid,:oid],cand:bat[:oid,:oid],uid:bat[:oid,:oid],uval:bat[:oid,:oid]):bat[:oid,:oid]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -2875,14 +2875,6 @@ command algebra.leftjoin(left:bat[:oid,:
address ALGleftjoinestimate;
command
algebra.leftjoin(left:bat[:oid,:any_2],right:bat[:any_2,:any_3]):bat[:oid,:any_3]
address ALGleftjoin;
-command
algebra.mark_grp(b:bat[:any_1,:oid],g:bat[:oid,:any_2],s:oid):bat[:any_1,:oid]
-address ALGmark_grp_2;
-comment "grouped mark": Produces a new BAT with per group a locally unique
dense ascending sequense of OIDs in the tail. The tail of the first BAT (b)
identifies the group that each BUN of b belongs to. The second BAT (g)
represents the group extent, i.e., the head is the unique list of group IDs
from b's tail. The third argument (s) gives the base value for the new OID
sequence of each group.
-
-command algebra.mark_grp(b:bat[:any_1,:oid],g:bat[:oid,:oid]):bat[:any_1,:oid]
-address ALGmark_grp_1;
-comment "grouped mark": Produces a new BAT with per group a locally unique
dense ascending sequence of OIDs in the tail. The tail of the first BAT (b)
identifies the group that each BUN of b belongs to. The second BAT (g)
represents the group extent, i.e., the head is the unique list of group IDs
from b's tail. The tail of g gives for each group the base value for the new
OID sequence.
-
command
algebra.mark(b:bat[:any_1,:any_2],nr_parts:int,part_nr:int):bat[:any_1,:oid]
address ALGtmarkp;
comment Produces a BAT with fresh unique dense sequense of OIDs in the
tail that starts at base (i.e. [base,..base+b.count()-1] ). The base is
uniquely defined by the part_nr (ie we set the highest bits based on the
part_nr/nr_parts)
@@ -3194,6 +3186,18 @@ command batsql.dec_round(v:bat[:oid,:hge
address hge_bat_dec_round_wrap;
comment round off the value v to nearests multiple of r
+pattern
batsql.dense_rank{orderdependend}(b:bat[:oid,:any_1],p:any_2,o:any_3):bat[:oid,:int]
+address SQLdense_rank;
+comment return the densely ranked groups
+
+pattern
batsql.diff{orderdependend}(p:bat[:oid,:bit],b:bat[:oid,:any_1]):bat[:oid,:bit]
+address SQLdiff;
+comment return true if cur != prev row
+
+pattern batsql.diff{orderdependend}(b:bat[:oid,:any_1]):bat[:oid,:bit]
+address SQLdiff;
+comment return true if cur != prev row
+
command batsql.dec_round(v:bat[:oid,:dbl],r:dbl):bat[:oid,:dbl]
address dbl_bat_dec_round_wrap;
comment round off the value v to nearests multiple of r
@@ -3230,6 +3234,14 @@ command batsql.round(v:bat[:oid,:hge],d:
address hge_bat_round_wrap;
comment round off the decimal v(d,s) to r digits behind the dot (if r < 0,
before the dot)
+pattern
batsql.rank{orderdependend}(b:bat[:oid,:any_1],p:any_2,o:any_3):bat[:oid,:int]
+address SQLrank;
+comment return the ranked groups
+
+pattern
batsql.row_number{orderdependend}(b:bat[:oid,:any_1],p:any_2,o:any_3):bat[:oid,:int]
+address SQLrow_number;
+comment return the row_numer-ed groups
+
command batsql.round(v:bat[:oid,:dbl],r:bte):bat[:oid,:dbl]
address dbl_bat_round_wrap;
comment round off the floating point v to r digits behind the dot (if r < 0,
before the dot)
@@ -29380,9 +29392,6 @@ command batcalc.dbl(v:bat[:oid,:hge],dig
address bathge_num2dec_dbl;
comment cast number to decimal(dbl) and check for overflow
-function batcalc.dense_rank_grp{inline}(b:bat[:oid,:any_1]):bat[:oid,:int];
-function
batcalc.dense_rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],orig:bat[:oid,:any_2]):bat[:oid,:int];
-function
batcalc.dense_rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],grp:bat[:oid,:oid],ext:bat[:oid,:oid]):bat[:oid,:int];
command batcalc.dbl(s1:int,v:bat[:oid,:lng],d2:int,s2:int):bat[:oid,:dbl]
address batlng_dec2dec_dbl;
comment cast decimal(lng) to decimal(dbl) and check for overflow
@@ -33257,9 +33266,6 @@ pattern batcalc.lsh_noerror(b1:bat[:oid,
address CMDbatLSH;
comment Return B1 << B2, out of range second operand causes NIL value
-function batcalc.mark_grp{inline}(b:bat[:oid,:any_1]):bat[:oid,:int];
-function
batcalc.mark_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],orig:bat[:oid,:any_2]):bat[:oid,:int];
-function
batcalc.mark_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],grp:bat[:oid,:oid],ext:bat[:oid,:oid]):bat[:oid,:int];
pattern
batcalc.mod_noerror(v:dbl,b:bat[:oid,:dbl],s:bat[:oid,:oid]):bat[:oid,:dbl]
address CMDbatMOD;
comment Return V % B with candidates list, divide by zero causes NIL value
@@ -36764,9 +36770,6 @@ pattern batcalc.or(b1:bat[:oid,:hge],b2:
address CMDbatOR;
comment Return B1 OR B2
-function batcalc.rank_grp{inline}(b:bat[:oid,:any_1]):bat[:oid,:int];
-function
batcalc.rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],orig:bat[:oid,:any_2]):bat[:oid,:int];
-function
batcalc.rank_grp{inline}(b:bat[:oid,:any_1],ord:bat[:oid,:oid],grp:bat[:oid,:oid],ext:bat[:oid,:oid]):bat[:oid,:int];
command
batcalc.rotate_xor_hash(h:bat[:oid,:wrd],nbits:int,b:bat[:oid,:any_1]):bat[:oid,:int]
address MKEYbulk_rotate_xor_hash;
pattern
batcalc.rsh_noerror(v:lng,b:bat[:oid,:lng],s:bat[:oid,:oid]):bat[:oid,:lng]
@@ -51518,14 +51521,18 @@ command sql.dec_round(v:hge,r:hge):hge
address hge_dec_round_wrap;
comment round off the value v to nearests multiple of r
-command sql.dense_rank(b:bat[:oid,:any_1]):bat[:oid,:int]
-address sql_dense_rank;
-comment return the densely ranked bat
-
-command
sql.dense_rank_grp(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid]):bat[:oid,:int]
-address sql_dense_rank_grp;
+pattern sql.dense_rank(b:any_1,p:bit,o:bit):int
+address SQLdense_rank;
comment return the densely ranked groups
+pattern sql.diff(p:bit,b:any_1):bit
+address SQLdiff;
+comment return true if cur != prev row
+
+pattern sql.diff(b:any_1):bit
+address SQLdiff;
+comment return true if cur != prev row
+
pattern sql.drop_hash(sch:str,tbl:str):void
address SQLdrop_hash;
comment Drop hash indices for the given table
@@ -51742,14 +51749,14 @@ command sql.round(v:hge,d:int,s:int,r:bt
address hge_round_wrap;
comment round off the decimal v(d,s) to r digits behind the dot (if r < 0,
before the dot)
-command sql.rank(b:bat[:oid,:any_1]):bat[:oid,:int]
-address sql_rank;
-comment return the rank bat
-
-command
sql.rank_grp(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid]):bat[:oid,:int]
-address sql_rank_grp;
+pattern sql.rank(b:any_1,p:bit,o:bit):int
+address SQLrank;
comment return the ranked groups
+pattern sql.row_number(b:any_1,p:bit,o:bit):int
+address SQLrow_number;
+comment return the row_numer-ed groups
+
pattern sql.reuse(sch:str,tbl:str):void
address SQLreuse;
comment Consolidate the deletion table over all columns reusing deleted slots
@@ -51799,7 +51806,6 @@ address mvc_restart_seq;
comment restart the sequence with value start
function sql.sql_environment() (name:bat[:oid,:str],value:bat[:oid,:str]);
-function
sql.subzero_or_one{inline}(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid],no_nil:bit):bat[:oid,:any_1];
pattern sql.shrink(sch:str,tbl:str):void
address SQLshrink;
comment Consolidate the deletion table over all columns using shrinking
@@ -51844,6 +51850,7 @@ pattern sql.sql_variables():bat[:oid,:st
address sql_variables;
comment return the table with session variables
+function
sql.subzero_or_one{inline}(b:bat[:oid,:any_1],gp:bat[:oid,:oid],gpe:bat[:oid,:oid],no_nil:bit):bat[:oid,:any_1];
pattern sql.single(x:any_2):bat[:oid,:any_2]
address CMDBATsingle;
command
sql.subdelta(col:bat[:oid,:oid],cand:bat[:oid,:oid],uid:bat[:oid,:oid],uval:bat[:oid,:oid]):bat[:oid,:oid]
diff --git a/clients/Tests/SQL-dump.stable.out.int128
b/clients/Tests/SQL-dump.stable.out.int128
--- a/clients/Tests/SQL-dump.stable.out.int128
+++ b/clients/Tests/SQL-dump.stable.out.int128
@@ -1529,234 +1529,234 @@ 2149 "statement" "varchar" 2048 0
2139 N
2151 "id" "int" 32 0 2150 NULL true 0 NULL
2152 "name" "varchar" 1024 0 2150 NULL true 1
NULL
2153 "nr" "int" 32 0 2150 NULL true 2 NULL
-5660 "id" "int" 32 0 5659 NULL true 0 NULL
-5661 "name" "varchar" 1024 0 5659 NULL true 1
NULL
-5662 "schema_id" "int" 32 0 5659 NULL true 2
NULL
-5663 "query" "varchar" 2048 0 5659 NULL true 3
NULL
-5664 "type" "smallint" 16 0 5659 NULL true 4
NULL
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list