Changeset: c159d8cf1f73 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c159d8cf1f73 Added Files: sql/backends/monet5/generator/Tests/rangejoin.sql sql/backends/monet5/generator/Tests/rangejoin.stable.err sql/backends/monet5/generator/Tests/rangejoin.stable.out Modified Files: sql/backends/monet5/generator/Tests/All sql/backends/monet5/generator/generator.c sql/backends/monet5/generator/generator.h sql/backends/monet5/generator/generator.mal Branch: default Log Message:
Add rangejoin over series. The range join over series was needed for the BAM experiments. It avoids materialization of the series and the subsequent nested loop evaluation. diffs (truncated from 350 to 300 lines): diff --git a/sql/backends/monet5/generator/Tests/All b/sql/backends/monet5/generator/Tests/All --- a/sql/backends/monet5/generator/Tests/All +++ b/sql/backends/monet5/generator/Tests/All @@ -4,6 +4,8 @@ generator02 generator03 generator04 +rangejoin + joins00 joins01 joins02 diff --git a/sql/backends/monet5/generator/Tests/rangejoin.sql b/sql/backends/monet5/generator/Tests/rangejoin.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/generator/Tests/rangejoin.sql @@ -0,0 +1,7 @@ +CREATE TABLE ranges(low integer, hgh integer); +INSERT INTO ranges VALUES (1,3),(2,4),(5,6),(7,7); + +SELECT * +FROM generate_series(0,10,1) AS s JOIN ranges ON ( s.value >= ranges.low AND s.value < ranges.hgh); + +DROP TABLE ranges; diff --git a/sql/backends/monet5/generator/Tests/rangejoin.stable.err b/sql/backends/monet5/generator/Tests/rangejoin.stable.err new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/generator/Tests/rangejoin.stable.err @@ -0,0 +1,37 @@ +stderr of test 'rangejoin` in directory 'sql/backends/monet5/generator` itself: + + +# 10:17:18 > +# 10:17:18 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=39578" "--set" "mapi_usock=/var/tmp/mtest-3483/.s.monetdb.39578" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/export/scratch1/mk/bam//Linux/var/MonetDB/mTests_sql_backends_monet5_generator" "--set" "mal_listing=0" "--set" "embedded_r=yes" +# 10:17:18 > + +# builtin opt gdk_dbpath = /export/scratch1/mk/bam//Linux/var/monetdb5/dbfarm/demo +# builtin opt gdk_debug = 0 +# builtin opt gdk_vmtrim = no +# builtin opt monet_prompt = > +# builtin opt monet_daemon = no +# builtin opt mapi_port = 50000 +# builtin opt mapi_open = false +# builtin opt mapi_autosense = false +# 
builtin opt sql_optimizer = default_pipe +# builtin opt sql_debug = 0 +# cmdline opt gdk_nr_threads = 0 +# cmdline opt mapi_open = true +# cmdline opt mapi_port = 39578 +# cmdline opt mapi_usock = /var/tmp/mtest-3483/.s.monetdb.39578 +# cmdline opt monet_prompt = +# cmdline opt mal_listing = 2 +# cmdline opt gdk_dbpath = /export/scratch1/mk/bam//Linux/var/MonetDB/mTests_sql_backends_monet5_generator +# cmdline opt mal_listing = 0 +# cmdline opt embedded_r = yes +# cmdline opt gdk_debug = 536870922 + +# 10:17:18 > +# 10:17:18 > "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-3483" "--port=39578" +# 10:17:18 > + + +# 10:17:18 > +# 10:17:18 > "Done." +# 10:17:18 > + diff --git a/sql/backends/monet5/generator/Tests/rangejoin.stable.out b/sql/backends/monet5/generator/Tests/rangejoin.stable.out new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/generator/Tests/rangejoin.stable.out @@ -0,0 +1,79 @@ +stdout of test 'rangejoin` in directory 'sql/backends/monet5/generator` itself: + + +# 10:17:18 > +# 10:17:18 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=39578" "--set" "mapi_usock=/var/tmp/mtest-3483/.s.monetdb.39578" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/export/scratch1/mk/bam//Linux/var/MonetDB/mTests_sql_backends_monet5_generator" "--set" "mal_listing=0" "--set" "embedded_r=yes" +# 10:17:18 > + +# MonetDB 5 server v11.20.0 +# This is an unreleased version +# Serving database 'mTests_sql_backends_monet5_generator', using 8 threads +# Compiled for x86_64-unknown-linux-gnu/64bit with 64bit OIDs and 128bit integers dynamically linked +# Found 15.590 GiB available main-memory. +# Copyright (c) 1993-July 2008 CWI. 
+# Copyright (c) August 2008-2014 MonetDB B.V., all rights reserved +# Visit http://www.monetdb.org/ for further information +# Listening for connection requests on mapi:monetdb://vienna.ins.cwi.nl:39578/ +# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-3483/.s.monetdb.39578 +# MonetDB/GIS module loaded +# MonetDB/SQL module loaded +# MonetDB/R module loaded + +Ready. +# SQL catalog created, loading sql scripts once +# loading sql script: 09_like.sql +# loading sql script: 10_math.sql +# loading sql script: 11_times.sql +# loading sql script: 12_url.sql +# loading sql script: 13_date.sql +# loading sql script: 14_inet.sql +# loading sql script: 15_querylog.sql +# loading sql script: 16_tracelog.sql +# loading sql script: 19_cluster.sql +# loading sql script: 20_vacuum.sql +# loading sql script: 21_dependency_functions.sql +# loading sql script: 22_clients.sql +# loading sql script: 23_skyserver.sql +# loading sql script: 24_zorder.sql +# loading sql script: 25_debug.sql +# loading sql script: 26_sysmon.sql +# loading sql script: 39_analytics.sql +# loading sql script: 39_analytics_hge.sql +# loading sql script: 40_geom.sql +# loading sql script: 40_json.sql +# loading sql script: 40_json_hge.sql +# loading sql script: 41_jsonstore.sql +# loading sql script: 45_uuid.sql +# loading sql script: 46_gsl.sql +# loading sql script: 75_storagemodel.sql +# loading sql script: 80_statistics.sql +# loading sql script: 80_udf.sql +# loading sql script: 80_udf_hge.sql +# loading sql script: 89_generator_hge.sql +# loading sql script: 90_generator.sql +# loading sql script: 99_system.sql + +# 10:17:18 > +# 10:17:18 > "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-3483" "--port=39578" +# 10:17:18 > + +#CREATE TABLE ranges(low integer, hgh integer); +#INSERT INTO ranges VALUES (1,3),(2,4),(5,6),(7,7); +[ 4 ] +#SELECT * +#FROM generate_series(0,10,1) AS s JOIN ranges ON ( s.value >= ranges.low AND s.value < ranges.hgh); +% .s, 
sys.ranges, sys.ranges # table_name +% value, low, hgh # name +% tinyint, int, int # type +% 1, 1, 1 # length +[ 1, 1, 3 ] +[ 2, 1, 3 ] +[ 2, 2, 4 ] +[ 3, 2, 4 ] +[ 5, 5, 6 ] +#DROP TABLE ranges; + +# 10:17:18 > +# 10:17:18 > "Done." +# 10:17:18 > + diff --git a/sql/backends/monet5/generator/generator.c b/sql/backends/monet5/generator/generator.c --- a/sql/backends/monet5/generator/generator.c +++ b/sql/backends/monet5/generator/generator.c @@ -956,7 +956,161 @@ str VLTgenerator_join(Client cntxt, MalB BBPkeepref(*getArgReference_bat(stk,pci,0)= bln->batCacheid); BBPkeepref(*getArgReference_bat(stk,pci,1)= brn->batCacheid); } - if ( materialized) + if ( materialized){ BBPreclaim(bl); + bl = 0; + } + if(bl) BBPreleaseref(bl->batCacheid); + if(br) BBPreleaseref(br->batCacheid); return msg; } + +#define VLTrangeExpand() \ +{ bln= BATextend(bln,BATgrows(bln));\ + if( bln == NULL){\ + BBPreleaseref(blow->batCacheid);\ + BBPreleaseref(bhgh->batCacheid);\ + throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL);\ + }\ + brn= BATextend(brn,BATgrows(brn));\ + if( brn == NULL) {\ + BBPreleaseref(blow->batCacheid);\ + BBPreleaseref(bhgh->batCacheid);\ + throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL);\ + }\ + ol = (oid*) Tloc(bln,BUNfirst(bln)) + c;\ + or = (oid*) Tloc(brn,BUNfirst(brn)) + c;\ + limit= BATcapacity(bln);\ +} + +/* The operands of a join operation can either be defined on a generator */ +#define VLTrangejoin(TPE, ABS) \ +{ TPE f,f1,l,s; TPE *vlow,*vhgh; BUN w;\ + f = *getArgReference_bte(stk,p, 1);\ + l = *getArgReference_bte(stk,p, 2);\ + s = *getArgReference_bte(stk,p, 3);\ + incr = s > 0;\ + if ( s == 0 || (f> l && s>0) || (f<l && s < 0))\ + throw(MAL,"generator.rangejoin","Illegal range");\ + vlow = (TPE*) Tloc(blow,BUNfirst(blow));\ + vhgh = (TPE*) Tloc(bhgh,BUNfirst(bhgh));\ + for( ; cnt >0; cnt--, o++,vlow++,vhgh++){\ + f1 = f + floor(abs(*vlow-f)/abs(s)) * s;\ + if ( f1 < *vlow ) f1+= s;\ + w = (BUN) floor(abs(f1-f)/abs(s));\ + for( ; (f1 > *vlow || (li && 
f1 == *vlow)) && (f1 < *vhgh || (ri && f1 == *vhgh)); f1 += s, w++){\ + if(c == limit)\ + VLTrangeExpand();\ + *ol++ = (oid) w;\ + *or++ = o;\ + c++;\ + }\ +} } + +str VLTgenerator_rangejoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + BAT *blow = NULL, *bhgh = NULL, *bln = NULL, *brn= NULL; + bit li,ri; + BUN limit, cnt,c =0; + oid o= 0, *ol, *or; + int tpe, incr=0; + InstrPtr p = NULL; + str msg = MAL_SUCCEED; + + (void) cntxt; + // the left join argument should be a generator + p = findGeneratorDefinition(mb,pci,pci->argv[2]); + if( p == NULL) + throw(MAL,"generator.rangejoin","invalid arguments"); + + blow = BATdescriptor(*getArgReference_bat(stk,pci,3)); + if( blow == NULL) + throw(MAL,"generator.rangejoin",RUNTIME_OBJECT_MISSING); + + bhgh = BATdescriptor(*getArgReference_bat(stk,pci,4)); + if( bhgh == NULL){ + BBPreleaseref(blow->batCacheid); + throw(MAL,"generator.rangejoin",RUNTIME_OBJECT_MISSING); + } + li = *getArgReference_bit(stk,pci,5); + ri = *getArgReference_bit(stk,pci,6); + + cnt = BATcount(blow); + limit = 2 * cnt; //top off result before expansion + tpe = blow->ttype; + o= blow->hseqbase; + + bln = BATnew(TYPE_void,TYPE_oid, limit, TRANSIENT); + brn = BATnew(TYPE_void,TYPE_oid, limit, TRANSIENT); + if( bln == NULL || brn == NULL){ + if(bln) BBPreleaseref(bln->batCacheid); + if(brn) BBPreleaseref(brn->batCacheid); + if(blow) BBPreleaseref(blow->batCacheid); + if(bhgh) BBPreleaseref(bhgh->batCacheid); + throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL); + } + ol = (oid*) Tloc(bln,BUNfirst(bln)); + or = (oid*) Tloc(brn,BUNfirst(brn)); + + /* The actual join code for generators be injected here */ + switch(tpe){ + case TYPE_bte: // VLTrangejoin(bte,abs); break; + { bte f,f1,l,s; bte *vlow,*vhgh; BUN w; + f = *getArgReference_bte(stk,p, 1); + l = *getArgReference_bte(stk,p, 2); + s = *getArgReference_bte(stk,p, 3); + incr = s > 0; + + if ( s == 0 || (f> l && s>0) || (f<l && s < 0)) + throw(MAL,"generator.rangejoin","Illegal range"); + 
+ vlow = (bte*) Tloc(blow,BUNfirst(blow)); + vhgh = (bte*) Tloc(bhgh,BUNfirst(bhgh)); + for( ; cnt >0; cnt--, o++,vlow++,vhgh++){ + f1 = f + floor(abs(*vlow-f)/abs(s)) * s; + if ( f1 < *vlow ) f1+= s; + w = (BUN) floor(abs(f1-f)/abs(s)); + for( ; (f1 > *vlow || (li && f1 == *vlow)) && (f1 < *vhgh || (ri && f1 == *vhgh)); f1 += s, w++){ + if(c == limit) + VLTrangeExpand(); + *ol++ = (oid) w; + *or++ = o; + c++; + } + } } + break; + case TYPE_sht: VLTrangejoin(sht,abs); break; + case TYPE_int: VLTrangejoin(int,abs); break; + case TYPE_lng: VLTrangejoin(lng,llabs); break; +#ifdef HAVE_HGE + case TYPE_hge: VLTrangejoin(hge,HGE_ABS); break; +#endif + case TYPE_flt: VLTrangejoin(flt,fabsf); break; + case TYPE_dbl: VLTrangejoin(dbl,fabs); break; + default: + if( tpe == TYPE_timestamp){ + // it is easier to produce the timestamp series + // then to estimate the possible index + } + throw(MAL,"generator.rangejoin","Illegal type"); + } + + BATsetcount(bln,c); + bln->hdense = 1; + bln->hseqbase = 0; _______________________________________________ checkin-list mailing list [email protected] https://www.monetdb.org/mailman/listinfo/checkin-list
