Changeset: c159d8cf1f73 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c159d8cf1f73
Added Files:
        sql/backends/monet5/generator/Tests/rangejoin.sql
        sql/backends/monet5/generator/Tests/rangejoin.stable.err
        sql/backends/monet5/generator/Tests/rangejoin.stable.out
Modified Files:
        sql/backends/monet5/generator/Tests/All
        sql/backends/monet5/generator/generator.c
        sql/backends/monet5/generator/generator.h
        sql/backends/monet5/generator/generator.mal
Branch: default
Log Message:

Add rangejoin over series
The range join over series was needed for the BAM experiments.
It avoids materialization of the series and the subsequent
nested loop evaluation.


diffs (truncated from 350 to 300 lines):

diff --git a/sql/backends/monet5/generator/Tests/All 
b/sql/backends/monet5/generator/Tests/All
--- a/sql/backends/monet5/generator/Tests/All
+++ b/sql/backends/monet5/generator/Tests/All
@@ -4,6 +4,8 @@ generator02
 generator03
 generator04
 
+rangejoin
+
 joins00
 joins01
 joins02
diff --git a/sql/backends/monet5/generator/Tests/rangejoin.sql 
b/sql/backends/monet5/generator/Tests/rangejoin.sql
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/generator/Tests/rangejoin.sql
@@ -0,0 +1,7 @@
+CREATE TABLE ranges(low integer, hgh integer);
+INSERT INTO ranges VALUES (1,3),(2,4),(5,6),(7,7);
+
+SELECT * 
+FROM generate_series(0,10,1) AS s JOIN ranges ON ( s.value >= ranges.low AND 
s.value < ranges.hgh);
+
+DROP TABLE ranges;
diff --git a/sql/backends/monet5/generator/Tests/rangejoin.stable.err 
b/sql/backends/monet5/generator/Tests/rangejoin.stable.err
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/generator/Tests/rangejoin.stable.err
@@ -0,0 +1,37 @@
+stderr of test 'rangejoin` in directory 'sql/backends/monet5/generator` itself:
+
+
+# 10:17:18 >  
+# 10:17:18 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=39578" "--set" 
"mapi_usock=/var/tmp/mtest-3483/.s.monetdb.39578" "--set" "monet_prompt=" 
"--forcemito" "--set" "mal_listing=2" 
"--dbpath=/export/scratch1/mk/bam//Linux/var/MonetDB/mTests_sql_backends_monet5_generator"
 "--set" "mal_listing=0" "--set" "embedded_r=yes"
+# 10:17:18 >  
+
+# builtin opt  gdk_dbpath = 
/export/scratch1/mk/bam//Linux/var/monetdb5/dbfarm/demo
+# builtin opt  gdk_debug = 0
+# builtin opt  gdk_vmtrim = no
+# builtin opt  monet_prompt = >
+# builtin opt  monet_daemon = no
+# builtin opt  mapi_port = 50000
+# builtin opt  mapi_open = false
+# builtin opt  mapi_autosense = false
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  mapi_open = true
+# cmdline opt  mapi_port = 39578
+# cmdline opt  mapi_usock = /var/tmp/mtest-3483/.s.monetdb.39578
+# cmdline opt  monet_prompt = 
+# cmdline opt  mal_listing = 2
+# cmdline opt  gdk_dbpath = 
/export/scratch1/mk/bam//Linux/var/MonetDB/mTests_sql_backends_monet5_generator
+# cmdline opt  mal_listing = 0
+# cmdline opt  embedded_r = yes
+# cmdline opt  gdk_debug = 536870922
+
+# 10:17:18 >  
+# 10:17:18 >  "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" 
"--host=/var/tmp/mtest-3483" "--port=39578"
+# 10:17:18 >  
+
+
+# 10:17:18 >  
+# 10:17:18 >  "Done."
+# 10:17:18 >  
+
diff --git a/sql/backends/monet5/generator/Tests/rangejoin.stable.out 
b/sql/backends/monet5/generator/Tests/rangejoin.stable.out
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/generator/Tests/rangejoin.stable.out
@@ -0,0 +1,79 @@
+stdout of test 'rangejoin` in directory 'sql/backends/monet5/generator` itself:
+
+
+# 10:17:18 >  
+# 10:17:18 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=39578" "--set" 
"mapi_usock=/var/tmp/mtest-3483/.s.monetdb.39578" "--set" "monet_prompt=" 
"--forcemito" "--set" "mal_listing=2" 
"--dbpath=/export/scratch1/mk/bam//Linux/var/MonetDB/mTests_sql_backends_monet5_generator"
 "--set" "mal_listing=0" "--set" "embedded_r=yes"
+# 10:17:18 >  
+
+# MonetDB 5 server v11.20.0
+# This is an unreleased version
+# Serving database 'mTests_sql_backends_monet5_generator', using 8 threads
+# Compiled for x86_64-unknown-linux-gnu/64bit with 64bit OIDs and 128bit 
integers dynamically linked
+# Found 15.590 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2014 MonetDB B.V., all rights reserved
+# Visit http://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://vienna.ins.cwi.nl:39578/
+# Listening for UNIX domain connection requests on 
mapi:monetdb:///var/tmp/mtest-3483/.s.monetdb.39578
+# MonetDB/GIS module loaded
+# MonetDB/SQL module loaded
+# MonetDB/R   module loaded
+
+Ready.
+# SQL catalog created, loading sql scripts once
+# loading sql script: 09_like.sql
+# loading sql script: 10_math.sql
+# loading sql script: 11_times.sql
+# loading sql script: 12_url.sql
+# loading sql script: 13_date.sql
+# loading sql script: 14_inet.sql
+# loading sql script: 15_querylog.sql
+# loading sql script: 16_tracelog.sql
+# loading sql script: 19_cluster.sql
+# loading sql script: 20_vacuum.sql
+# loading sql script: 21_dependency_functions.sql
+# loading sql script: 22_clients.sql
+# loading sql script: 23_skyserver.sql
+# loading sql script: 24_zorder.sql
+# loading sql script: 25_debug.sql
+# loading sql script: 26_sysmon.sql
+# loading sql script: 39_analytics.sql
+# loading sql script: 39_analytics_hge.sql
+# loading sql script: 40_geom.sql
+# loading sql script: 40_json.sql
+# loading sql script: 40_json_hge.sql
+# loading sql script: 41_jsonstore.sql
+# loading sql script: 45_uuid.sql
+# loading sql script: 46_gsl.sql
+# loading sql script: 75_storagemodel.sql
+# loading sql script: 80_statistics.sql
+# loading sql script: 80_udf.sql
+# loading sql script: 80_udf_hge.sql
+# loading sql script: 89_generator_hge.sql
+# loading sql script: 90_generator.sql
+# loading sql script: 99_system.sql
+
+# 10:17:18 >  
+# 10:17:18 >  "mclient" "-lsql" "-ftest" "-Eutf-8" "-i" "-e" 
"--host=/var/tmp/mtest-3483" "--port=39578"
+# 10:17:18 >  
+
+#CREATE TABLE ranges(low integer, hgh integer);
+#INSERT INTO ranges VALUES (1,3),(2,4),(5,6),(7,7);
+[ 4    ]
+#SELECT * 
+#FROM generate_series(0,10,1) AS s JOIN ranges ON ( s.value >= ranges.low AND 
s.value < ranges.hgh);
+% .s,  sys.ranges,     sys.ranges # table_name
+% value,       low,    hgh # name
+% tinyint,     int,    int # type
+% 1,   1,      1 # length
+[ 1,   1,      3       ]
+[ 2,   1,      3       ]
+[ 2,   2,      4       ]
+[ 3,   2,      4       ]
+[ 5,   5,      6       ]
+#DROP TABLE ranges;
+
+# 10:17:18 >  
+# 10:17:18 >  "Done."
+# 10:17:18 >  
+
diff --git a/sql/backends/monet5/generator/generator.c 
b/sql/backends/monet5/generator/generator.c
--- a/sql/backends/monet5/generator/generator.c
+++ b/sql/backends/monet5/generator/generator.c
@@ -956,7 +956,161 @@ str VLTgenerator_join(Client cntxt, MalB
                BBPkeepref(*getArgReference_bat(stk,pci,0)= bln->batCacheid);
                BBPkeepref(*getArgReference_bat(stk,pci,1)= brn->batCacheid);
        }
-       if ( materialized)
+       if ( materialized){
                BBPreclaim(bl);
+               bl = 0;
+       }
+       if(bl) BBPreleaseref(bl->batCacheid);
+       if(br) BBPreleaseref(br->batCacheid);
        return msg;
 }
+
+#define VLTrangeExpand() \
+{      bln= BATextend(bln,BATgrows(bln));\
+       if( bln == NULL){\
+               BBPreleaseref(blow->batCacheid);\
+               BBPreleaseref(bhgh->batCacheid);\
+               throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL);\
+       }\
+       brn= BATextend(brn,BATgrows(brn));\
+       if( brn == NULL) {\
+               BBPreleaseref(blow->batCacheid);\
+               BBPreleaseref(bhgh->batCacheid);\
+               throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL);\
+       }\
+       ol = (oid*) Tloc(bln,BUNfirst(bln)) + c;\
+       or = (oid*) Tloc(brn,BUNfirst(brn)) + c;\
+       limit= BATcapacity(bln);\
+}
+
+/* The operands of a join operation can either be defined on a generator */
+#define VLTrangejoin(TPE, ABS) \
+{ TPE f,f1,l,s; TPE *vlow,*vhgh; BUN w;\
+       f = *getArgReference_bte(stk,p, 1);\
+       l = *getArgReference_bte(stk,p, 2);\
+       s = *getArgReference_bte(stk,p, 3);\
+       incr = s > 0;\
+       if ( s == 0 || (f> l && s>0) || (f<l && s < 0))\
+               throw(MAL,"generator.rangejoin","Illegal range");\
+       vlow = (TPE*) Tloc(blow,BUNfirst(blow));\
+       vhgh = (TPE*) Tloc(bhgh,BUNfirst(bhgh));\
+       for( ; cnt >0; cnt--, o++,vlow++,vhgh++){\
+               f1 = f + floor(abs(*vlow-f)/abs(s)) * s;\
+               if ( f1 < *vlow ) f1+= s;\
+               w = (BUN) floor(abs(f1-f)/abs(s));\
+               for( ; (f1 > *vlow || (li && f1 == *vlow)) && (f1 < *vhgh || 
(ri && f1 == *vhgh)); f1 += s, w++){\
+                       if(c == limit)\
+                               VLTrangeExpand();\
+                       *ol++ = (oid) w;\
+                       *or++ = o;\
+                       c++;\
+               }\
+} }
+
+str VLTgenerator_rangejoin(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci)
+{
+       BAT  *blow = NULL, *bhgh = NULL, *bln = NULL, *brn= NULL;
+       bit li,ri;
+       BUN limit, cnt,c =0;
+       oid o= 0, *ol, *or;
+       int tpe, incr=0;
+       InstrPtr p = NULL;
+       str msg = MAL_SUCCEED;
+
+       (void) cntxt;
+       // the left join argument should be a generator
+       p = findGeneratorDefinition(mb,pci,pci->argv[2]);
+       if( p == NULL)
+               throw(MAL,"generator.rangejoin","invalid arguments");
+
+       blow = BATdescriptor(*getArgReference_bat(stk,pci,3));
+       if( blow == NULL)
+               throw(MAL,"generator.rangejoin",RUNTIME_OBJECT_MISSING);
+
+       bhgh = BATdescriptor(*getArgReference_bat(stk,pci,4));
+       if( bhgh == NULL){
+               BBPreleaseref(blow->batCacheid);
+               throw(MAL,"generator.rangejoin",RUNTIME_OBJECT_MISSING);
+       }
+       li = *getArgReference_bit(stk,pci,5);
+       ri = *getArgReference_bit(stk,pci,6);
+
+       cnt = BATcount(blow);
+       limit = 2 * cnt; //top off result before expansion
+       tpe = blow->ttype;
+       o= blow->hseqbase;
+       
+       bln = BATnew(TYPE_void,TYPE_oid, limit, TRANSIENT);
+       brn = BATnew(TYPE_void,TYPE_oid, limit, TRANSIENT);
+       if( bln == NULL || brn == NULL){
+               if(bln) BBPreleaseref(bln->batCacheid);
+               if(brn) BBPreleaseref(brn->batCacheid);
+               if(blow) BBPreleaseref(blow->batCacheid);
+               if(bhgh) BBPreleaseref(bhgh->batCacheid);
+               throw(MAL,"generator.rangejoin",MAL_MALLOC_FAIL);
+       }
+       ol = (oid*) Tloc(bln,BUNfirst(bln));
+       or = (oid*) Tloc(brn,BUNfirst(brn));
+
+       /* The actual join code for generators is injected here */
+       switch(tpe){
+       case TYPE_bte: // VLTrangejoin(bte,abs); break; 
+       { bte f,f1,l,s; bte *vlow,*vhgh; BUN w;
+       f = *getArgReference_bte(stk,p, 1);
+       l = *getArgReference_bte(stk,p, 2);
+       s = *getArgReference_bte(stk,p, 3);
+       incr = s > 0;
+
+       if ( s == 0 || (f> l && s>0) || (f<l && s < 0))
+               throw(MAL,"generator.rangejoin","Illegal range");
+
+       vlow = (bte*) Tloc(blow,BUNfirst(blow));
+       vhgh = (bte*) Tloc(bhgh,BUNfirst(bhgh));
+       for( ; cnt >0; cnt--, o++,vlow++,vhgh++){
+               f1 = f + floor(abs(*vlow-f)/abs(s)) * s;
+               if ( f1 < *vlow ) f1+= s;
+               w = (BUN) floor(abs(f1-f)/abs(s));
+               for( ; (f1 > *vlow || (li && f1 == *vlow)) && (f1 < *vhgh || 
(ri && f1 == *vhgh)); f1 += s, w++){
+                       if(c == limit)
+                               VLTrangeExpand();
+                       *ol++ = (oid) w;
+                       *or++ = o;
+                       c++;
+               }
+       } }
+       break;
+       case TYPE_sht: VLTrangejoin(sht,abs); break;
+       case TYPE_int: VLTrangejoin(int,abs); break;
+       case TYPE_lng: VLTrangejoin(lng,llabs); break;
+#ifdef HAVE_HGE
+       case TYPE_hge: VLTrangejoin(hge,HGE_ABS); break;
+#endif
+       case TYPE_flt: VLTrangejoin(flt,fabsf); break;
+       case TYPE_dbl: VLTrangejoin(dbl,fabs); break;
+       default:
+               if( tpe == TYPE_timestamp){ 
+                       // it is easier to produce the timestamp series
+                       // than to estimate the possible index
+                       }
+               throw(MAL,"generator.rangejoin","Illegal type");
+       }
+
+       BATsetcount(bln,c);
+       bln->hdense = 1;
+       bln->hseqbase = 0;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to