Changeset: 3e3beae5932c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3e3beae5932c
Added Files:
monetdb5/modules/mal/Tests/xidlist.mal
monetdb5/modules/mal/Tests/xidlist.stable.err
monetdb5/modules/mal/Tests/xidlist.stable.out
monetdb5/modules/mal/xid.c
monetdb5/modules/mal/xid.h
monetdb5/modules/mal/xid.mal
monetdb5/optimizer/opt_xid.c
monetdb5/optimizer/opt_xid.h
Modified Files:
gdk/gdk.h
monetdb5/modules/mal/Makefile.ag
monetdb5/modules/mal/Tests/All
monetdb5/modules/mal/mal_init.mal
monetdb5/optimizer/Makefile.ag
monetdb5/optimizer/opt_pipes.c
monetdb5/optimizer/opt_support.c
monetdb5/optimizer/opt_support.h
monetdb5/optimizer/opt_wrapper.c
monetdb5/optimizer/optimizer.mal
Branch: default
Log Message:
Experimental OID list compression
The intermediates of queries comprise of large OID sets. The views are
a good method to compress them when a range is involved.
This batch of changes introduce an optimizer to inject a compression/
decompression scheme at runtime for oid,oid BATs. It is rough and
require some debugging on larger sets. Also optimization to ignore
badly compressable oid lists and avoidance of multi-decompression
could be studied.
The compression scheme itself is based on exploitation of local
properties, such as oid-ranges and small oid-sets.
Extension towards other datatypes and compression schemes (e.g.rle)
could be considered.
In general, (de)compression trades cpu from memory footprint,
which means that this scheme is most likely to be of use in
out-of-memory situations.
diffs (truncated from 893 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -625,7 +625,8 @@ typedef struct {
unsigned int copied:1, /* a copy of an existing map. */
hashash:1,/* the string heap contains hash values */
- forcemap:1; /* force STORE_MMAP even if heap exists */
+ forcemap:1, /* force STORE_MMAP even if heap exists */
+ xidcompressed:1; /* compress heaps */
storage_t storage; /* storage mode (mmap/malloc). */
storage_t newstorage; /* new desired storage mode at re-allocation. */
bte dirty; /* specific heap dirty marker */
diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -62,6 +62,7 @@ lib_mal = {
urlbox.c urlbox.h \
zorder.c zorder.h \
sample.c sample.h \
+ xid.c xid.h \
calc.c batcalc.c
}
@@ -78,7 +79,7 @@ headers_mal = {
txtsim.mal recycle.mal \
cluster.mal trader.mal \
tokenizer.mal zorder.mal sample.mal \
- calc.mal batcalc.mal
+ calc.mal batcalc.mal xid.mal
}
EXTRA_DIST = attach.mal batExtensions.mal iterator.mal constraints.mal
groupby.mal histogram.mal mal_init.mal manual.mal mkey.mal pcre.mal
profiler.mal recycle.mal remote.mal sabaoth.mal trader.mal transaction.mal
txtsim.mal tablet.mal tablet.h sample.mal mal_mapi.mal mat.mal tokenizer.mal
pqueue.mal calc.mal batcalc.mal
diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -67,6 +67,8 @@ cluster00
tokenizer00
zorder
+xidlist
+
#HAVE_RAPTOR?rdf
# might show different output if openssl is compiled without full sha2
diff --git a/monetdb5/modules/mal/Tests/xidlist.mal
b/monetdb5/modules/mal/Tests/xidlist.mal
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/Tests/xidlist.mal
@@ -0,0 +1,26 @@
+# OID list compressions
+
+b:= bat.new(:oid,:oid);
+bat.append(b,0@0);
+bat.append(b,1@0);
+bat.append(b,2@0);
+bat.append(b,3@0);
+bat.append(b,5@0);
+bat.append(b,7@0);
+bat.append(b,70@0);
+bat.append(b,188@0);
+bat.append(b,190@0);
+bat.append(b,192@0);
+bat.append(b,9999@0);
+bat.append(b,50@0);
+bat.append(b,49@0);
+bat.append(b,50@0);
+bat.append(b,50@0);
+bat.append(b,48@0);
+b:= bat.append(b,b);
+
+io.print(b);
+x:= xid.compress(b);
+xid.dump(x);
+z:= xid.decompress(x);
+io.print(z);
diff --git a/monetdb5/modules/mal/Tests/xidlist.stable.err
b/monetdb5/modules/mal/Tests/xidlist.stable.err
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/Tests/xidlist.stable.err
@@ -0,0 +1,34 @@
+stderr of test 'xidlist` in directory 'modules/mal` itself:
+
+
+# 00:20:40 >
+# 00:20:40 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"gdk_dbfarm=/export/scratch1/mk/current//Linux/var/MonetDB" "--set"
"mapi_open=true" "--set" "mapi_port=38158" "--set" "monet_prompt=" "--trace"
"--forcemito" "--set" "mal_listing=2" "--dbname=mTests_modules_mal"
"xidlist.mal"
+# 00:20:40 >
+
+# builtin opt gdk_dbname = demo
+# builtin opt gdk_dbfarm =
/export/scratch1/mk/current//Linux/var/monetdb5/dbfarm
+# builtin opt gdk_debug = 0
+# builtin opt gdk_alloc_map = no
+# builtin opt gdk_vmtrim = yes
+# builtin opt monet_prompt = >
+# builtin opt monet_daemon = no
+# builtin opt mapi_port = 50000
+# builtin opt mapi_open = false
+# builtin opt mapi_autosense = false
+# builtin opt sql_optimizer = default_pipe
+# builtin opt sql_debug = 0
+# cmdline opt gdk_nr_threads = 0
+# cmdline opt gdk_dbfarm = /export/scratch1/mk/current//Linux/var/MonetDB
+# cmdline opt mapi_open = true
+# cmdline opt mapi_port = 38158
+# cmdline opt monet_prompt =
+# cmdline opt mal_listing = 2
+# cmdline opt gdk_dbname = mTests_modules_mal
+mserver5: /export/scratch1/mk/current//package/gdk/gdk_bat.c:2910:
BATassertHeadProps: Assertion `!b->H->revsorted || cmp >= 0' failed.
+
+Aborted
+
+# 00:20:41 >
+# 00:20:41 > "Done."
+# 00:20:41 >
+
diff --git a/monetdb5/modules/mal/Tests/xidlist.stable.out
b/monetdb5/modules/mal/Tests/xidlist.stable.out
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/Tests/xidlist.stable.out
@@ -0,0 +1,112 @@
+stdout of test 'xidlist` in directory 'modules/mal` itself:
+
+
+# 00:20:40 >
+# 00:20:40 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"gdk_dbfarm=/export/scratch1/mk/current//Linux/var/MonetDB" "--set"
"mapi_open=true" "--set" "mapi_port=38158" "--set" "monet_prompt=" "--trace"
"--forcemito" "--set" "mal_listing=2" "--dbname=mTests_modules_mal"
"xidlist.mal"
+# 00:20:40 >
+
+# MonetDB 5 server v11.12.0
+# This is an unreleased version
+# Serving database 'mTests_modules_mal', using 8 threads
+# Compiled for x86_64-unknown-linux-gnu/64bit with 64bit OIDs dynamically
linked
+# Found 15.629 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
+# Visit http://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://vienna.ins.cwi.nl:38158/
+# MonetDB/GIS module loaded
+# MonetDB/JAQL module loaded
+# MonetDB/SQL module loaded
+# MonetDB/DataCell loaded
+function user.main():void;
+# OID list compressions
+ b := bat.new(:oid,:oid);
+ bat.append(b,0@0);
+ bat.append(b,1@0);
+ bat.append(b,2@0);
+ bat.append(b,3@0);
+ bat.append(b,5@0);
+ bat.append(b,7@0);
+ bat.append(b,70@0);
+ bat.append(b,188@0);
+ bat.append(b,190@0);
+ bat.append(b,192@0);
+ bat.append(b,9999@0);
+ bat.append(b,50@0);
+ bat.append(b,49@0);
+ bat.append(b,50@0);
+ bat.append(b,50@0);
+ bat.append(b,48@0);
+ b := bat.append(b,b);
+ io.print(b);
+ x := xid.compress(b);
+ xid.dump(x);
+ z := xid.decompress(x);
+ io.print(z);
+end main;
+#-----------------#
+# h t # name
+# void oid # type
+#-----------------#
+[ 0@0, 0@0 ]
+[ 1@0, 1@0 ]
+[ 2@0, 2@0 ]
+[ 3@0, 3@0 ]
+[ 4@0, 5@0 ]
+[ 5@0, 7@0 ]
+[ 6@0, 70@0 ]
+[ 7@0, 188@0 ]
+[ 8@0, 190@0 ]
+[ 9@0, 192@0 ]
+[ 10@0, 9999@0 ]
+[ 11@0, 50@0 ]
+[ 12@0, 49@0 ]
+[ 13@0, 50@0 ]
+[ 14@0, 50@0 ]
+[ 15@0, 48@0 ]
+[ 16@0, 0@0 ]
+[ 17@0, 1@0 ]
+[ 18@0, 2@0 ]
+[ 19@0, 3@0 ]
+[ 20@0, 5@0 ]
+[ 21@0, 7@0 ]
+[ 22@0, 70@0 ]
+[ 23@0, 188@0 ]
+[ 24@0, 190@0 ]
+[ 25@0, 192@0 ]
+[ 26@0, 9999@0 ]
+[ 27@0, 50@0 ]
+[ 28@0, 49@0 ]
+[ 29@0, 50@0 ]
+[ 30@0, 50@0 ]
+[ 31@0, 48@0 ]
+#xid, 35, tail compress, 32,26, clk 136 ms 2 usec
+column first 27, size 0,
+r:0 3
+p:5
+p:7
+p:70
+p:188
+s:[188] 024
+p:9999
+p:50
+r:49 50
+p:50
+p:48
+r:0 3
+p:5
+p:7
+p:70
+p:188
+s:[188] 024
+p:9999
+p:50
+r:49 50
+p:50
+p:48
+#xid, 35, decompress, 27, 32, clk 136 1 usec
+
+# 00:20:41 >
+# 00:20:41 > "Done."
+# 00:20:41 >
+
diff --git a/monetdb5/modules/mal/mal_init.mal
b/monetdb5/modules/mal/mal_init.mal
--- a/monetdb5/modules/mal/mal_init.mal
+++ b/monetdb5/modules/mal/mal_init.mal
@@ -54,6 +54,7 @@ include aggr;
include array;
include pqueue;
include mkey;
+include xid;
# @-
# Atom extensions
diff --git a/monetdb5/modules/mal/xid.c b/monetdb5/modules/mal/xid.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/xid.c
@@ -0,0 +1,336 @@
+/*
+ *The contents of this file are subject to the MonetDB Public License
+ *Version 1.1 (the "License"); you may not use this file except in
+ *compliance with the License. You may obtain a copy of the License at
+ *http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ *Software distributed under the License is distributed on an "AS IS"
+ *basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ *License for the specific language governing rights and limitations
+ *under the License.
+ *
+ *The Original Code is the MonetDB Database System.
+ *
+ *The Initial Developer of the Original Code is CWI.
+ *Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ *Copyright August 2008-2012 MonetDB B.V.
+ *All Rights Reserved.
+*/
+/*
+ * author Martin Kersten
+ * Light-weight compress oid columns to reduce temporary storage footprint.
+*/
+#include "xid.h"
+
+static long
+XIDencode(XIDcolumn col, oid *p, oid *q)
+{
+ lng v, prev=0;
+ long i=0,scnt =0;
+ //long point=0, range=0,set=0;
+
+ col[++i].tag = XIDBASE;
+ for ( v= *(oid*)p ; p<q; p++, v= *(oid*) p)
+ switch ( (unsigned int) col[i].tag & XIDMASK ){
+ case XIDBASE:
+ col[i].tag = XIDPOINT;
+ col[i].value = v;
+ //mnstr_printf(GDKout,"xidpoint %d %ld\n",i,v);
+ break;
+ case XIDSET:
+ /* watch out for duplicates */
+ if ( v > col[i-1].value && v <= col[i-1].value + 61 &&
+ (col[i].value & ((long)1)<< (v -
col[i-1].value)) == 0){
+ col[i].value |= ( ((long)1)<< (v -
col[i-1].value));
+ scnt++;
+ prev= v;
+ //mnstr_printf(GDKout,"xidset %d %ld\n",i,(v -
col[i-1].value));
+ break;
+ }
+ if (scnt == 1) {
+ col[i].tag = XIDPOINT;
+ col[i].value = prev;
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list