Changeset: 3e3beae5932c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3e3beae5932c
Added Files:
        monetdb5/modules/mal/Tests/xidlist.mal
        monetdb5/modules/mal/Tests/xidlist.stable.err
        monetdb5/modules/mal/Tests/xidlist.stable.out
        monetdb5/modules/mal/xid.c
        monetdb5/modules/mal/xid.h
        monetdb5/modules/mal/xid.mal
        monetdb5/optimizer/opt_xid.c
        monetdb5/optimizer/opt_xid.h
Modified Files:
        gdk/gdk.h
        monetdb5/modules/mal/Makefile.ag
        monetdb5/modules/mal/Tests/All
        monetdb5/modules/mal/mal_init.mal
        monetdb5/optimizer/Makefile.ag
        monetdb5/optimizer/opt_pipes.c
        monetdb5/optimizer/opt_support.c
        monetdb5/optimizer/opt_support.h
        monetdb5/optimizer/opt_wrapper.c
        monetdb5/optimizer/optimizer.mal
Branch: default
Log Message:

Experimental OID list compression
The intermediates of queries comprise of large OID sets. The views are
a good method to compress them when a range is involved.

This batch of changes introduce an optimizer to inject a compression/
decompression scheme at runtime for oid,oid BATs. It is rough and
require some debugging on larger sets. Also optimization to ignore
badly compressable oid lists and avoidance of multi-decompression
could be studied.

The compression scheme itself is based on exploitation of local
properties, such as oid-ranges and small oid-sets.
Extension towards other datatypes and compression schemes (e.g.rle)
could be considered.

In general, (de)compression trades cpu from memory footprint,
which means that this scheme is most likely to be of use in
out-of-memory situations.


diffs (truncated from 893 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -625,7 +625,8 @@ typedef struct {
 
        unsigned int copied:1,  /* a copy of an existing map. */
                      hashash:1,/* the string heap contains hash values */
-                     forcemap:1;  /* force STORE_MMAP even if heap exists */
+                     forcemap:1,  /* force STORE_MMAP even if heap exists */
+                         xidcompressed:1; /* compress heaps */
        storage_t storage;      /* storage mode (mmap/malloc). */
        storage_t newstorage;   /* new desired storage mode at re-allocation. */
        bte dirty;              /* specific heap dirty marker */
diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -62,6 +62,7 @@ lib_mal = {
                urlbox.c urlbox.h \
                zorder.c zorder.h \
                sample.c sample.h \
+               xid.c xid.h \
                calc.c batcalc.c
 }
 
@@ -78,7 +79,7 @@ headers_mal = {
                txtsim.mal recycle.mal \
                cluster.mal trader.mal \
                tokenizer.mal zorder.mal sample.mal \
-               calc.mal batcalc.mal
+               calc.mal batcalc.mal xid.mal
 }
 
 EXTRA_DIST = attach.mal batExtensions.mal iterator.mal constraints.mal 
groupby.mal histogram.mal mal_init.mal manual.mal mkey.mal pcre.mal 
profiler.mal recycle.mal remote.mal sabaoth.mal trader.mal transaction.mal 
txtsim.mal tablet.mal tablet.h sample.mal mal_mapi.mal mat.mal tokenizer.mal 
pqueue.mal calc.mal batcalc.mal
diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All
--- a/monetdb5/modules/mal/Tests/All
+++ b/monetdb5/modules/mal/Tests/All
@@ -67,6 +67,8 @@ cluster00
 tokenizer00
 zorder
 
+xidlist
+
 #HAVE_RAPTOR?rdf
 
 # might show different output if openssl is compiled without full sha2
diff --git a/monetdb5/modules/mal/Tests/xidlist.mal 
b/monetdb5/modules/mal/Tests/xidlist.mal
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/Tests/xidlist.mal
@@ -0,0 +1,26 @@
+# OID list compressions
+
+b:= bat.new(:oid,:oid);
+bat.append(b,0@0);
+bat.append(b,1@0);
+bat.append(b,2@0);
+bat.append(b,3@0);
+bat.append(b,5@0);
+bat.append(b,7@0);
+bat.append(b,70@0);
+bat.append(b,188@0);
+bat.append(b,190@0);
+bat.append(b,192@0);
+bat.append(b,9999@0);
+bat.append(b,50@0);
+bat.append(b,49@0);
+bat.append(b,50@0);
+bat.append(b,50@0);
+bat.append(b,48@0);
+b:= bat.append(b,b);
+
+io.print(b);
+x:= xid.compress(b);
+xid.dump(x);
+z:= xid.decompress(x);
+io.print(z);
diff --git a/monetdb5/modules/mal/Tests/xidlist.stable.err 
b/monetdb5/modules/mal/Tests/xidlist.stable.err
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/Tests/xidlist.stable.err
@@ -0,0 +1,34 @@
+stderr of test 'xidlist` in directory 'modules/mal` itself:
+
+
+# 00:20:40 >  
+# 00:20:40 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"gdk_dbfarm=/export/scratch1/mk/current//Linux/var/MonetDB" "--set" 
"mapi_open=true" "--set" "mapi_port=38158" "--set" "monet_prompt=" "--trace" 
"--forcemito" "--set" "mal_listing=2" "--dbname=mTests_modules_mal" 
"xidlist.mal"
+# 00:20:40 >  
+
+# builtin opt  gdk_dbname = demo
+# builtin opt  gdk_dbfarm = 
/export/scratch1/mk/current//Linux/var/monetdb5/dbfarm
+# builtin opt  gdk_debug = 0
+# builtin opt  gdk_alloc_map = no
+# builtin opt  gdk_vmtrim = yes
+# builtin opt  monet_prompt = >
+# builtin opt  monet_daemon = no
+# builtin opt  mapi_port = 50000
+# builtin opt  mapi_open = false
+# builtin opt  mapi_autosense = false
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  gdk_dbfarm = /export/scratch1/mk/current//Linux/var/MonetDB
+# cmdline opt  mapi_open = true
+# cmdline opt  mapi_port = 38158
+# cmdline opt  monet_prompt = 
+# cmdline opt  mal_listing = 2
+# cmdline opt  gdk_dbname = mTests_modules_mal
+mserver5: /export/scratch1/mk/current//package/gdk/gdk_bat.c:2910: 
BATassertHeadProps: Assertion `!b->H->revsorted || cmp >= 0' failed.
+
+Aborted
+
+# 00:20:41 >  
+# 00:20:41 >  "Done."
+# 00:20:41 >  
+
diff --git a/monetdb5/modules/mal/Tests/xidlist.stable.out 
b/monetdb5/modules/mal/Tests/xidlist.stable.out
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/Tests/xidlist.stable.out
@@ -0,0 +1,112 @@
+stdout of test 'xidlist` in directory 'modules/mal` itself:
+
+
+# 00:20:40 >  
+# 00:20:40 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"gdk_dbfarm=/export/scratch1/mk/current//Linux/var/MonetDB" "--set" 
"mapi_open=true" "--set" "mapi_port=38158" "--set" "monet_prompt=" "--trace" 
"--forcemito" "--set" "mal_listing=2" "--dbname=mTests_modules_mal" 
"xidlist.mal"
+# 00:20:40 >  
+
+# MonetDB 5 server v11.12.0
+# This is an unreleased version
+# Serving database 'mTests_modules_mal', using 8 threads
+# Compiled for x86_64-unknown-linux-gnu/64bit with 64bit OIDs dynamically 
linked
+# Found 15.629 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
+# Visit http://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://vienna.ins.cwi.nl:38158/
+# MonetDB/GIS module loaded
+# MonetDB/JAQL module loaded
+# MonetDB/SQL module loaded
+# MonetDB/DataCell loaded
+function user.main():void;
+# OID list compressions 
+    b := bat.new(:oid,:oid);
+    bat.append(b,0@0);
+    bat.append(b,1@0);
+    bat.append(b,2@0);
+    bat.append(b,3@0);
+    bat.append(b,5@0);
+    bat.append(b,7@0);
+    bat.append(b,70@0);
+    bat.append(b,188@0);
+    bat.append(b,190@0);
+    bat.append(b,192@0);
+    bat.append(b,9999@0);
+    bat.append(b,50@0);
+    bat.append(b,49@0);
+    bat.append(b,50@0);
+    bat.append(b,50@0);
+    bat.append(b,48@0);
+    b := bat.append(b,b);
+    io.print(b);
+    x := xid.compress(b);
+    xid.dump(x);
+    z := xid.decompress(x);
+    io.print(z);
+end main;
+#-----------------#
+# h    t         # name
+# void oid       # type
+#-----------------#
+[ 0@0,   0@0     ]
+[ 1@0,   1@0     ]
+[ 2@0,   2@0     ]
+[ 3@0,   3@0     ]
+[ 4@0,   5@0     ]
+[ 5@0,   7@0     ]
+[ 6@0,   70@0    ]
+[ 7@0,   188@0   ]
+[ 8@0,   190@0   ]
+[ 9@0,   192@0   ]
+[ 10@0,          9999@0  ]
+[ 11@0,          50@0    ]
+[ 12@0,          49@0    ]
+[ 13@0,          50@0    ]
+[ 14@0,          50@0    ]
+[ 15@0,          48@0    ]
+[ 16@0,          0@0     ]
+[ 17@0,          1@0     ]
+[ 18@0,          2@0     ]
+[ 19@0,          3@0     ]
+[ 20@0,          5@0     ]
+[ 21@0,          7@0     ]
+[ 22@0,          70@0    ]
+[ 23@0,          188@0   ]
+[ 24@0,          190@0   ]
+[ 25@0,          192@0   ]
+[ 26@0,          9999@0  ]
+[ 27@0,          50@0    ]
+[ 28@0,          49@0    ]
+[ 29@0,          50@0    ]
+[ 30@0,          50@0    ]
+[ 31@0,          48@0    ]
+#xid, 35, tail compress, 32,26,  clk 136 ms 2 usec
+column first 27, size 0, 
+r:0 3
+p:5
+p:7
+p:70
+p:188
+s:[188] 024
+p:9999
+p:50
+r:49 50
+p:50
+p:48
+r:0 3
+p:5
+p:7
+p:70
+p:188
+s:[188] 024
+p:9999
+p:50
+r:49 50
+p:50
+p:48
+#xid, 35, decompress, 27, 32, clk 136 1 usec
+
+# 00:20:41 >  
+# 00:20:41 >  "Done."
+# 00:20:41 >  
+
diff --git a/monetdb5/modules/mal/mal_init.mal 
b/monetdb5/modules/mal/mal_init.mal
--- a/monetdb5/modules/mal/mal_init.mal
+++ b/monetdb5/modules/mal/mal_init.mal
@@ -54,6 +54,7 @@ include aggr;
 include array;
 include pqueue;
 include mkey;
+include xid;
 
 # @-
 # Atom extensions
diff --git a/monetdb5/modules/mal/xid.c b/monetdb5/modules/mal/xid.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/xid.c
@@ -0,0 +1,336 @@
+/*
+ *The contents of this file are subject to the MonetDB Public License
+ *Version 1.1 (the "License"); you may not use this file except in
+ *compliance with the License. You may obtain a copy of the License at
+ *http://www.monetdb.org/Legal/MonetDBLicense
+ *
+ *Software distributed under the License is distributed on an "AS IS"
+ *basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ *License for the specific language governing rights and limitations
+ *under the License.
+ *
+ *The Original Code is the MonetDB Database System.
+ *
+ *The Initial Developer of the Original Code is CWI.
+ *Portions created by CWI are Copyright (C) 1997-July 2008 CWI.
+ *Copyright August 2008-2012 MonetDB B.V.
+ *All Rights Reserved.
+*/
+/*
+ * author Martin Kersten
+ * Light-weight compress oid columns to reduce temporary storage footprint.
+*/
+#include "xid.h"
+
+static long
+XIDencode(XIDcolumn col, oid *p, oid *q)
+{
+       lng v, prev=0;
+       long i=0,scnt =0; 
+       //long point=0, range=0,set=0;
+
+       col[++i].tag = XIDBASE;
+       for ( v= *(oid*)p ; p<q; p++, v= *(oid*) p)
+               switch ( (unsigned int) col[i].tag & XIDMASK ){
+               case XIDBASE:
+                       col[i].tag = XIDPOINT;
+                       col[i].value = v;
+                       //mnstr_printf(GDKout,"xidpoint %d %ld\n",i,v);
+                       break;
+               case XIDSET:
+                       /* watch out for duplicates */
+                       if ( v > col[i-1].value && v <= col[i-1].value + 61 &&
+                               (col[i].value &  ((long)1)<< (v - 
col[i-1].value)) == 0){
+                               col[i].value |=  ( ((long)1)<< (v - 
col[i-1].value));
+                               scnt++;
+                               prev= v;
+                               //mnstr_printf(GDKout,"xidset %d %ld\n",i,(v - 
col[i-1].value));
+                               break;
+                       }
+                       if (scnt == 1) { 
+                               col[i].tag = XIDPOINT;
+                               col[i].value = prev;
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to