This message was forwarded from developers-l...@monetdb.org.  The MonetDB
mailing lists have moved to monetdb.org.  Please subscribe to
developers-l...@monetdb.org, and unsubscribe from this list.
See: http://mail.monetdb.org/mailman/listinfo/developers-list

Send developers-list mailing list submissions to
        developers-l...@monetdb.org

To subscribe or unsubscribe via the World Wide Web, visit
        http://mail.monetdb.org/mailman/listinfo/developers-list
or, via email, send a message with subject or body 'help' to
        developers-list-requ...@monetdb.org

You can reach the person managing the list at
        developers-list-ow...@monetdb.org

When replying, please edit your Subject line so it is more specific
than "Re: Contents of developers-list digest..."


Today's Topics:

   1. Re: MonetDB: default - The storage cost calculator
      (Stefan Manegold)


----------------------------------------------------------------------

Message: 1
Date: Thu, 25 Oct 2012 21:34:02 +0200 (CEST)
From: Stefan Manegold <stefan.maneg...@cwi.nl>
To: developers-l...@monetdb.org
Cc: checkin-l...@monetdb.org
Subject: Re: MonetDB: default - The storage cost calculator
Message-ID: <njd0iqbah7feqd2oh4m7eykm.1351193574...@email.android.com>
Content-Type: text/plain; charset=UTF-8

This checkin --- strangely dated Sep 21 2012; cf. 
http://dev.monetdb.org/hg/MonetDB/rev/523120729e8d !?? --- breaks compilation:
http://monetdb.cwi.nl/testweb/web/status.php


Martin Kersten <comm...@monetdb.org> wrote:

Changeset: 523120729e8d for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=523120729e8d
Added Files:
        sql/scripts/75_storagemodel.sql
Modified Files:
        monetdb5/mal/mal_profiler.c
        sql/backends/monet5/sql.mx
        sql/scripts/Makefile.ag
Branch: default
Log Message:

The storage cost calculator
This script gives the database administrator insight in the actual
footprint of the persistent tables and the maximum playground used
when indices are introduced upon them.
By chancing the storagemodelinput table directly, the footprint for
yet to be loaded databases can be assessed.

The actual storage footprint of an existing database can be
obtained by the table procuding function storage()
It represents the actual state of affairs, i.e. storage on disk
of columns and foreign key indices, and possible temporary hash indices.
For strings we take a sample to determine their average length.


diffs (truncated from 425 to 300 lines):

diff --git a/monetdb5/mal/mal_profiler.c b/monetdb5/mal/mal_profiler.c
--- a/monetdb5/mal/mal_profiler.c
+++ b/monetdb5/mal/mal_profiler.c
@@ -182,6 +182,10 @@ profilerEvent(int idx, MalBlkPtr mb, Mal
        }
        if (profileCounter[PROFstart].status == 0 && start)
                return;
+       if (myname == 0)
+               myname = putName("profiler", 8);
+       if (getModuleId(getInstrPtr(mb, pc)) == myname)
+               return;
        if (offlineProfiling)
                offlineProfilerEvent(idx, mb, stk, pc,start);
        if (cachedProfiling)
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -504,12 +504,13 @@ pattern storage()(
        schema:bat[:oid,:str],
        table:bat[:oid,:str],
        column:bat[:oid,:str],
+       type:bat[:oid,:str],
        location:bat[:oid,:str],
        count:bat[:oid,:lng],
-       capacity:bat[:oid,:lng],
-       width:bat[:oid,:int],
-       size:bat[:oid,:lng],
-       hashsize:bat[:oid,:lng],
+       atomwidth:bat[:oid,:int],
+       columnsize:bat[:oid,:lng],
+       heap:bat[:oid,:lng],
+       indices:bat[:oid,:lng],
        sorted:bat[:oid,:bit])
 address sql_storage
 comment "return a table with storage information ";
@@ -7347,13 +7348,13 @@ str SQLoptimizersUpdate(Client cntxt, Ma
  * Inspection of the actual storage footprint is a recurring question of users.
  * This is modelled as a generic SQL table producing function.
  * create function storage()
- * returns table ("schema" string, "table" string, "column" string, location 
string, "count" bigint, capacity bigint, width int, size bigint, hashsize 
bigint, sorted int)
+ * returns table ("schema" string, "table" string, "column" string, "type" 
string, location string, "count" bigint, width int, columnsize bigint, heapsize 
bigint indices bigint, sorted int)
  * external name sql.storage;
  */
 str
 sql_storage(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       BAT *sch, *tab, *col, *loc, *cnt, *cap, *atom, *size, *aux, *sort;
+       BAT *sch, *tab, *col, *type, *loc, *cnt, *atom, *size, *heap, *indices, 
*sort;
        mvc *m = NULL;
        str msg = getSQLContext(cntxt,mb, &m, NULL);
        sql_trans *tr = m->session->tr;
@@ -7362,13 +7363,14 @@ sql_storage(Client cntxt, MalBlkPtr mb, 
        int *rsch = (int*) getArgReference(stk,pci,0);
        int *rtab = (int*) getArgReference(stk,pci,1);
        int *rcol = (int*) getArgReference(stk,pci,2);
-       int *rloc = (int*) getArgReference(stk,pci,3);
-       int *rcnt = (int*) getArgReference(stk,pci,4);
-       int *rcap = (int*) getArgReference(stk,pci,5);
+       int *rtype = (int*) getArgReference(stk,pci,3);
+       int *rloc = (int*) getArgReference(stk,pci,4);
+       int *rcnt = (int*) getArgReference(stk,pci,5);
        int *ratom = (int*) getArgReference(stk,pci,6);
        int *rsize = (int*) getArgReference(stk,pci,7);
-       int *raux = (int*) getArgReference(stk,pci,8);
-       int *rsort = (int*) getArgReference(stk,pci,9);
+       int *rheap = (int*) getArgReference(stk,pci,8);
+       int *rindices = (int*) getArgReference(stk,pci,9);
+       int *rsort = (int*) getArgReference(stk,pci,10);
 
        if (msg)
                return msg;
@@ -7379,31 +7381,34 @@ sql_storage(Client cntxt, MalBlkPtr mb, 
        BATseqbase(tab, 0);
        col = BATnew(TYPE_void,TYPE_str, 0);
        BATseqbase(col, 0);
+       type = BATnew(TYPE_void,TYPE_str, 0);
+       BATseqbase(type, 0);
        loc = BATnew(TYPE_void,TYPE_str, 0);
        BATseqbase(loc, 0);
        cnt = BATnew(TYPE_void,TYPE_lng, 0);
        BATseqbase(cnt, 0);
-       cap = BATnew(TYPE_void,TYPE_lng, 0);
-       BATseqbase(cap, 0);
        atom = BATnew(TYPE_void,TYPE_int, 0);
        BATseqbase(atom, 0);
        size = BATnew(TYPE_void,TYPE_lng, 0);
        BATseqbase(size, 0);
-       aux = BATnew(TYPE_void,TYPE_lng, 0);
-       BATseqbase(aux, 0);
+       heap = BATnew(TYPE_void,TYPE_lng, 0);
+       BATseqbase(heap, 0);
+       indices = BATnew(TYPE_void,TYPE_lng, 0);
+       BATseqbase(indices, 0);
        sort = BATnew(TYPE_void,TYPE_bit, 0);
        BATseqbase(sort, 0);
-       if ( sch == NULL || tab == NULL || col == NULL || loc == NULL || sort 
== NULL ||
-                cnt == NULL || cap == NULL || atom == NULL || size == NULL || 
aux == NULL){
+       if ( sch == NULL || tab == NULL || col == NULL || type == NULL || loc 
== NULL || sort == NULL ||
+                cnt == NULL || atom == NULL || size == NULL || heap == NULL 
||indices == NULL){
                if ( sch ) BBPreleaseref(sch->batCacheid);
                if ( tab ) BBPreleaseref(tab->batCacheid);
                if ( col ) BBPreleaseref(col->batCacheid);
                if ( loc ) BBPreleaseref(loc->batCacheid);
                if ( cnt ) BBPreleaseref(cnt->batCacheid);
-               if ( cap ) BBPreleaseref(cap->batCacheid);
+               if ( type ) BBPreleaseref(type->batCacheid);
                if ( atom ) BBPreleaseref(atom->batCacheid);
                if ( size ) BBPreleaseref(size->batCacheid);
-               if ( aux ) BBPreleaseref(aux->batCacheid);
+               if ( heap ) BBPreleaseref(heap->batCacheid);
+               if ( indices ) BBPreleaseref(indices->batCacheid);
                if ( sort ) BBPreleaseref(sort->batCacheid);
                throw(SQL,"sql.storage", MAL_MALLOC_FAIL);
        }
@@ -7412,7 +7417,8 @@ sql_storage(Client cntxt, MalBlkPtr mb, 
                sql_schema *s= (sql_schema*) nsch->data;
                if ( isalpha((int)b->name[0]) )
 
-               if (s->tables.set) for(ntab= (s)->tables.set->h ;ntab; ntab= 
ntab->next){
+               if (s->tables.set) 
+               for(ntab= (s)->tables.set->h ;ntab; ntab= ntab->next){
                        sql_base *bt= ntab->data;
                        sql_table *t= (sql_table*) bt;
                        if (isTable(t))
@@ -7426,28 +7432,49 @@ sql_storage(Client cntxt, MalBlkPtr mb, 
                                sch = BUNappend(sch, b->name, FALSE);
                                tab = BUNappend(tab, bt->name, FALSE);
                                col = BUNappend(col, bc->name, FALSE);
+                               type = BUNappend(type, c->type.type->sqlname, 
FALSE);
 
                                /*printf(" cnt "BUNFMT, BATcount(bn));*/
                                sz= BATcount(bn);
                                cnt = BUNappend(cnt, &sz, FALSE);
-                               /*printf(" cap "BUNFMT, BATcapacity(bn));*/
-                               sz= BATcapacity(bn);
-                               cap = BUNappend(cap, &sz, FALSE);
 
                                /*printf(" loc %s", 
BBP_physical(bn->batCacheid));*/
                                loc = BUNappend(loc, 
BBP_physical(bn->batCacheid), FALSE);
                                /*printf(" width %d", bn->T->width);*/
                                w= bn->T->width;
+                               if ( bn->ttype == TYPE_str){
+                                       BUN p,q;
+                                       double sum=0;
+                                       BATiter bi = bat_iterator(bn);
+                                       lng cnt1,cnt2= cnt1=(lng) BATcount(bn);
+                                       
+                                       /* just take a sample */
+                                       if ( cnt1 > 512)
+                                               cnt1 = cnt2 = 512;
+                                       BATloop(bn,p,q){
+                                               str s = BUNtail(bi,p);
+                                               if( s != NULL && strcmp(s, 
str_nil))
+                                                       sum += (int) strlen(s);
+                                               if ( --cnt1 <= 0)
+                                                       break;
+                                       }
+                                       if ( cnt2)
+                                               w = (int) (sum/cnt2);
+                               }
                                atom = BUNappend(atom, &w, FALSE);
-                               /*printf(" size "BUNFMT, 
tailsize(bn,BATcount(bn)) + (bn->T->vheap? bn->T->vheap->size:0));*/
-                               sz = tailsize(bn,BATcapacity(bn)) + 
(bn->T->vheap? bn->T->vheap->size:0);
-                               sz += headsize(bn,BATcapacity(bn)) + 
(bn->H->vheap? bn->H->vheap->size:0);
+
+                               sz = tailsize(bn,BATcount(bn));
+                               sz += headsize(bn,BATcount(bn));
                                size = BUNappend(size, &sz, FALSE);
                                
+                               sz = bn->T->vheap? bn->T->vheap->size:0;
+                               sz += bn->H->vheap? bn->H->vheap->size:0;
+                               heap = BUNappend(heap, &sz, FALSE);
+
                                sz =  bn->T->hash?bn->T->hash->heap->size:0;
                                sz += bn->H->hash?bn->H->hash->heap->size:0;
-                               aux = BUNappend(aux, &sz, FALSE);
-                               /*printf(" auxsize "BUNFMT, 
bn->T->hash?bn->T->hash->heap->size:0);*/
+                               indices = BUNappend(indices, &sz, FALSE);
+                               /*printf(" indices "BUNFMT, 
bn->T->hash?bn->T->hash->heap->size:0);*/
                                /*printf("\n");*/
 
                                w =  BATtordered(bn);
@@ -7468,28 +7495,49 @@ sql_storage(Client cntxt, MalBlkPtr mb, 
                                                        sch = BUNappend(sch, 
b->name, FALSE);
                                                        tab = BUNappend(tab, 
bt->name, FALSE);
                                                        col = BUNappend(col, 
bc->name, FALSE);
+                                                       type = BUNappend(type, 
"oid", FALSE);
                        
                                                        /*printf(" cnt "BUNFMT, 
BATcount(bn));*/
                                                        sz= BATcount(bn);
                                                        cnt = BUNappend(cnt, 
&sz, FALSE);
-                                                       /*printf(" cap "BUNFMT, 
BATcapacity(bn));*/
-                                                       sz= BATcapacity(bn);
-                                                       cap = BUNappend(cap, 
&sz, FALSE);
                        
                                                        /*printf(" loc %s", 
BBP_physical(bn->batCacheid));*/
                                                        loc = BUNappend(loc, 
BBP_physical(bn->batCacheid), FALSE);
                                                        /*printf(" width %d", 
bn->T->width);*/
                                                        w= bn->T->width;
+                                                       if ( bn->ttype == 
TYPE_str){
+                                                               BUN p,q;
+                                                               double sum=0;
+                                                               BATiter bi = 
bat_iterator(bn);
+                                                               lng cnt1, cnt2= 
cnt1 = BATcount(bn);
+                                                               
+                                                               /* just take a 
sample */
+                                                               if ( cnt1 > 512)
+                                                                       cnt1 = 
cnt2 = 512;
+                                                               BATloop(bn,p,q){
+                                                                       str s = 
BUNtail(bi,p);
+                                                                       if( s 
!= NULL && strcmp(s, str_nil))
+                                                                               
sum += (int) strlen(s);
+                                                                       if ( 
--cnt1 <= 0)
+                                                                               
break;
+                                                               }
+                                                               if ( cnt2)
+                                                                       w = 
(int) (sum/cnt2);
+                                                       }
                                                        atom = BUNappend(atom, 
&w, FALSE);
                                                        /*printf(" size 
"BUNFMT, tailsize(bn,BATcount(bn)) + (bn->T->vheap? bn->T->vheap->size:0));*/
-                                                       sz = 
tailsize(bn,BATcapacity(bn)) + (bn->T->vheap? bn->T->vheap->size:0);
-                                                       sz += 
headsize(bn,BATcapacity(bn)) + (bn->H->vheap? bn->H->vheap->size:0);
+                                                       sz = 
tailsize(bn,BATcount(bn));
+                                                       sz += 
headsize(bn,BATcount(bn));
                                                        size = BUNappend(size, 
&sz, FALSE);
                                                        
+                                                       sz = bn->T->vheap? 
bn->T->vheap->size:0;
+                                                       sz += bn->H->vheap? 
bn->H->vheap->size:0;
+                                                       heap = BUNappend(heap, 
&sz, FALSE);
+
                                                        sz =  
bn->T->hash?bn->T->hash->heap->size:0;
                                                        sz += 
bn->H->hash?bn->H->hash->heap->size:0;
-                                                       aux = BUNappend(aux, 
&sz, FALSE);
-                                                       /*printf(" auxsize 
"BUNFMT, bn->T->hash?bn->T->hash->heap->size:0);*/
+                                                       indices = 
BUNappend(indices, &sz, FALSE);
+                                                       /*printf(" indices 
"BUNFMT, bn->T->hash?bn->T->hash->heap->size:0);*/
                                                        /*printf("\n");*/
                                                        w =  BATtordered(bn);
                                                        sort = BUNappend(sort, 
&w, FALSE);
@@ -7504,11 +7552,12 @@ sql_storage(Client cntxt, MalBlkPtr mb, 
        BBPkeepref(*rtab = tab->batCacheid);
        BBPkeepref(*rcol = col->batCacheid);
        BBPkeepref(*rloc = loc->batCacheid);
+       BBPkeepref(*rtype = type->batCacheid);
        BBPkeepref(*rcnt = cnt->batCacheid);
-       BBPkeepref(*rcap = cap->batCacheid);
        BBPkeepref(*ratom = atom->batCacheid);
        BBPkeepref(*rsize = size->batCacheid);
-       BBPkeepref(*raux = aux->batCacheid);
+       BBPkeepref(*rheap = heap->batCacheid);
+       BBPkeepref(*rindices = indices->batCacheid);
        BBPkeepref(*rsort = sort->batCacheid);
        return MAL_SUCCEED;
 }
diff --git a/sql/scripts/75_storagemodel.sql b/sql/scripts/75_storagemodel.sql
new file mode 100644
--- /dev/null
+++ b/sql/scripts/75_storagemodel.sql
@@ -0,0 +1,154 @@
+-- The contents of this file are subject to the MonetDB Public License
+-- Version 1.1 (the "License"); you may not use this file except in
+-- compliance with the License. You may obtain a copy of the License at
+-- http://www.monetdb.org/Legal/MonetDBLicense
+--
+-- Software distributed under the License is distributed on an "AS IS"
+-- basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+-- License for the specific language governing rights and limitations
+-- under the License.
+--
+-- The Original Code is the MonetDB Database System.
+--
+-- The Initial Developer of the Original Code is CWI.
+-- Copyright August 2008-2012 MonetDB B.V.
+-- All Rights Reserved.
+
+
+-- Author M.Kersten
+-- This script gives the database administrator insight in the actual
+-- footprint of the persistent tables and the maximum playground used
+-- when indices are introduced upon them.
+-- By chancing the storagemodelinput table directly, the footprint for
+-- yet to be loaded databases can be assessed.
+
+-- The actual storage footprint of an existing database can be 
+-- obtained by the table procuding function storage()
+-- It represents the actual state of affairs, i.e. storage on disk
+-- of columns and foreign key indices, and possible temporary hash indices.
+-- For strings we take a sample to determine their average length.
+
+create function storage()
+returns table ("schema" string, "table" string, "column" string, "type" 
string, location string, "count" bigint, typewidth int, columnsize bigint, 
heapsize bigint, indices bigint, sorted boolean)
+external name sql.storage;
+
+-- To determine the footprint of an arbitrary database, we first have
+-- to define its schema, followed by an indication of the properties of each 
column.
+-- A storage model input table for the size prediction is shown below:
+create table storagemodelinput(
+       "schema" string,
+       "table" string,
+       "column" string,
_______________________________________________
checkin-list mailing list
checkin-l...@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list


------------------------------

_______________________________________________
developers-list mailing list
developers-l...@monetdb.org
http://mail.monetdb.org/mailman/listinfo/developers-list


End of developers-list Digest, Vol 2, Issue 11
**********************************************

------------------------------------------------------------------------------
Everyone hates slow websites. So do we.
Make your web apps faster with AppDynamics
Download AppDynamics Lite for free today:
http://p.sf.net/sfu/appdyn_sfd2d_oct
_______________________________________________
Monetdb-developers mailing list
Monetdb-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/monetdb-developers

Reply via email to