MonetDB: default - Merge with Feb2013 branch.

Sjoerd Mullender Mon, 25 Feb 2013 01:51:17 -0800

Changeset: bf06921538d9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bf06921538d9
Removed Files:
        testing/subprocess26.py
Modified Files:
        gdk/gdk_group.c
        sql/scripts/75_storagemodel.sql
        sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
        sql/test/testdb-upgrade/Tests/upgrade.stable.out
        testing/Makefile.ag
        testing/Mtest.py.in
Branch: default
Log Message:


Merge with Feb2013 branch.


diffs (truncated from 2046 to 300 lines):

diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -64,9 +64,9 @@
  *
  * Otherwise we build a partial hash table on the fly.
  *
- * A decision should be made on the order in which grouping occurs Let
- * |b| have << different values than |g| then the linked lists gets
- * extremely long, leading to a n^2 algorithm.
+ * A decision should be made on the order in which grouping occurs.
+ * Let |b| have << different values than |g| then the linked lists
+ * gets extremely long, leading to a n^2 algorithm.
  * At the MAL level, the multigroup function would perform the dynamic
  * optimization.
  */
@@ -451,20 +451,17 @@ BATgroup_internal(BAT **groups, BAT **ex
                        GRPnotfound();
                }
        } else if (b->T->hash) {
-               bit gc = g && (g->tsorted || g->trevsorted);
-
-               /* we already have a hash table on b;
-                * we also exploit if g is clustered */
+               /* we already have a hash table on b */
                ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
                                  "g=%s#" BUNFMT ","
                                  "e=%s#" BUNFMT ","
                                  "h=%s#" BUNFMT ",subsorted=%d): "
-                                 "use existing hash table%s\n",
+                                 "use existing hash table\n",
                                  BATgetId(b), BATcount(b),
                                  g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
                                  e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
                                  h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
-                                 subsorted, gc ? " (g clustered)" : "");
+                                 subsorted);
                hs = b->T->hash;
                gn->tsorted = 1; /* be optimistic */
                for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) {
@@ -473,45 +470,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                         * HASHloop: the difference is that we only
                         * consider BUNs smaller than the one we're
                         * looking up (p), and that we also consider
-                        * the input groups;
-                        * we also exploit if g is clustered */
-                       /* skip irrelevant BUNs after the current
-                        * BUNs; exploit that hash-table links
-                        * backwards through BAT */
-                       for (hb = HASHget(hs,HASHprobe(hs, v));
-                            hb != HASHnil(hs)&& hb >= p;
-                            hb = HASHgetlink(hs,hb)) {
-                               assert( HASHgetlink(hs,hb) == HASHnil(hs)
-                                      || HASHgetlink(hs,hb) < hb);
-                       }
-                       if (gc) {
-                               for (;
-                                    hb != HASHnil(hs) && grps[hb - r] == grps[p - r];
-                                    hb = HASHgetlink(hs,hb)) {
-                                       assert( HASHgetlink(hs,hb) == HASHnil(hs)
-                                              || HASHgetlink(hs,hb) < hb);
-                                       if (cmp(v, BUNtail(bi, hb)) == 0) {
-                                               oid grp = ngrps[hb - r];
-                                               ngrps[p - r] = grp;
-                                               if (histo)
-                                                       cnts[grp]++;
-                                               if (gn->tsorted &&
-                                                   grp != ngrp - 1)
-                                                       gn->tsorted = 0;
-                                               break;
-                                       }
-                               }
-                               if (hb != HASHnil(hs) &&
-                                   grps[hb - r] != grps[p - r]) {
-                                       /* we didn't assign a group
-                                        * yet */
-                                       hb = HASHnil(hs);
-                               }
-                       } else if (grps) {
-                               for (;
+                        * the input groups */
+                       if (grps) {
+                               for (hb = HASHget(hs, HASHprobe(hs, v));
                                     hb != HASHnil(hs);
-                                    hb = HASHgetlink(hs,hb)) {
-                                       if (grps[hb - r] == grps[p - r] &&
+                                    hb = HASHgetlink(hs, hb)) {
+                                       if (hb < p &&
+                                           grps[hb - r] == grps[p - r] &&
                                            cmp(v, BUNtail(bi, hb)) == 0) {
                                                oid grp = ngrps[hb - r];
                                                ngrps[p - r] = grp;
@@ -524,10 +489,11 @@ BATgroup_internal(BAT **groups, BAT **ex
                                        }
                                }
                        } else {
-                               for (;
+                               for (hb = HASHget(hs, HASHprobe(hs, v));
                                     hb != HASHnil(hs);
-                                    hb = HASHgetlink(hs,hb)) {
-                                       if (cmp(v, BUNtail(bi, hb)) == 0) {
+                                    hb = HASHgetlink(hs, hb)) {
+                                       if (hb < p &&
+                                           cmp(v, BUNtail(bi, hb)) == 0) {
                                                oid grp = ngrps[hb - r];
                                                ngrps[p - r] = grp;
                                                if (histo)
diff --git a/sql/scripts/75_storagemodel.sql b/sql/scripts/75_storagemodel.sql
--- a/sql/scripts/75_storagemodel.sql
+++ b/sql/scripts/75_storagemodel.sql
@@ -22,20 +22,20 @@
 -- By chancing the storagemodelinput table directly, the footprint for
 -- yet to be loaded databases can be assessed.
 
--- The actual storage footprint of an existing database can be 
+-- The actual storage footprint of an existing database can be
 -- obtained by the table procuding function storage()
 -- It represents the actual state of affairs, i.e. storage on disk
 -- of columns and foreign key indices, and possible temporary hash indices.
 -- For strings we take a sample to determine their average length.
 
-create function storage()
+create function sys.storage()
 returns table ("schema" string, "table" string, "column" string, "type" string, location string, "count" bigint, typewidth int, columnsize bigint, heapsize bigint, indices bigint, sorted boolean)
 external name sql.storage;
 
 -- To determine the footprint of an arbitrary database, we first have
 -- to define its schema, followed by an indication of the properties of each column.
 -- A storage model input table for the size prediction is shown below:
-create table storagemodelinput(
+create table sys.storagemodelinput(
        "schema" string,
        "table" string,
        "column" string,
@@ -47,21 +47,21 @@ create table storagemodelinput(
        "reference" boolean,-- used as foreign key reference
        "sorted" boolean        -- if set there is no need for an index
 );
-update _tables
+update sys._tables
        set system = true
        where name = 'storagemodelinput'
-               and schema_id = (select id from schemas where name = 'sys');
+               and schema_id = (select id from sys.schemas where name = 'sys');
 -- this table can be adjusted to reflect the anticipated final database size
 
 -- The model input can be derived from the current database using
-create procedure storagemodelinit()
+create procedure sys.storagemodelinit()
 begin
-       delete from storagemodelinput;
+       delete from sys.storagemodelinput;
 
-       insert into storagemodelinput 
-       select X."schema", X."table", X."column", X."type", X.typewidth, X.count, 0, X.typewidth, false, X.sorted from storage() X;
+       insert into sys.storagemodelinput
+       select X."schema", X."table", X."column", X."type", X.typewidth, X.count, 0, X.typewidth, false, X.sorted from sys.storage() X;
 
-       update storagemodelinput
+       update sys.storagemodelinput
        set reference = true
        where concat(concat("schema","table"), "column") in (
                 SELECT concat( concat("fkschema"."name", "fktable"."name"), "fkkeycol"."name" )
@@ -74,17 +74,17 @@ begin
                        AND "fkschema"."id" = "fktable"."schema_id"
                        AND "fkkey"."rkey" > -1);
 
-       update storagemodelinput
+       update sys.storagemodelinput
        set "distinct" = "count" -- assume all distinct
        where "type" = 'varchar' or "type"='clob';
 end;
 
--- The predicted storage footprint of the complete database 
+-- The predicted storage footprint of the complete database
 -- determines the amount of diskspace needed for persistent storage
 -- and the upperbound when all possible index structures are created.
 -- The storage requirement for foreign key joins is split amongst the participants.
 
-create function columnsize(nme string, i bigint, d bigint)
+create function sys.columnsize(nme string, i bigint, d bigint)
 returns bigint
 begin
        case
@@ -94,7 +94,7 @@ begin
        when nme = 'int'         then return 4 * i;
        when nme = 'bigint'      then return 8 * i;
        when nme = 'timestamp' then return 8 * i;
-       when  nme = 'varchar' then 
+       when  nme = 'varchar' then
                case
                when cast(d as bigint) << 8 then return i;
                when cast(d as bigint) << 16 then return 2 * i;
@@ -105,7 +105,7 @@ begin
        end case;
 end;
 
-create function heapsize(tpe string, i bigint, w int)
+create function sys.heapsize(tpe string, i bigint, w int)
 returns bigint
 begin
        if  tpe <> 'varchar' and tpe <> 'clob'
@@ -115,7 +115,7 @@ begin
        return 10240 + i * w;
 end;
 
-create function indexsize(b boolean, i bigint)
+create function sys.indexsize(b boolean, i bigint)
 returns bigint
 begin
        -- assume non-compound keys
@@ -126,37 +126,37 @@ begin
        return 0;
 end;
 
-create function storagemodel()
+create function sys.storagemodel()
 returns table (
        "schema" string,
        "table" string,
        "column" string,
        "type" string,
-       "count" bigint,         
+       "count" bigint,
        columnsize bigint,
        heapsize bigint,
        indices bigint,
        sorted boolean)
 begin
        return select I."schema", I."table", I."column", I."type", I."count",
-       columnsize(I."type", I.count, I."distinct"), 
-       heapsize(I."type", I."distinct", I."atomwidth"), 
+       columnsize(I."type", I.count, I."distinct"),
+       heapsize(I."type", I."distinct", I."atomwidth"),
        indexsize(I."reference", I."count"),
        I.sorted
-       from storagemodelinput I;
+       from sys.storagemodelinput I;
 end;
 
 -- A summary of the table storage requirement is is available as a table view.
 -- The auxillary column denotes the maximum space if all non-sorted columns
 -- would be augmented with a hash (rare situation)
-create view tablestoragemodel
+create view sys.tablestoragemodel
 as select "schema","table",max(count) as "count",
        sum(columnsize) as columnsize,
        sum(heapsize) as heapsize,
        sum(indices) as indices,
        sum(case when sorted = false then 8 * count else 0 end) as auxillary
-from storagemodel() group by "schema","table";
-update _tables
+from sys.storagemodel() group by "schema","table";
+update sys._tables
        set system = true
        where name = 'tablestoragemodel'
-               and schema_id = (select id from schemas where name = 'sys');
+               and schema_id = (select id from sys.schemas where name = 'sys');
diff --git a/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out b/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
--- a/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
+++ b/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
@@ -20,51 +20,181 @@ stdout of test 'upgrade` in directory 's
 
 Ready.
 Running database upgrade commands:
-drop function sys.zorder_slice;
+drop filter function sys."like"(string, string, string);
+drop filter function sys."ilike"(string, string, string);
+create filter function sys."like"(val string, pat string, esc string) external name algebra.likesubselect;
+create filter function sys."ilike"(val string, pat string, esc string) external name algebra.ilikesubselect;
+drop function sys.storage;
+-- The contents of this file are subject to the MonetDB Public License
+-- Version 1.1 (the "License"); you may not use this file except in
+-- compliance with the License. You may obtain a copy of the License at
+-- http://www.monetdb.org/Legal/MonetDBLicense
+--
+-- Software distributed under the License is distributed on an "AS IS"
+-- basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+-- License for the specific language governing rights and limitations
+-- under the License.
+--
+-- The Original Code is the MonetDB Database System.
+--
+-- The Initial Developer of the Original Code is CWI.
+-- Copyright August 2008-2013 MonetDB B.V.
+-- All Rights Reserved.
 
-Running database upgrade commands:
-create aggregate sys.stddev_samp(val TINYINT) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val SMALLINT) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val INTEGER) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val BIGINT) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val REAL) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val DOUBLE) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val DATE) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val TIME) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_samp(val TIMESTAMP) returns DOUBLE external name "aggr"."stdev";
-create aggregate sys.stddev_pop(val TINYINT) returns DOUBLE external name "aggr"."stdevp";
-create aggregate sys.stddev_pop(val SMALLINT) returns DOUBLE external name "aggr"."stdevp";
-create aggregate sys.stddev_pop(val INTEGER) returns DOUBLE external name "aggr"."stdevp";
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

MonetDB: default - Merge with Feb2013 branch.

Reply via email to