Changeset: bf06921538d9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=bf06921538d9
Removed Files:
testing/subprocess26.py
Modified Files:
gdk/gdk_group.c
sql/scripts/75_storagemodel.sql
sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
sql/test/testdb-upgrade/Tests/upgrade.stable.out
testing/Makefile.ag
testing/Mtest.py.in
Branch: default
Log Message:
Merge with Feb2013 branch.
diffs (truncated from 2046 to 300 lines):
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -64,9 +64,9 @@
*
* Otherwise we build a partial hash table on the fly.
*
- * A decision should be made on the order in which grouping occurs Let
- * |b| have << different values than |g| then the linked lists gets
- * extremely long, leading to a n^2 algorithm.
+ * A decision should be made on the order in which grouping occurs.
+ * Let |b| have << different values than |g| then the linked lists
+ * gets extremely long, leading to a n^2 algorithm.
* At the MAL level, the multigroup function would perform the dynamic
* optimization.
*/
@@ -451,20 +451,17 @@ BATgroup_internal(BAT **groups, BAT **ex
GRPnotfound();
}
} else if (b->T->hash) {
- bit gc = g && (g->tsorted || g->trevsorted);
-
- /* we already have a hash table on b;
- * we also exploit if g is clustered */
+ /* we already have a hash table on b */
ALGODEBUG fprintf(stderr, "#BATgroup(b=%s#" BUNFMT ","
"g=%s#" BUNFMT ","
"e=%s#" BUNFMT ","
"h=%s#" BUNFMT ",subsorted=%d): "
- "use existing hash table%s\n",
+ "use existing hash table\n",
BATgetId(b), BATcount(b),
g ? BATgetId(g) : "NULL", g ? BATcount(g) : 0,
e ? BATgetId(e) : "NULL", e ? BATcount(e) : 0,
h ? BATgetId(h) : "NULL", h ? BATcount(h) : 0,
- subsorted, gc ? " (g clustered)" : "");
+ subsorted);
hs = b->T->hash;
gn->tsorted = 1; /* be optimistic */
for (r = BUNfirst(b), p = r, q = r + BATcount(b); p < q; p++) {
@@ -473,45 +470,13 @@ BATgroup_internal(BAT **groups, BAT **ex
* HASHloop: the difference is that we only
* consider BUNs smaller than the one we're
* looking up (p), and that we also consider
- * the input groups;
- * we also exploit if g is clustered */
- /* skip irrelevant BUNs after the current
- * BUNs; exploit that hash-table links
- * backwards through BAT */
- for (hb = HASHget(hs,HASHprobe(hs, v));
- hb != HASHnil(hs)&& hb >= p;
- hb = HASHgetlink(hs,hb)) {
- assert( HASHgetlink(hs,hb) == HASHnil(hs)
- || HASHgetlink(hs,hb) < hb);
- }
- if (gc) {
- for (;
- hb != HASHnil(hs) && grps[hb - r] ==
grps[p - r];
- hb = HASHgetlink(hs,hb)) {
- assert( HASHgetlink(hs,hb) ==
HASHnil(hs)
- || HASHgetlink(hs,hb) < hb);
- if (cmp(v, BUNtail(bi, hb)) == 0) {
- oid grp = ngrps[hb - r];
- ngrps[p - r] = grp;
- if (histo)
- cnts[grp]++;
- if (gn->tsorted &&
- grp != ngrp - 1)
- gn->tsorted = 0;
- break;
- }
- }
- if (hb != HASHnil(hs) &&
- grps[hb - r] != grps[p - r]) {
- /* we didn't assign a group
- * yet */
- hb = HASHnil(hs);
- }
- } else if (grps) {
- for (;
+ * the input groups */
+ if (grps) {
+ for (hb = HASHget(hs, HASHprobe(hs, v));
hb != HASHnil(hs);
- hb = HASHgetlink(hs,hb)) {
- if (grps[hb - r] == grps[p - r] &&
+ hb = HASHgetlink(hs, hb)) {
+ if (hb < p &&
+ grps[hb - r] == grps[p - r] &&
cmp(v, BUNtail(bi, hb)) == 0) {
oid grp = ngrps[hb - r];
ngrps[p - r] = grp;
@@ -524,10 +489,11 @@ BATgroup_internal(BAT **groups, BAT **ex
}
}
} else {
- for (;
+ for (hb = HASHget(hs, HASHprobe(hs, v));
hb != HASHnil(hs);
- hb = HASHgetlink(hs,hb)) {
- if (cmp(v, BUNtail(bi, hb)) == 0) {
+ hb = HASHgetlink(hs, hb)) {
+ if (hb < p &&
+ cmp(v, BUNtail(bi, hb)) == 0) {
oid grp = ngrps[hb - r];
ngrps[p - r] = grp;
if (histo)
diff --git a/sql/scripts/75_storagemodel.sql b/sql/scripts/75_storagemodel.sql
--- a/sql/scripts/75_storagemodel.sql
+++ b/sql/scripts/75_storagemodel.sql
@@ -22,20 +22,20 @@
-- By changing the storagemodelinput table directly, the footprint for
-- yet to be loaded databases can be assessed.
--- The actual storage footprint of an existing database can be
+-- The actual storage footprint of an existing database can be
-- obtained by the table producing function storage()
-- It represents the actual state of affairs, i.e. storage on disk
-- of columns and foreign key indices, and possible temporary hash indices.
-- For strings we take a sample to determine their average length.
-create function storage()
+create function sys.storage()
returns table ("schema" string, "table" string, "column" string, "type"
string, location string, "count" bigint, typewidth int, columnsize bigint,
heapsize bigint, indices bigint, sorted boolean)
external name sql.storage;
-- To determine the footprint of an arbitrary database, we first have
-- to define its schema, followed by an indication of the properties of each
column.
-- A storage model input table for the size prediction is shown below:
-create table storagemodelinput(
+create table sys.storagemodelinput(
"schema" string,
"table" string,
"column" string,
@@ -47,21 +47,21 @@ create table storagemodelinput(
"reference" boolean,-- used as foreign key reference
"sorted" boolean -- if set there is no need for an index
);
-update _tables
+update sys._tables
set system = true
where name = 'storagemodelinput'
- and schema_id = (select id from schemas where name = 'sys');
+ and schema_id = (select id from sys.schemas where name = 'sys');
-- this table can be adjusted to reflect the anticipated final database size
-- The model input can be derived from the current database using
-create procedure storagemodelinit()
+create procedure sys.storagemodelinit()
begin
- delete from storagemodelinput;
+ delete from sys.storagemodelinput;
- insert into storagemodelinput
- select X."schema", X."table", X."column", X."type", X.typewidth,
X.count, 0, X.typewidth, false, X.sorted from storage() X;
+ insert into sys.storagemodelinput
+ select X."schema", X."table", X."column", X."type", X.typewidth,
X.count, 0, X.typewidth, false, X.sorted from sys.storage() X;
- update storagemodelinput
+ update sys.storagemodelinput
set reference = true
where concat(concat("schema","table"), "column") in (
SELECT concat( concat("fkschema"."name", "fktable"."name"),
"fkkeycol"."name" )
@@ -74,17 +74,17 @@ begin
AND "fkschema"."id" = "fktable"."schema_id"
AND "fkkey"."rkey" > -1);
- update storagemodelinput
+ update sys.storagemodelinput
set "distinct" = "count" -- assume all distinct
where "type" = 'varchar' or "type"='clob';
end;
--- The predicted storage footprint of the complete database
+-- The predicted storage footprint of the complete database
-- determines the amount of diskspace needed for persistent storage
-- and the upperbound when all possible index structures are created.
-- The storage requirement for foreign key joins is split amongst the
participants.
-create function columnsize(nme string, i bigint, d bigint)
+create function sys.columnsize(nme string, i bigint, d bigint)
returns bigint
begin
case
@@ -94,7 +94,7 @@ begin
when nme = 'int' then return 4 * i;
when nme = 'bigint' then return 8 * i;
when nme = 'timestamp' then return 8 * i;
- when nme = 'varchar' then
+ when nme = 'varchar' then
case
when cast(d as bigint) << 8 then return i;
when cast(d as bigint) << 16 then return 2 * i;
@@ -105,7 +105,7 @@ begin
end case;
end;
-create function heapsize(tpe string, i bigint, w int)
+create function sys.heapsize(tpe string, i bigint, w int)
returns bigint
begin
if tpe <> 'varchar' and tpe <> 'clob'
@@ -115,7 +115,7 @@ begin
return 10240 + i * w;
end;
-create function indexsize(b boolean, i bigint)
+create function sys.indexsize(b boolean, i bigint)
returns bigint
begin
-- assume non-compound keys
@@ -126,37 +126,37 @@ begin
return 0;
end;
-create function storagemodel()
+create function sys.storagemodel()
returns table (
"schema" string,
"table" string,
"column" string,
"type" string,
- "count" bigint,
+ "count" bigint,
columnsize bigint,
heapsize bigint,
indices bigint,
sorted boolean)
begin
return select I."schema", I."table", I."column", I."type", I."count",
- columnsize(I."type", I.count, I."distinct"),
- heapsize(I."type", I."distinct", I."atomwidth"),
+ columnsize(I."type", I.count, I."distinct"),
+ heapsize(I."type", I."distinct", I."atomwidth"),
indexsize(I."reference", I."count"),
I.sorted
- from storagemodelinput I;
+ from sys.storagemodelinput I;
end;
-- A summary of the table storage requirement is available as a table view.
-- The auxillary column denotes the maximum space if all non-sorted columns
-- would be augmented with a hash (rare situation)
-create view tablestoragemodel
+create view sys.tablestoragemodel
as select "schema","table",max(count) as "count",
sum(columnsize) as columnsize,
sum(heapsize) as heapsize,
sum(indices) as indices,
sum(case when sorted = false then 8 * count else 0 end) as auxillary
-from storagemodel() group by "schema","table";
-update _tables
+from sys.storagemodel() group by "schema","table";
+update sys._tables
set system = true
where name = 'tablestoragemodel'
- and schema_id = (select id from schemas where name = 'sys');
+ and schema_id = (select id from sys.schemas where name = 'sys');
diff --git a/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
b/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
--- a/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
+++ b/sql/test/testdb-upgrade-chain/Tests/upgrade.stable.out
@@ -20,51 +20,181 @@ stdout of test 'upgrade` in directory 's
Ready.
Running database upgrade commands:
-drop function sys.zorder_slice;
+drop filter function sys."like"(string, string, string);
+drop filter function sys."ilike"(string, string, string);
+create filter function sys."like"(val string, pat string, esc string) external
name algebra.likesubselect;
+create filter function sys."ilike"(val string, pat string, esc string)
external name algebra.ilikesubselect;
+drop function sys.storage;
+-- The contents of this file are subject to the MonetDB Public License
+-- Version 1.1 (the "License"); you may not use this file except in
+-- compliance with the License. You may obtain a copy of the License at
+-- http://www.monetdb.org/Legal/MonetDBLicense
+--
+-- Software distributed under the License is distributed on an "AS IS"
+-- basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+-- License for the specific language governing rights and limitations
+-- under the License.
+--
+-- The Original Code is the MonetDB Database System.
+--
+-- The Initial Developer of the Original Code is CWI.
+-- Copyright August 2008-2013 MonetDB B.V.
+-- All Rights Reserved.
-Running database upgrade commands:
-create aggregate sys.stddev_samp(val TINYINT) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val SMALLINT) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val INTEGER) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val BIGINT) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val REAL) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val DOUBLE) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val DATE) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val TIME) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_samp(val TIMESTAMP) returns DOUBLE external name
"aggr"."stdev";
-create aggregate sys.stddev_pop(val TINYINT) returns DOUBLE external name
"aggr"."stdevp";
-create aggregate sys.stddev_pop(val SMALLINT) returns DOUBLE external name
"aggr"."stdevp";
-create aggregate sys.stddev_pop(val INTEGER) returns DOUBLE external name
"aggr"."stdevp";
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list