Changeset: 3cf74ecf53fa for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cf74ecf53fa
Modified Files:
sql/scripts/75_storagemodel.sql
sql/test/Dependencies/Tests/Dependencies.stable.out
sql/test/Dependencies/Tests/Dependencies.stable.out.int128
sql/test/Dependencies/Tests/dependency_loop.stable.out
sql/test/Dependencies/Tests/dependency_loop.stable.out.int128
sql/test/Tests/systemfunctions.stable.out
sql/test/Tests/systemfunctions.stable.out.int128
sql/test/emptydb/Tests/check.stable.out
sql/test/emptydb/Tests/check.stable.out.32bit
sql/test/emptydb/Tests/check.stable.out.int128
Branch: default
Log Message:
Improve and extend storagemodel script:
- exclude system tables in views sys."storage", sys."tablestorage" and
sys."schemastorage".
- exclude system tables in procedure sys.storagemodelinit() when populating
table sys.storagemodelinput
as system tables are not useful to be modeled for storagesize by application
users.
- improve calculation of atomwidth in procedure sys.storagemodelinit()
- improved sys.columnsize() calculation for var-size type columns and added
missing types: 'geometry', 'geometrya' and 'mbr'
- added missing types 'geometry' and 'geometrya' to sys.heapsize()
- added missing types inet', 'uuid' and 'mbr' to sys.imprintsize()
- use tpe in ('a', 'b', 'c') instead of: tpe = 'a' or tpe = 'b' or tpe = 'c' to
make it easier to read and extend
diffs (truncated from 1066 to 300 lines):
diff --git a/sql/scripts/75_storagemodel.sql b/sql/scripts/75_storagemodel.sql
--- a/sql/scripts/75_storagemodel.sql
+++ b/sql/scripts/75_storagemodel.sql
@@ -41,6 +41,11 @@ external name sql."storage";
create view sys."storage" as
select * from sys."storage"()
+-- exclude system tables
+ where ("schema", "table") in (
+ SELECT sch."name", tbl."name"
+ FROM sys."tables" AS tbl JOIN sys."schemas" AS sch ON tbl.schema_id =
sch.id
+ WHERE tbl."system" = FALSE)
order by "schema", "table", "column";
create view sys."tablestorage" as
@@ -52,7 +57,7 @@ select "schema", "table",
sum(hashes) as hashsize,
sum("imprints") as imprintsize,
sum(orderidx) as orderidxsize
- from sys."storage"()
+ from sys."storage"
group by "schema", "table"
order by "schema", "table";
@@ -64,11 +69,11 @@ select "schema",
sum(hashes) as hashsize,
sum("imprints") as imprintsize,
sum(orderidx) as orderidxsize
- from sys."storage"()
+ from sys."storage"
group by "schema"
order by "schema";
--- refinements for schemas, tables, and individual columns
+-- refinements for specific schemas, tables, and individual columns
create function sys."storage"(sname varchar(1024))
returns table (
"schema" varchar(1024),
@@ -162,19 +167,23 @@ begin
delete from sys.storagemodelinput;
insert into sys.storagemodelinput
- select "schema", "table", "column", "type", typewidth, "count", 0,
typewidth, FALSE, sorted, revsorted, "unique", orderidx
- from sys."storage"()
- -- exclude system tables (those are not useful to be modeled for
storagesize for application users)
- where ("schema", "table") in (
- SELECT sch."name", tbl."name"
- FROM sys."_tables" AS tbl JOIN sys."schemas" AS sch ON
tbl.schema_id = sch.id
- WHERE tbl."system" = FALSE)
+ select "schema", "table", "column", "type", typewidth, "count", 0,
+ case when "count" > 0 and heapsize >= 8192 and "type" in
('varchar', 'char', 'clob', 'json', 'url')
+ -- string heaps have a header of 8192
+ then cast((heapsize - 8192) / "count" as bigint)
+ when "count" > 0 and heapsize >= 32 and "type" in ('blob',
'geometry', 'geometrya')
+ -- binary data heaps have a header of 32
+ then cast((heapsize - 32) / "count" as bigint)
+ else typewidth end,
+ FALSE, sorted, revsorted, "unique", orderidx
+ from sys."storage" -- view sys."storage" excludes system tables (as
those are not useful to be modeled for storagesize by application users)
order by "schema", "table", "column";
update sys.storagemodelinput
set "distinct" = "count"
where "unique" = TRUE
- or "type" IN ('varchar', 'char', 'clob', 'blob', 'json', 'url'); --
assume all strings are distinct
+ -- assume all variable size types contain distinct values
+ or "type" IN ('varchar', 'char', 'clob', 'json', 'url', 'blob',
'geometry', 'geometrya');
update sys.storagemodelinput
set reference = TRUE
@@ -196,55 +205,48 @@ end;
-- and the upperbound when all possible index structures are created.
-- The storage requirement for foreign key joins is split amongst the
participants.
-create function sys.columnsize(tpe varchar(1024), count bigint, _distinct
bigint, avgwidth int)
+create function sys.columnsize(tpe varchar(1024), count bigint)
returns bigint
begin
-- for fixed size types: typewidth_inbytes * count
- if tpe = 'tinyint' or tpe = 'boolean'
- then
- return count;
+ if tpe in ('tinyint', 'boolean')
+ then return count;
end if;
if tpe = 'smallint'
- then
- return 2 * count;
+ then return 2 * count;
end if;
- if tpe = 'int' or tpe = 'real' or tpe = 'date' or tpe = 'time' or tpe =
'timetz' or tpe = 'sec_interval' or tpe = 'month_interval'
- then
- return 4 * count;
+ if tpe in ('int', 'real', 'date', 'time', 'timetz', 'sec_interval',
'month_interval')
+ then return 4 * count;
end if;
- if tpe = 'bigint' or tpe = 'decimal' or tpe = 'double' or tpe =
'timestamp' or tpe = 'timestamptz' or tpe = 'inet' or tpe = 'oid'
- then
- return 8 * count;
+ if tpe in ('bigint', 'double', 'timestamp', 'timestamptz', 'inet',
'oid')
+ then return 8 * count;
end if;
- if tpe = 'hugeint' or tpe = 'uuid'
- then
- return 16 * count;
+ if tpe in ('hugeint', 'decimal', 'uuid', 'mbr')
+ then return 16 * count;
end if;
- -- for variable size types it is more complicated
- if tpe = 'varchar' or tpe = 'char' or tpe = 'clob' or tpe = 'json' or
tpe = 'url'
- then
- return sys.sql_max(4 * count, 8192 + ((avgwidth + 8) *
_distinct));
+ -- for variable size types we compute the columnsize as refs (assume 4
bytes each for char strings) to the heap, excluding data in the var heap
+ if tpe in ('varchar', 'char', 'clob', 'json', 'url')
+ then return 4 * count;
end if;
- if tpe = 'blob'
- then
- return (avgwidth + 8) * count;
+ if tpe in ('blob', 'geometry', 'geometrya')
+ then return 8 * count;
end if;
- return 16 * count;
+ return 8 * count;
end;
-create function sys.heapsize(tpe varchar(1024), count bigint, _distinct
bigint, avgwidth int)
+create function sys.heapsize(tpe varchar(1024), count bigint, distincts
bigint, avgwidth int)
returns bigint
begin
- if tpe = 'varchar' or tpe = 'char' or tpe = 'clob' or tpe = 'json' or
tpe = 'url'
- then
- return 8192 + ((avgwidth + 8) * _distinct);
+ -- only variable size types have a heap
+ if tpe in ('varchar', 'char', 'clob', 'json', 'url')
+ then return 8192 + ((avgwidth + 8) * distincts);
end if;
- if tpe = 'blob'
- then
- return (avgwidth + 8) * count;
+ if tpe in ('blob', 'geometry', 'geometrya')
+ then return avgwidth * count;
end if;
+
return 0;
end;
@@ -253,8 +255,7 @@ returns bigint
begin
-- assume non-compound keys
if b = true
- then
- return 8 * count;
+ then return 8 * count;
end if;
return 0;
end;
@@ -263,26 +264,21 @@ create function sys.imprintsize(tpe varc
returns bigint
begin
-- for fixed size types: typewidth_inbytes * 0.2 * count
- if tpe = 'tinyint' or tpe = 'boolean'
- then
- return cast(0.2 * count as bigint);
+ if tpe in ('tinyint', 'boolean')
+ then return cast(0.2 * count as bigint);
end if;
if tpe = 'smallint'
- then
- return cast(0.4 * count as bigint);
+ then return cast(0.4 * count as bigint);
end if;
- if tpe = 'int' or tpe = 'real' or tpe = 'date' or tpe = 'time' or tpe =
'timetz' or tpe = 'sec_interval' or tpe = 'month_interval'
- then
- return cast(0.8 * count as bigint);
+ if tpe in ('int', 'real', 'date', 'time', 'timetz', 'sec_interval',
'month_interval')
+ then return cast(0.8 * count as bigint);
end if;
- if tpe = 'bigint' or tpe = 'double' or tpe = 'timestamp' or tpe =
'timestamptz' or tpe = 'oid'
- then
- return cast(1.6 * count as bigint);
+ if tpe in ('bigint', 'double', 'timestamp', 'timestamptz', 'inet',
'oid')
+ then return cast(1.6 * count as bigint);
end if;
- -- decimal can be mapped to tinyint or smallint or int or bigint or
hugeint depending on precision. For the estimate we assume hugeint mapping.
- if tpe = 'hugeint' or tpe = 'decimal'
- then
- return cast(3.2 * count as bigint);
+ -- a decimal can be mapped to tinyint or smallint or int or bigint or
hugeint depending on precision. For the estimate we assume mapping to hugeint.
+ if tpe in ('hugeint', 'decimal', 'uuid', 'mbr')
+ then return cast(3.2 * count as bigint);
end if;
-- imprints are not supported on other types
@@ -291,7 +287,7 @@ end;
create view sys.storagemodel as
select "schema", "table", "column", "type", "count",
- columnsize("type", "count", "distinct", "atomwidth") as columnsize,
+ columnsize("type", "count") as columnsize,
heapsize("type", "count", "distinct", "atomwidth") as heapsize,
hashsize("reference", "count") as hashsize,
imprintsize("type", "count") as imprintsize,
@@ -307,7 +303,7 @@ create view sys.tablestoragemodel as
select "schema", "table",
max("count") as "rowcount",
count(*) as "storages",
- sum(columnsize("type", "count", "distinct", "atomwidth")) as columnsize,
+ sum(columnsize("type", "count")) as columnsize,
sum(heapsize("type", "count", "distinct", "atomwidth")) as heapsize,
sum(hashsize("reference", "count")) as hashsize,
sum(imprintsize("type", "count")) as imprintsize,
diff --git a/sql/test/Dependencies/Tests/Dependencies.stable.out
b/sql/test/Dependencies/Tests/Dependencies.stable.out
--- a/sql/test/Dependencies/Tests/Dependencies.stable.out
+++ b/sql/test/Dependencies/Tests/Dependencies.stable.out
@@ -100,9 +100,9 @@ Dependencies between database objects
% 2, 2, 8 # length
[ "t1", "v1", "DEP_VIEW" ]
[ "v1", "v2", "DEP_VIEW" ]
-#SELECT t.name, i.name, 'DEP_INDEX' from tables as t, idxs as i where
i.table_id = t.id and i.name not in (select name from keys) and t.type = 0;
-% .t, .i, .L44 # table_name
-% name, name, L44 # name
+#SELECT t.name, i.name, 'DEP_INDEX' from tables as t, idxs as i where
i.table_id = t.id and i.name not in (select name from keys) and t.type = 0
order by t.name, i.name;
+% .t, .i, .L45 # table_name
+% name, name, L45 # name
% varchar, varchar, char # type
% 2, 8, 9 # length
[ "t1", "id_index", "DEP_INDEX" ]
@@ -172,8 +172,8 @@ Dependencies between database objects
[ "table_type_id", "table_types_table_type_id_pkey", "DEP_KEY"
]
[ "table_type_name", "table_types_table_type_name_unique", "DEP_KEY"
]
#SELECT c.name, i.name, 'DEP_INDEX' from columns as c, objects as kc, idxs as
i where kc."name" = c.name AND kc.id = i.id AND c.table_id = i.table_id AND
i.name not in (select name from keys) order by c.name, i.name;
-% .c, .i, .L21 # table_name
-% name, name, L21 # name
+% .c, .i, .L22 # table_name
+% name, name, L22 # name
% varchar, varchar, char # type
% 2, 8, 9 # length
[ "id", "id_index", "DEP_INDEX" ]
@@ -207,9 +207,9 @@ Dependencies between database objects
% varchar, varchar, char # type
% 2, 2, 8 # length
[ "v1", "f2", "DEP_FUNC" ]
-#SELECT v.name, i.name, 'DEP_INDEX' from tables as v, idxs as i where
i.table_id = v.id and i.name not in (select name from keys) and v.type = 1;
-% .v, .i, .L44 # table_name
-% name, name, L44 # name
+#SELECT v.name, i.name, 'DEP_INDEX' from tables as v, idxs as i where
i.table_id = v.id and i.name not in (select name from keys) and v.type = 1
order by v.name, i.name;
+% .v, .i, .L45 # table_name
+% name, name, L45 # name
% varchar, varchar, char # type
% 0, 0, 9 # length
#SELECT v.name, tri.name, 'DEP_TRIGGER' from tables as v, triggers as tri,
dependencies as dep where dep.id = v.id AND dep.depend_id =tri.id AND
dep.depend_type = 8 AND v.type = 1;
@@ -416,7 +416,10 @@ Cleanup
[ "schemas", "dependency_schemas_on_users", "DEP_VIEW" ]
[ "schemas", "geometry_columns", "DEP_VIEW" ]
[ "schemas", "ids", "DEP_VIEW" ]
+[ "schemas", "storage", "DEP_VIEW" ]
[ "sequences", "ids", "DEP_VIEW" ]
+[ "storage", "schemastorage", "DEP_VIEW" ]
+[ "storage", "tablestorage", "DEP_VIEW" ]
[ "storagemodelinput", "storagemodel", "DEP_VIEW" ]
[ "storagemodelinput", "tablestoragemodel", "DEP_VIEW" ]
[ "tables", "dependency_columns_on_indexes", "DEP_VIEW" ]
@@ -435,6 +438,7 @@ Cleanup
[ "tables", "dependency_views_on_procedures", "DEP_VIEW" ]
[ "tables", "dependency_views_on_views", "DEP_VIEW" ]
[ "tables", "geometry_columns", "DEP_VIEW" ]
+[ "tables", "storage", "DEP_VIEW" ]
[ "triggers", "dependency_columns_on_triggers", "DEP_VIEW" ]
[ "triggers", "dependency_functions_on_triggers", "DEP_VIEW" ]
[ "triggers", "dependency_tables_on_triggers", "DEP_VIEW" ]
@@ -447,8 +451,8 @@ Cleanup
[ "types", "ids", "DEP_VIEW" ]
[ "users", "dependency_schemas_on_users", "DEP_VIEW" ]
#SELECT t.name, i.name, 'DEP_INDEX' from tables as t, idxs as i where
i.table_id = t.id and i.name not in (select name from keys) and t.type in (0,
10, 20, 30) order by t.name, i.name;
-% .t, .i, .L44 # table_name
-% name, name, L44 # name
+% .t, .i, .L45 # table_name
+% name, name, L45 # name
% varchar, varchar, char # type
% 0, 0, 9 # length
#(SELECT t.name as name, tri.name as trigname, 'DEP_TRIGGER' from tables as t,
triggers as tri where tri.table_id = t.id) UNION (SELECT t.name as name,
tri.name as trigname, 'DEP_TRIGGER' from triggers tri, tables t, dependencies
dep where dep.id = t.id AND dep.depend_id =tri.id AND dep.depend_type = 8)
order by name, trigname;
@@ -593,6 +597,7 @@ Cleanup
[ "id", "ids", "DEP_VIEW" ]
[ "id", "ids", "DEP_VIEW" ]
[ "id", "roles", "DEP_VIEW" ]
+[ "id", "storage", "DEP_VIEW" ]
[ "id", "systemfunctions", "DEP_VIEW" ]
[ "name", "dependency_args_on_types", "DEP_VIEW" ]
[ "name", "dependency_args_on_types", "DEP_VIEW" ]
@@ -639,6 +644,7 @@ Cleanup
[ "name", "ids", "DEP_VIEW" ]
[ "name", "ids", "DEP_VIEW" ]
[ "name", "roles", "DEP_VIEW" ]
+[ "name", "storage", "DEP_VIEW" ]
[ "nr", "dependency_columns_on_indexes", "DEP_VIEW" ]
[ "nr", "dependency_columns_on_keys", "DEP_VIEW" ]
[ "number", "dependency_args_on_types", "DEP_VIEW" ]
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list