MonetDB: default - Improve and extend storagemodel script:

Martin van Dinther Thu, 29 Nov 2018 06:28:55 -0800

Changeset: 3cf74ecf53fa for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cf74ecf53fa
Modified Files:
        sql/scripts/75_storagemodel.sql
        sql/test/Dependencies/Tests/Dependencies.stable.out
        sql/test/Dependencies/Tests/Dependencies.stable.out.int128
        sql/test/Dependencies/Tests/dependency_loop.stable.out
        sql/test/Dependencies/Tests/dependency_loop.stable.out.int128
        sql/test/Tests/systemfunctions.stable.out
        sql/test/Tests/systemfunctions.stable.out.int128
        sql/test/emptydb/Tests/check.stable.out
        sql/test/emptydb/Tests/check.stable.out.32bit
        sql/test/emptydb/Tests/check.stable.out.int128
Branch: default
Log Message:


Improve and extend storagemodel script:
- exclude system tables in views sys."storage", sys."tablestorage" and 
sys."schemastorage".
- exclude system tables in procedure sys.storagemodelinit() when populating 
table sys.storagemodelinput
  as system tables are not useful to be modeled for storagesize by application 
users.
- improve calculation of atomwidth in procedure sys.storagemodelinit()
- improved sys.columnsize() calculation for var-size type columns and added 
missing types: 'geometry', 'geometrya' and 'mbr'
- added missing types 'geometry' and 'geometrya' to sys.heapsize()
- added missing types inet', 'uuid' and 'mbr' to sys.imprintsize()
- use tpe in ('a', 'b', 'c') instead of: tpe = 'a' or tpe = 'b' or tpe = 'c' to 
make it easier to read and extend


diffs (truncated from 1066 to 300 lines):

diff --git a/sql/scripts/75_storagemodel.sql b/sql/scripts/75_storagemodel.sql
--- a/sql/scripts/75_storagemodel.sql
+++ b/sql/scripts/75_storagemodel.sql
@@ -41,6 +41,11 @@ external name sql."storage";
 
 create view sys."storage" as
 select * from sys."storage"()
+-- exclude system tables
+ where ("schema", "table") in (
+       SELECT sch."name", tbl."name"
+         FROM sys."tables" AS tbl JOIN sys."schemas" AS sch ON tbl.schema_id = 
sch.id
+        WHERE tbl."system" = FALSE)
 order by "schema", "table", "column";
 
 create view sys."tablestorage" as
@@ -52,7 +57,7 @@ select "schema", "table",
        sum(hashes) as hashsize,
        sum("imprints") as imprintsize,
        sum(orderidx) as orderidxsize
- from sys."storage"()
+ from sys."storage"
 group by "schema", "table"
 order by "schema", "table";
 
@@ -64,11 +69,11 @@ select "schema",
        sum(hashes) as hashsize,
        sum("imprints") as imprintsize,
        sum(orderidx) as orderidxsize
- from sys."storage"()
+ from sys."storage"
 group by "schema"
 order by "schema";
 
--- refinements for schemas, tables, and individual columns
+-- refinements for specific schemas, tables, and individual columns
 create function sys."storage"(sname varchar(1024))
 returns table (
        "schema" varchar(1024),
@@ -162,19 +167,23 @@ begin
        delete from sys.storagemodelinput;
 
        insert into sys.storagemodelinput
-       select "schema", "table", "column", "type", typewidth, "count", 0, 
typewidth, FALSE, sorted, revsorted, "unique", orderidx
-         from sys."storage"()
-       -- exclude system tables (those are not useful to be modeled for 
storagesize for application users)
-        where ("schema", "table") in (
-               SELECT sch."name", tbl."name"
-                 FROM sys."_tables" AS tbl JOIN sys."schemas" AS sch ON 
tbl.schema_id = sch.id
-                WHERE tbl."system" = FALSE)
+       select "schema", "table", "column", "type", typewidth, "count", 0,
+               case when "count" > 0 and heapsize >= 8192 and "type" in 
('varchar', 'char', 'clob', 'json', 'url')
+                       -- string heaps have a header of 8192
+                       then cast((heapsize - 8192) / "count" as bigint)
+               when "count" > 0 and heapsize >= 32 and "type" in ('blob', 
'geometry', 'geometrya')
+                       -- binary data heaps have a header of 32
+                       then cast((heapsize - 32) / "count" as bigint)
+               else typewidth end,
+               FALSE, sorted, revsorted, "unique", orderidx
+         from sys."storage"  -- view sys."storage" excludes system tables (as 
those are not useful to be modeled for storagesize by application users)
        order by "schema", "table", "column";
 
        update sys.storagemodelinput
           set "distinct" = "count"
         where "unique" = TRUE
-           or "type" IN ('varchar', 'char', 'clob', 'blob', 'json', 'url'); -- 
assume all strings are distinct
+               -- assume all variable size types contain distinct values
+           or "type" IN ('varchar', 'char', 'clob', 'json', 'url', 'blob', 
'geometry', 'geometrya');
 
        update sys.storagemodelinput
           set reference = TRUE
@@ -196,55 +205,48 @@ end;
 -- and the upperbound when all possible index structures are created.
 -- The storage requirement for foreign key joins is split amongst the 
participants.
 
-create function sys.columnsize(tpe varchar(1024), count bigint, _distinct 
bigint, avgwidth int)
+create function sys.columnsize(tpe varchar(1024), count bigint)
 returns bigint
 begin
        -- for fixed size types: typewidth_inbytes * count
-       if tpe = 'tinyint' or tpe = 'boolean'
-       then
-               return count;
+       if tpe in ('tinyint', 'boolean')
+               then return count;
        end if;
        if tpe = 'smallint'
-       then
-               return 2 * count;
+               then return 2 * count;
        end if;
-       if tpe = 'int' or tpe = 'real' or tpe = 'date' or tpe = 'time' or tpe = 
'timetz' or tpe = 'sec_interval' or tpe = 'month_interval'
-       then
-               return 4 * count;
+       if tpe in ('int', 'real', 'date', 'time', 'timetz', 'sec_interval', 
'month_interval')
+               then return 4 * count;
        end if;
-       if tpe = 'bigint' or tpe = 'decimal' or tpe = 'double' or tpe = 
'timestamp' or tpe = 'timestamptz' or tpe = 'inet' or tpe = 'oid'
-       then
-               return 8 * count;
+       if tpe in ('bigint', 'double', 'timestamp', 'timestamptz', 'inet', 
'oid')
+               then return 8 * count;
        end if;
-       if tpe = 'hugeint' or tpe = 'uuid'
-       then
-               return 16 * count;
+       if tpe in ('hugeint', 'decimal', 'uuid', 'mbr')
+               then return 16 * count;
        end if;
 
-       -- for variable size types it is more complicated
-       if tpe = 'varchar' or tpe = 'char' or tpe = 'clob' or tpe = 'json' or 
tpe = 'url'
-       then
-               return sys.sql_max(4 * count, 8192 + ((avgwidth + 8) * 
_distinct));
+       -- for variable size types we compute the columnsize as refs (assume 4 
bytes each for char strings) to the heap, excluding data in the var heap
+       if tpe in ('varchar', 'char', 'clob', 'json', 'url')
+               then return 4 * count;
        end if;
-       if tpe = 'blob'
-       then
-               return (avgwidth + 8) * count;
+       if tpe in ('blob', 'geometry', 'geometrya')
+               then return 8 * count;
        end if;
 
-       return 16 * count;
+       return 8 * count;
 end;
 
-create function sys.heapsize(tpe varchar(1024), count bigint, _distinct 
bigint, avgwidth int)
+create function sys.heapsize(tpe varchar(1024), count bigint, distincts 
bigint, avgwidth int)
 returns bigint
 begin
-       if tpe = 'varchar' or tpe = 'char' or tpe = 'clob' or tpe = 'json' or 
tpe = 'url'
-       then
-               return 8192 + ((avgwidth + 8) * _distinct);
+       -- only variable size types have a heap
+       if tpe in ('varchar', 'char', 'clob', 'json', 'url')
+               then return 8192 + ((avgwidth + 8) * distincts);
        end if;
-       if tpe = 'blob'
-       then
-               return (avgwidth + 8) * count;
+       if tpe in ('blob', 'geometry', 'geometrya')
+               then return avgwidth * count;
        end if;
+
        return 0;
 end;
 
@@ -253,8 +255,7 @@ returns bigint
 begin
        -- assume non-compound keys
        if b = true
-       then
-               return 8 * count;
+               then return 8 * count;
        end if;
        return 0;
 end;
@@ -263,26 +264,21 @@ create function sys.imprintsize(tpe varc
 returns bigint
 begin
        -- for fixed size types: typewidth_inbytes * 0.2 * count
-       if tpe = 'tinyint' or tpe = 'boolean'
-       then
-               return cast(0.2 * count as bigint);
+       if tpe in ('tinyint', 'boolean')
+               then return cast(0.2 * count as bigint);
        end if;
        if tpe = 'smallint'
-       then
-               return cast(0.4 * count as bigint);
+               then return cast(0.4 * count as bigint);
        end if;
-       if tpe = 'int' or tpe = 'real' or tpe = 'date' or tpe = 'time' or tpe = 
'timetz' or tpe = 'sec_interval' or tpe = 'month_interval'
-       then
-               return cast(0.8 * count as bigint);
+       if tpe in ('int', 'real', 'date', 'time', 'timetz', 'sec_interval', 
'month_interval')
+               then return cast(0.8 * count as bigint);
        end if;
-       if tpe = 'bigint' or tpe = 'double' or tpe = 'timestamp' or tpe = 
'timestamptz' or tpe = 'oid'
-       then
-               return cast(1.6 * count as bigint);
+       if tpe in ('bigint', 'double', 'timestamp', 'timestamptz', 'inet', 
'oid')
+               then return cast(1.6 * count as bigint);
        end if;
-       -- decimal can be mapped to tinyint or smallint or int or bigint or 
hugeint depending on precision. For the estimate we assume hugeint mapping.
-       if tpe = 'hugeint' or tpe = 'decimal'
-       then
-               return cast(3.2 * count as bigint);
+       -- a decimal can be mapped to tinyint or smallint or int or bigint or 
hugeint depending on precision. For the estimate we assume mapping to hugeint.
+       if tpe in ('hugeint', 'decimal', 'uuid', 'mbr')
+               then return cast(3.2 * count as bigint);
        end if;
 
        -- imprints are not supported on other types
@@ -291,7 +287,7 @@ end;
 
 create view sys.storagemodel as
 select "schema", "table", "column", "type", "count",
-       columnsize("type", "count", "distinct", "atomwidth") as columnsize,
+       columnsize("type", "count") as columnsize,
        heapsize("type", "count", "distinct", "atomwidth") as heapsize,
        hashsize("reference", "count") as hashsize,
        imprintsize("type", "count") as imprintsize,
@@ -307,7 +303,7 @@ create view sys.tablestoragemodel as
 select "schema", "table",
        max("count") as "rowcount",
        count(*) as "storages",
-       sum(columnsize("type", "count", "distinct", "atomwidth")) as columnsize,
+       sum(columnsize("type", "count")) as columnsize,
        sum(heapsize("type", "count", "distinct", "atomwidth")) as heapsize,
        sum(hashsize("reference", "count")) as hashsize,
        sum(imprintsize("type", "count")) as imprintsize,
diff --git a/sql/test/Dependencies/Tests/Dependencies.stable.out 
b/sql/test/Dependencies/Tests/Dependencies.stable.out
--- a/sql/test/Dependencies/Tests/Dependencies.stable.out
+++ b/sql/test/Dependencies/Tests/Dependencies.stable.out
@@ -100,9 +100,9 @@ Dependencies between database objects
 % 2,   2,      8 # length
 [ "t1",        "v1",   "DEP_VIEW"      ]
 [ "v1",        "v2",   "DEP_VIEW"      ]
-#SELECT t.name, i.name, 'DEP_INDEX' from tables as t, idxs as i where 
i.table_id = t.id and i.name not in (select name from keys) and t.type = 0;
-% .t,  .i,     .L44 # table_name
-% name,        name,   L44 # name
+#SELECT t.name, i.name, 'DEP_INDEX' from tables as t, idxs as i where 
i.table_id = t.id and i.name not in (select name from keys) and t.type = 0 
order by t.name, i.name;
+% .t,  .i,     .L45 # table_name
+% name,        name,   L45 # name
 % varchar,     varchar,        char # type
 % 2,   8,      9 # length
 [ "t1",        "id_index",     "DEP_INDEX"     ]
@@ -172,8 +172,8 @@ Dependencies between database objects
 [ "table_type_id",     "table_types_table_type_id_pkey",       "DEP_KEY"       
]
 [ "table_type_name",   "table_types_table_type_name_unique",   "DEP_KEY"       
]
 #SELECT c.name, i.name, 'DEP_INDEX' from columns as c, objects as kc, idxs as 
i where kc."name" = c.name AND kc.id = i.id AND c.table_id = i.table_id AND 
i.name not in (select name from keys) order by c.name, i.name;
-% .c,  .i,     .L21 # table_name
-% name,        name,   L21 # name
+% .c,  .i,     .L22 # table_name
+% name,        name,   L22 # name
 % varchar,     varchar,        char # type
 % 2,   8,      9 # length
 [ "id",        "id_index",     "DEP_INDEX"     ]
@@ -207,9 +207,9 @@ Dependencies between database objects
 % varchar,     varchar,        char # type
 % 2,   2,      8 # length
 [ "v1",        "f2",   "DEP_FUNC"      ]
-#SELECT v.name, i.name, 'DEP_INDEX' from tables as v, idxs as i where 
i.table_id = v.id and i.name not in (select name from keys) and v.type = 1;
-% .v,  .i,     .L44 # table_name
-% name,        name,   L44 # name
+#SELECT v.name, i.name, 'DEP_INDEX' from tables as v, idxs as i where 
i.table_id = v.id and i.name not in (select name from keys) and v.type = 1 
order by v.name, i.name;
+% .v,  .i,     .L45 # table_name
+% name,        name,   L45 # name
 % varchar,     varchar,        char # type
 % 0,   0,      9 # length
 #SELECT v.name, tri.name, 'DEP_TRIGGER' from tables as v, triggers as tri, 
dependencies as dep where dep.id = v.id AND dep.depend_id =tri.id AND 
dep.depend_type = 8 AND v.type = 1;
@@ -416,7 +416,10 @@ Cleanup
 [ "schemas",   "dependency_schemas_on_users",  "DEP_VIEW"      ]
 [ "schemas",   "geometry_columns",     "DEP_VIEW"      ]
 [ "schemas",   "ids",  "DEP_VIEW"      ]
+[ "schemas",   "storage",      "DEP_VIEW"      ]
 [ "sequences", "ids",  "DEP_VIEW"      ]
+[ "storage",   "schemastorage",        "DEP_VIEW"      ]
+[ "storage",   "tablestorage", "DEP_VIEW"      ]
 [ "storagemodelinput", "storagemodel", "DEP_VIEW"      ]
 [ "storagemodelinput", "tablestoragemodel",    "DEP_VIEW"      ]
 [ "tables",    "dependency_columns_on_indexes",        "DEP_VIEW"      ]
@@ -435,6 +438,7 @@ Cleanup
 [ "tables",    "dependency_views_on_procedures",       "DEP_VIEW"      ]
 [ "tables",    "dependency_views_on_views",    "DEP_VIEW"      ]
 [ "tables",    "geometry_columns",     "DEP_VIEW"      ]
+[ "tables",    "storage",      "DEP_VIEW"      ]
 [ "triggers",  "dependency_columns_on_triggers",       "DEP_VIEW"      ]
 [ "triggers",  "dependency_functions_on_triggers",     "DEP_VIEW"      ]
 [ "triggers",  "dependency_tables_on_triggers",        "DEP_VIEW"      ]
@@ -447,8 +451,8 @@ Cleanup
 [ "types",     "ids",  "DEP_VIEW"      ]
 [ "users",     "dependency_schemas_on_users",  "DEP_VIEW"      ]
 #SELECT t.name, i.name, 'DEP_INDEX' from tables as t, idxs as i where 
i.table_id = t.id and i.name not in (select name from keys) and t.type in (0, 
10, 20, 30) order by t.name, i.name;
-% .t,  .i,     .L44 # table_name
-% name,        name,   L44 # name
+% .t,  .i,     .L45 # table_name
+% name,        name,   L45 # name
 % varchar,     varchar,        char # type
 % 0,   0,      9 # length
 #(SELECT t.name as name, tri.name as trigname, 'DEP_TRIGGER' from tables as t, 
triggers as tri where tri.table_id = t.id) UNION (SELECT t.name as name, 
tri.name as trigname, 'DEP_TRIGGER' from triggers tri, tables t, dependencies 
dep where dep.id = t.id AND dep.depend_id =tri.id AND dep.depend_type = 8) 
order by name, trigname;
@@ -593,6 +597,7 @@ Cleanup
 [ "id",        "ids",  "DEP_VIEW"      ]
 [ "id",        "ids",  "DEP_VIEW"      ]
 [ "id",        "roles",        "DEP_VIEW"      ]
+[ "id",        "storage",      "DEP_VIEW"      ]
 [ "id",        "systemfunctions",      "DEP_VIEW"      ]
 [ "name",      "dependency_args_on_types",     "DEP_VIEW"      ]
 [ "name",      "dependency_args_on_types",     "DEP_VIEW"      ]
@@ -639,6 +644,7 @@ Cleanup
 [ "name",      "ids",  "DEP_VIEW"      ]
 [ "name",      "ids",  "DEP_VIEW"      ]
 [ "name",      "roles",        "DEP_VIEW"      ]
+[ "name",      "storage",      "DEP_VIEW"      ]
 [ "nr",        "dependency_columns_on_indexes",        "DEP_VIEW"      ]
 [ "nr",        "dependency_columns_on_keys",   "DEP_VIEW"      ]
 [ "number",    "dependency_args_on_types",     "DEP_VIEW"      ]
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

MonetDB: default - Improve and extend storagemodel script:

Reply via email to