Changeset: 4f9616105de0 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4f9616105de0
Modified Files:
        clients/Tests/SQL-dump.stable.out.int128
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_private.h
        monetdb5/modules/mal/mosaic.c
        monetdb5/modules/mal/mosaic_delta.c
        monetdb5/modules/mal/mosaic_dictionary.c
        monetdb5/modules/mal/mosaic_frame.c
        monetdb5/modules/mal/mosaic_linear.c
        monetdb5/modules/mal/mosaic_prefix.c
        monetdb5/modules/mal/mosaic_runlength.c
Branch: mosaic
Log Message:

Defend against run-away compressions
The combination of non-compressed and compressed blocks
may lead to too large a storage footprint.
A defense has been added in the estimators and the
main compression loop.


diffs (truncated from 981 to 300 lines):

diff --git a/clients/Tests/SQL-dump.stable.out.int128 
b/clients/Tests/SQL-dump.stable.out.int128
--- a/clients/Tests/SQL-dump.stable.out.int128
+++ b/clients/Tests/SQL-dump.stable.out.int128
@@ -1690,61 +1690,61 @@ 7058    "heapsize"      "hugeint"       128     0       
7062    NUL
 7059   "hashes"        "hugeint"       128     0       7062    NULL    true    
5       NULL
 7060   "imprints"      "hugeint"       128     0       7062    NULL    true    
6       NULL
 7061   "auxiliary"     "hugeint"       128     0       7062    NULL    true    
7       NULL
-7103   "column_id"     "int"   32      0       7114    NULL    true    0       
NULL
-7104   "type"  "clob"  0       0       7114    NULL    true    1       NULL
-7105   "width" "int"   32      0       7114    NULL    true    2       NULL
-7106   "stamp" "timestamp"     7       0       7114    NULL    true    3       
NULL
-7107   "sample"        "bigint"        64      0       7114    NULL    true    
4       NULL
-7108   "count" "bigint"        64      0       7114    NULL    true    5       
NULL
-7109   "unique"        "bigint"        64      0       7114    NULL    true    
6       NULL
-7110   "nils"  "bigint"        64      0       7114    NULL    true    7       
NULL
-7111   "minval"        "clob"  0       0       7114    NULL    true    8       
NULL
-7112   "maxval"        "clob"  0       0       7114    NULL    true    9       
NULL
-7113   "sorted"        "boolean"       1       0       7114    NULL    true    
10      NULL
-7204   "file_id"       "bigint"        64      0       7212    NULL    false   
0       NULL
-7205   "file_location" "clob"  0       0       7212    NULL    false   1       
NULL
-7206   "dbschema"      "smallint"      16      0       7212    NULL    false   
2       NULL
-7207   "format_version"        "varchar"       7       0       7212    NULL    
true    3       NULL
-7208   "sorting_order" "varchar"       10      0       7212    NULL    true    
4       NULL
-7209   "comments"      "clob"  0       0       7212    NULL    true    5       
NULL
-7214   "sn"    "clob"  0       0       7225    NULL    false   0       NULL
-7215   "file_id"       "bigint"        64      0       7225    NULL    false   
1       NULL
-7216   "ln"    "int"   32      0       7225    NULL    true    2       NULL
-7217   "as"    "int"   32      0       7225    NULL    true    3       NULL
-7218   "m5"    "clob"  0       0       7225    NULL    true    4       NULL
-7219   "sp"    "clob"  0       0       7225    NULL    true    5       NULL
-7220   "ur"    "clob"  0       0       7225    NULL    true    6       NULL
-7227   "id"    "clob"  0       0       7244    NULL    false   0       NULL
-7228   "file_id"       "bigint"        64      0       7244    NULL    false   
1       NULL
-7229   "cn"    "clob"  0       0       7244    NULL    true    2       NULL
-7230   "ds"    "clob"  0       0       7244    NULL    true    3       NULL
-7231   "dt"    "timestamp"     7       0       7244    NULL    true    4       
NULL
-7232   "fo"    "clob"  0       0       7244    NULL    true    5       NULL
-7233   "ks"    "clob"  0       0       7244    NULL    true    6       NULL
-7234   "lb"    "clob"  0       0       7244    NULL    true    7       NULL
-7235   "pg"    "clob"  0       0       7244    NULL    true    8       NULL
-7236   "pi"    "int"   32      0       7244    NULL    true    9       NULL
-7237   "pl"    "clob"  0       0       7244    NULL    true    10      NULL
-7238   "pu"    "clob"  0       0       7244    NULL    true    11      NULL
-7239   "sm"    "clob"  0       0       7244    NULL    true    12      NULL
-7246   "id"    "clob"  0       0       7256    NULL    false   0       NULL
-7247   "file_id"       "bigint"        64      0       7256    NULL    false   
1       NULL
-7248   "pn"    "clob"  0       0       7256    NULL    true    2       NULL
-7249   "cl"    "clob"  0       0       7256    NULL    true    3       NULL
-7250   "pp"    "clob"  0       0       7256    NULL    true    4       NULL
-7251   "vn"    "clob"  0       0       7256    NULL    true    5       NULL
-7258   "qname" "clob"  0       0       7269    NULL    false   0       NULL
-7259   "flag"  "smallint"      16      0       7269    NULL    false   1       
NULL
-7260   "rname" "clob"  0       0       7269    NULL    false   2       NULL
-7261   "pos"   "int"   32      0       7269    NULL    false   3       NULL
-7262   "mapq"  "smallint"      16      0       7269    NULL    false   4       
NULL
-7263   "cigar" "clob"  0       0       7269    NULL    false   5       NULL
-7264   "rnext" "clob"  0       0       7269    NULL    false   6       NULL
-7265   "pnext" "int"   32      0       7269    NULL    false   7       NULL
-7266   "tlen"  "int"   32      0       7269    NULL    false   8       NULL
-7267   "seq"   "clob"  0       0       7269    NULL    false   9       NULL
-7268   "qual"  "clob"  0       0       7269    NULL    false   10      NULL
-7350   "function_id"   "int"   32      0       7351    NULL    true    0       
NULL
+7105   "column_id"     "int"   32      0       7116    NULL    true    0       
NULL
+7106   "type"  "clob"  0       0       7116    NULL    true    1       NULL
+7107   "width" "int"   32      0       7116    NULL    true    2       NULL
+7108   "stamp" "timestamp"     7       0       7116    NULL    true    3       
NULL
+7109   "sample"        "bigint"        64      0       7116    NULL    true    
4       NULL
+7110   "count" "bigint"        64      0       7116    NULL    true    5       
NULL
+7111   "unique"        "bigint"        64      0       7116    NULL    true    
6       NULL
+7112   "nils"  "bigint"        64      0       7116    NULL    true    7       
NULL
+7113   "minval"        "clob"  0       0       7116    NULL    true    8       
NULL
+7114   "maxval"        "clob"  0       0       7116    NULL    true    9       
NULL
+7115   "sorted"        "boolean"       1       0       7116    NULL    true    
10      NULL
+7206   "file_id"       "bigint"        64      0       7214    NULL    false   
0       NULL
+7207   "file_location" "clob"  0       0       7214    NULL    false   1       
NULL
+7208   "dbschema"      "smallint"      16      0       7214    NULL    false   
2       NULL
+7209   "format_version"        "varchar"       7       0       7214    NULL    
true    3       NULL
+7210   "sorting_order" "varchar"       10      0       7214    NULL    true    
4       NULL
+7211   "comments"      "clob"  0       0       7214    NULL    true    5       
NULL
+7216   "sn"    "clob"  0       0       7227    NULL    false   0       NULL
+7217   "file_id"       "bigint"        64      0       7227    NULL    false   
1       NULL
+7218   "ln"    "int"   32      0       7227    NULL    true    2       NULL
+7219   "as"    "int"   32      0       7227    NULL    true    3       NULL
+7220   "m5"    "clob"  0       0       7227    NULL    true    4       NULL
+7221   "sp"    "clob"  0       0       7227    NULL    true    5       NULL
+7222   "ur"    "clob"  0       0       7227    NULL    true    6       NULL
+7229   "id"    "clob"  0       0       7246    NULL    false   0       NULL
+7230   "file_id"       "bigint"        64      0       7246    NULL    false   
1       NULL
+7231   "cn"    "clob"  0       0       7246    NULL    true    2       NULL
+7232   "ds"    "clob"  0       0       7246    NULL    true    3       NULL
+7233   "dt"    "timestamp"     7       0       7246    NULL    true    4       
NULL
+7234   "fo"    "clob"  0       0       7246    NULL    true    5       NULL
+7235   "ks"    "clob"  0       0       7246    NULL    true    6       NULL
+7236   "lb"    "clob"  0       0       7246    NULL    true    7       NULL
+7237   "pg"    "clob"  0       0       7246    NULL    true    8       NULL
+7238   "pi"    "int"   32      0       7246    NULL    true    9       NULL
+7239   "pl"    "clob"  0       0       7246    NULL    true    10      NULL
+7240   "pu"    "clob"  0       0       7246    NULL    true    11      NULL
+7241   "sm"    "clob"  0       0       7246    NULL    true    12      NULL
+7248   "id"    "clob"  0       0       7258    NULL    false   0       NULL
+7249   "file_id"       "bigint"        64      0       7258    NULL    false   
1       NULL
+7250   "pn"    "clob"  0       0       7258    NULL    true    2       NULL
+7251   "cl"    "clob"  0       0       7258    NULL    true    3       NULL
+7252   "pp"    "clob"  0       0       7258    NULL    true    4       NULL
+7253   "vn"    "clob"  0       0       7258    NULL    true    5       NULL
+7260   "qname" "clob"  0       0       7271    NULL    false   0       NULL
+7261   "flag"  "smallint"      16      0       7271    NULL    false   1       
NULL
+7262   "rname" "clob"  0       0       7271    NULL    false   2       NULL
+7263   "pos"   "int"   32      0       7271    NULL    false   3       NULL
+7264   "mapq"  "smallint"      16      0       7271    NULL    false   4       
NULL
+7265   "cigar" "clob"  0       0       7271    NULL    false   5       NULL
+7266   "rnext" "clob"  0       0       7271    NULL    false   6       NULL
+7267   "pnext" "int"   32      0       7271    NULL    false   7       NULL
+7268   "tlen"  "int"   32      0       7271    NULL    false   8       NULL
+7269   "seq"   "clob"  0       0       7271    NULL    false   9       NULL
+7270   "qual"  "clob"  0       0       7271    NULL    false   10      NULL
+7352   "function_id"   "int"   32      0       7353    NULL    true    0       
NULL
 COMMIT;
 START TRANSACTION;
 CREATE TABLE "sys"."_tables" (
@@ -1805,13 +1805,13 @@ 6992    "storage"       2000    "create view sys.""s
 7004   "storagemodelinput"     2000    NULL    0       true    0       0
 7052   "storagemodel"  2000    "create view sys.storagemodel as select * from 
sys.storagemodel();"     1       true    0       0
 7062   "tablestoragemodel"     2000    "-- A summary of the table storage 
requirement is is available as a table view.\n-- The auxiliary column denotes 
the maximum space if all non-sorted columns\n-- would be augmented with a hash 
(rare situation)\ncreate view sys.tablestoragemodel\nas select 
""schema"",""table"",max(count) as ""count"",\n\tsum(columnsize) as 
columnsize,\n\tsum(heapsize) as heapsize,\n\tsum(hashes) as 
hashes,\n\tsum(imprints) as imprints,\n\tsum(case when sorted = false then 8 * 
count else 0 end) as auxiliary\nfrom sys.storagemodel() group by 
""schema"",""table"";"   1       true    0       0
-7114   "statistics"    2000    NULL    0       true    0       0
-7212   "files" 7161    NULL    0       true    0       0
-7225   "sq"    7161    NULL    0       true    0       0
-7244   "rg"    7161    NULL    0       true    0       0
-7256   "pg"    7161    NULL    0       true    0       0
-7269   "export"        7161    NULL    0       true    0       0
-7351   "systemfunctions"       2000    NULL    0       true    0       0
+7116   "statistics"    2000    NULL    0       true    0       0
+7214   "files" 7163    NULL    0       true    0       0
+7227   "sq"    7163    NULL    0       true    0       0
+7246   "rg"    7163    NULL    0       true    0       0
+7258   "pg"    7163    NULL    0       true    0       0
+7271   "export"        7163    NULL    0       true    0       0
+7353   "systemfunctions"       2000    NULL    0       true    0       0
 COMMIT;
 START TRANSACTION;
 CREATE TABLE "sys"."args" (
@@ -1824,7 +1824,7 @@ CREATE TABLE "sys"."args" (
        "inout"       TINYINT,
        "number"      INTEGER
 );
-COPY 4252 RECORDS INTO "sys"."args" FROM stdin USING DELIMITERS '\t','\n','"';
+COPY 4254 RECORDS INTO "sys"."args" FROM stdin USING DELIMITERS '\t','\n','"';
 2155   30      "res_0" "oid"   63      0       0       0
 2156   30      "arg_1" "wrd"   64      0       1       1
 2157   31      "res_0" "oid"   63      0       0       0
@@ -5967,116 +5967,118 @@ 7084  7075    "compressionscheme"     "clob"  0       0
 7087   7086    "technique"     "clob"  0       0       0       0
 7088   7086    "outputsize"    "bigint"        64      0       0       1
 7089   7086    "factor"        "double"        53      0       0       2
-7090   7086    "sch"   "clob"  0       0       1       3
-7091   7086    "tbl"   "clob"  0       0       1       4
-7092   7086    "col"   "clob"  0       0       1       5
-7095   7094    "technique"     "clob"  0       0       0       0
-7096   7094    "outputsize"    "bigint"        64      0       0       1
-7097   7094    "factor"        "double"        53      0       0       2
-7098   7094    "sch"   "clob"  0       0       1       3
-7099   7094    "tbl"   "clob"  0       0       1       4
-7100   7094    "col"   "clob"  0       0       1       5
-7101   7094    "compressions"  "clob"  0       0       1       6
-7117   7116    "MinMax"        "int"   32      0       1       0
-7118   7116    "sample"        "bigint"        64      0       1       1
-7121   7120    "MinMax"        "int"   32      0       1       0
-7122   7120    "sample"        "bigint"        64      0       1       1
-7123   7120    "sch"   "clob"  0       0       1       2
-7126   7125    "MinMax"        "int"   32      0       1       0
-7127   7125    "sample"        "bigint"        64      0       1       1
-7128   7125    "sch"   "clob"  0       0       1       2
-7129   7125    "tbl"   "clob"  0       0       1       3
-7132   7131    "MinMax"        "int"   32      0       1       0
-7133   7131    "sample"        "bigint"        64      0       1       1
-7134   7131    "sch"   "clob"  0       0       1       2
-7135   7131    "tbl"   "clob"  0       0       1       3
-7136   7131    "col"   "clob"  0       0       1       4
-7139   7138    "result"        "clob"  0       0       0       0
-7140   7138    "src"   "clob"  0       0       1       1
-7143   7142    "result"        "smallint"      16      0       0       0
-7144   7142    "one"   "tinyint"       8       0       1       1
-7145   7142    "two"   "tinyint"       8       0       1       2
-7148   7147    "result"        "int"   32      0       0       0
-7149   7147    "one"   "smallint"      16      0       1       1
-7150   7147    "two"   "smallint"      16      0       1       2
-7153   7152    "result"        "bigint"        64      0       0       0
-7154   7152    "one"   "int"   32      0       1       1
-7155   7152    "two"   "int"   32      0       1       2
-7158   7157    "result"        "hugeint"       128     0       0       0
-7159   7157    "one"   "bigint"        64      0       1       1
-7160   7157    "two"   "bigint"        64      0       1       2
-7164   7163    "bam_repos"     "clob"  0       0       1       0
-7165   7163    "dbschema"      "smallint"      16      0       1       1
-7166   7163    "nr_threads"    "smallint"      16      0       1       2
-7169   7168    "bam_files"     "clob"  0       0       1       0
-7170   7168    "dbschema"      "smallint"      16      0       1       1
-7171   7168    "nr_threads"    "smallint"      16      0       1       2
-7174   7173    "bam_file"      "clob"  0       0       1       0
-7175   7173    "dbschema"      "smallint"      16      0       1       1
-7178   7177    "file_id"       "bigint"        64      0       1       0
-7179   7177    "dbschema"      "smallint"      16      0       1       1
-7182   7181    "result"        "boolean"       1       0       0       0
-7183   7181    "flag"  "smallint"      16      0       1       1
-7184   7181    "name"  "clob"  0       0       1       2
-7187   7186    "result"        "clob"  0       0       0       0
-7188   7186    "seq"   "clob"  0       0       1       1
-7191   7190    "result"        "clob"  0       0       0       0
-7192   7190    "qual"  "clob"  0       0       1       1
-7195   7194    "result"        "int"   32      0       0       0
-7196   7194    "cigar" "clob"  0       0       1       1
-7199   7198    "output_path"   "clob"  0       0       1       0
-7202   7201    "output_path"   "clob"  0       0       1       0
-7272   7271    "value" "tinyint"       8       0       0       0
-7273   7271    "first" "tinyint"       8       0       1       1
-7274   7271    "last"  "tinyint"       8       0       1       2
-7277   7276    "value" "tinyint"       8       0       0       0
-7278   7276    "first" "tinyint"       8       0       1       1
-7279   7276    "last"  "tinyint"       8       0       1       2
-7280   7276    "stepsize"      "tinyint"       8       0       1       3
-7283   7282    "value" "smallint"      16      0       0       0
-7284   7282    "first" "smallint"      16      0       1       1
-7285   7282    "last"  "smallint"      16      0       1       2
-7288   7287    "value" "smallint"      16      0       0       0
-7289   7287    "first" "smallint"      16      0       1       1
-7290   7287    "last"  "smallint"      16      0       1       2
-7291   7287    "stepsize"      "smallint"      16      0       1       3
-7294   7293    "value" "int"   32      0       0       0
-7295   7293    "first" "int"   32      0       1       1
-7296   7293    "last"  "int"   32      0       1       2
-7299   7298    "value" "int"   32      0       0       0
-7300   7298    "first" "int"   32      0       1       1
-7301   7298    "last"  "int"   32      0       1       2
-7302   7298    "stepsize"      "int"   32      0       1       3
-7305   7304    "value" "bigint"        64      0       0       0
-7306   7304    "first" "bigint"        64      0       1       1
-7307   7304    "last"  "bigint"        64      0       1       2
-7310   7309    "value" "bigint"        64      0       0       0
-7311   7309    "first" "bigint"        64      0       1       1
-7312   7309    "last"  "bigint"        64      0       1       2
-7313   7309    "stepsize"      "bigint"        64      0       1       3
-7316   7315    "value" "real"  24      0       0       0
-7317   7315    "first" "real"  24      0       1       1
-7318   7315    "last"  "real"  24      0       1       2
-7319   7315    "stepsize"      "real"  24      0       1       3
-7322   7321    "value" "double"        53      0       0       0
-7323   7321    "first" "double"        53      0       1       1
-7324   7321    "last"  "double"        53      0       1       2
-7325   7321    "stepsize"      "double"        53      0       1       3
-7328   7327    "value" "decimal"       10      2       0       0
-7329   7327    "first" "decimal"       10      2       1       1
-7330   7327    "last"  "decimal"       10      2       1       2
-7331   7327    "stepsize"      "decimal"       10      2       1       3
-7334   7333    "value" "timestamp"     7       0       0       0
-7335   7333    "first" "timestamp"     7       0       1       1
-7336   7333    "last"  "timestamp"     7       0       1       2
-7337   7333    "stepsize"      "sec_interval"  13      0       1       3
-7340   7339    "value" "hugeint"       128     0       0       0
-7341   7339    "first" "hugeint"       128     0       1       1
-7342   7339    "last"  "hugeint"       128     0       1       2
-7345   7344    "value" "hugeint"       128     0       0       0
-7346   7344    "first" "hugeint"       128     0       1       1
-7347   7344    "last"  "hugeint"       128     0       1       2
-7348   7344    "stepsize"      "hugeint"       128     0       1       3
+7090   7086    "run"   "bigint"        64      0       0       3
+7091   7086    "sch"   "clob"  0       0       1       4
+7092   7086    "tbl"   "clob"  0       0       1       5
+7093   7086    "col"   "clob"  0       0       1       6
+7096   7095    "technique"     "clob"  0       0       0       0
+7097   7095    "outputsize"    "bigint"        64      0       0       1
+7098   7095    "factor"        "double"        53      0       0       2
+7099   7095    "run"   "bigint"        64      0       0       3
+7100   7095    "sch"   "clob"  0       0       1       4
+7101   7095    "tbl"   "clob"  0       0       1       5
+7102   7095    "col"   "clob"  0       0       1       6
+7103   7095    "compressions"  "clob"  0       0       1       7
+7119   7118    "MinMax"        "int"   32      0       1       0
+7120   7118    "sample"        "bigint"        64      0       1       1
+7123   7122    "MinMax"        "int"   32      0       1       0
+7124   7122    "sample"        "bigint"        64      0       1       1
+7125   7122    "sch"   "clob"  0       0       1       2
+7128   7127    "MinMax"        "int"   32      0       1       0
+7129   7127    "sample"        "bigint"        64      0       1       1
+7130   7127    "sch"   "clob"  0       0       1       2
+7131   7127    "tbl"   "clob"  0       0       1       3
+7134   7133    "MinMax"        "int"   32      0       1       0
+7135   7133    "sample"        "bigint"        64      0       1       1
+7136   7133    "sch"   "clob"  0       0       1       2
+7137   7133    "tbl"   "clob"  0       0       1       3
+7138   7133    "col"   "clob"  0       0       1       4
+7141   7140    "result"        "clob"  0       0       0       0
+7142   7140    "src"   "clob"  0       0       1       1
+7145   7144    "result"        "smallint"      16      0       0       0
+7146   7144    "one"   "tinyint"       8       0       1       1
+7147   7144    "two"   "tinyint"       8       0       1       2
+7150   7149    "result"        "int"   32      0       0       0
+7151   7149    "one"   "smallint"      16      0       1       1
+7152   7149    "two"   "smallint"      16      0       1       2
+7155   7154    "result"        "bigint"        64      0       0       0
+7156   7154    "one"   "int"   32      0       1       1
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to