Changeset: 67bddbf2ee39 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=67bddbf2ee39
Modified Files:
gdk/gdk_sample.c
sql/backends/monet5/Tests/sample00.sql
sql/backends/monet5/Tests/sample01.sql
sql/backends/monet5/Tests/sample02.sql
sql/backends/monet5/Tests/sample03.sql
sql/server/rel_optimizer.c
sql/server/rel_rel.c
sql/server/rel_select.c
Branch: stratified_sampling
Log Message:
Don't optimize away columns that we need.
diffs (198 lines):
diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -255,6 +255,8 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
oid minoid = b->hseqbase;
+ ERRORcheck(n > BATcount(b), "BATsample: Sample size bigger than
table!", NULL);
+
BATcheck(b, "BATsample", NULL);
BATcheck(w, "BATsample", NULL);
@@ -312,6 +314,10 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
}
oids[i] = (oid)(j + minoid);
keys[i] = pow(mtwist_drand(mt_rng), 1.0 / w_ptr[j]);//TODO cast
1.0 to dbl?
+ if (keys[i] == 1) {
+ GDKerror("BATsample: weight overflow\n");
+ goto bailout;
+ }
i++;
}
diff --git a/sql/backends/monet5/Tests/sample00.sql
b/sql/backends/monet5/Tests/sample00.sql
--- a/sql/backends/monet5/Tests/sample00.sql
+++ b/sql/backends/monet5/Tests/sample00.sql
@@ -2,7 +2,6 @@
START TRANSACTION;
CREATE TABLE vals(w DOUBLE, value INTEGER);
-
INSERT INTO vals VALUES (1, 100), (0, 50);
SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
@@ -15,7 +14,6 @@ ROLLBACK;
START TRANSACTION;
CREATE TABLE vals(w DECIMAL(10,3), value INTEGER);
-
INSERT INTO vals VALUES (10.77, 100), (0, 50);
SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
diff --git a/sql/backends/monet5/Tests/sample01.sql
b/sql/backends/monet5/Tests/sample01.sql
--- a/sql/backends/monet5/Tests/sample01.sql
+++ b/sql/backends/monet5/Tests/sample01.sql
@@ -5,7 +5,8 @@ CREATE TABLE vals(id DOUBLE, type INTEGE
INSERT INTO vals VALUES (1, 100), (0, 50);
-SELECT type, COUNT(*) FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS cast(id
AS double);
+plan SELECT type FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS id;
+SELECT type FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS id;
ROLLBACK;
diff --git a/sql/backends/monet5/Tests/sample02.sql
b/sql/backends/monet5/Tests/sample02.sql
--- a/sql/backends/monet5/Tests/sample02.sql
+++ b/sql/backends/monet5/Tests/sample02.sql
@@ -6,7 +6,7 @@ CREATE TABLE vals(id DOUBLE, type INTEGE
INSERT INTO vals VALUES (1, 100), (0, 50);
-SELECT SUM(id) FROM vals SAMPLE 1 USING WEIGHTS cast(type AS double);
+SELECT SUM(id) FROM vals SAMPLE 1 USING WEIGHTS id;
ROLLBACK;
diff --git a/sql/backends/monet5/Tests/sample03.sql
b/sql/backends/monet5/Tests/sample03.sql
--- a/sql/backends/monet5/Tests/sample03.sql
+++ b/sql/backends/monet5/Tests/sample03.sql
@@ -8,6 +8,15 @@ INSERT INTO vals VALUES (1, 100), (-1, 5
SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
+ROLLBACK;
+
+# fewer weights are non-zero than size of sample
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (0, 50);
+
+SELECT * FROM vals SAMPLE 2 USING WEIGHTS w;
ROLLBACK;
@@ -20,3 +29,45 @@ INSERT INTO vals VALUES ('#1 sample', 10
SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
ROLLBACK;
+
+# sample size bigger than amount of elements
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (1, 50);
+
+SELECT * FROM vals SAMPLE 10 USING WEIGHTS w;
+
+ROLLBACK;
+
+# weight overflow
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1.7976931348623157E+306, 100), (0, 50);
+
+SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
+
+ROLLBACK;
+
+# intigus maximus
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (0, 50);
+
+
+SELECT * FROM vals SAMPLE 18446744073709551617 USING WEIGHTS w;
+
+ROLLBACK;
+
+# biggus samplus
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (0, 50);
+
+
+SELECT * FROM vals SAMPLE 2147483648 USING WEIGHTS w;
+
+ROLLBACK;
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -5783,8 +5783,13 @@ rel_mark_used(mvc *sql, sql_rel *rel, in
}
break;
+ case op_sample:
+ if (rel->exps->cnt == 2) {
+ // weighted sample
+ exps_mark_used(sql->sa, rel, rel->l);
+ }
+ /* fall through */
case op_topn:
- case op_sample:
if (proj) {
rel = rel ->l;
rel_mark_used(sql, rel, proj);
diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c
--- a/sql/server/rel_rel.c
+++ b/sql/server/rel_rel.c
@@ -401,7 +401,7 @@ rel_crossproduct(sql_allocator *sa, sql_
}
sql_rel *
-rel_topn(sql_allocator *sa, sql_rel *l, list *exps )
+rel_topn(sql_allocator *sa, sql_rel *l, list *exps)
{
sql_rel *rel = rel_create(sa);
@@ -415,7 +415,7 @@ rel_topn(sql_allocator *sa, sql_rel *l,
}
sql_rel *
-rel_sample(sql_allocator *sa, sql_rel *l, list *exps )
+rel_sample(sql_allocator *sa, sql_rel *l, list *exps)
{
sql_rel *rel = rel_create(sa);
diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c
--- a/sql/server/rel_select.c
+++ b/sql/server/rel_select.c
@@ -4716,6 +4716,11 @@ rel_select_exp(mvc *sql, sql_rel *rel, S
exp_kind iek = {type_value, card_column, FALSE};
symbol* weights = l->h->next->data.sym;
sql_exp* weights_exp = rel_value_exp(sql, &rel,
weights, 0, iek);
+ if (!weights_exp) {
+ if (sql->errstr[0] == 0)
+ return sql_error(sql, 02, "Samplu
failu");
+ return NULL;
+ }
if(l->h->type == type_lng) {
lng sample_size = l->h->data.l_val;
@@ -4737,6 +4742,11 @@ rel_select_exp(mvc *sql, sql_rel *rel, S
append(exps, o);
}
rel = rel_sample(sql->sa, rel, exps);
+ if (!rel) {
+ if (sql->errstr[0] == 0)
+ return sql_error(sql, 02, "Samplu failu");
+ return NULL;
+ }
}
if (rel) {
if (rel && sn->groupby) {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list