Changeset: 673b6b057051 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=673b6b057051
Added Files:
monetdb5/extras/jaql/Tests/group02.mal
monetdb5/extras/jaql/Tests/group02.stable.err
monetdb5/extras/jaql/Tests/group02.stable.out
Modified Files:
monetdb5/extras/jaql/Tests/All
monetdb5/extras/jaql/Tests/join03.mal
monetdb5/extras/jaql/jaqlfunc.mal
monetdb5/extras/jaql/jaqlgencode.c
Branch: Jul2012
Log Message:
group/function: allow processing empty groups
Make sure empty lists don't disappear, but are instead represented by a
special nil value. This means that functions handling these lists need
to filter the nil values out after collecting the list of elements for
which an answer must be produced.
diffs (truncated from 444 to 300 lines):
diff --git a/monetdb5/extras/jaql/Tests/All b/monetdb5/extras/jaql/Tests/All
--- a/monetdb5/extras/jaql/Tests/All
+++ b/monetdb5/extras/jaql/Tests/All
@@ -13,6 +13,7 @@ HAVE_JAQL?join00
HAVE_JAQL?join02
HAVE_JAQL?join03
HAVE_JAQL?group00
+HAVE_JAQL?group02
HAVE_JAQL?sort00
HAVE_JAQL?top00
HAVE_JAQL?variable00
diff --git a/monetdb5/extras/jaql/Tests/group02.mal
b/monetdb5/extras/jaql/Tests/group02.mal
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/group02.mal
@@ -0,0 +1,17 @@
+# various bugs in the group(functions) code
+
+#disabled: works in progress
+
+# should return 0
+jaql.x("[]->group into count($);");
+# only works when the groupkey exists for all elements
+jaql.x("[{\"a\": 1}, {\"a\": 2}]->group by d = $.a into count($);");
+#jaql.x("explain [{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into
count($);");
+# should return 0 for groups with no matching values
+#jaql.x("[{\"a\": 1, \"b\": 5}, {\"a\": 2}]->group by d = $.a into {d,
\"s\":sum($[*].b)};");
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d,
\"c\":count($[*].a)};");
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d,
\"c\":count($[*].b)};");
+
+jaql.x("[[],[1,2]] -> transform {\"cnt\": count($)};");
+jaql.x("[[],[1,2]] -> transform {\"sum\": sum($)};");
+jaql.x("[[],[1,2]] -> transform {\"avg\": avg($)};");
diff --git a/monetdb5/extras/jaql/Tests/group02.stable.err
b/monetdb5/extras/jaql/Tests/group02.stable.err
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/group02.stable.err
@@ -0,0 +1,31 @@
+stderr of test 'group02` in directory 'extras/jaql` itself:
+
+
+# 21:11:05 >
+# 21:11:05 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"gdk_dbfarm=/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-Jul2012-sofia.ins.cwi.nl/five/dbfarm"
"--set" "mapi_open=true" "--set" "mapi_port=33103" "--set" "monet_prompt="
"--trace" "--forcemito" "--set" "mal_listing=2" "--dbname=mTests_extras_jaql"
"group02.mal"
+# 21:11:05 >
+
+# builtin opt gdk_dbname = demo
+# builtin opt gdk_dbfarm =
/ufs/fabian/scratch/ssd/monetdb/Jul2012/program-x86_64/var/lib/monetdb5/dbfarm
+# builtin opt gdk_debug = 0
+# builtin opt gdk_alloc_map = no
+# builtin opt gdk_vmtrim = yes
+# builtin opt monet_prompt = >
+# builtin opt monet_daemon = no
+# builtin opt mapi_port = 50000
+# builtin opt mapi_open = false
+# builtin opt mapi_autosense = false
+# builtin opt sql_optimizer = default_pipe
+# builtin opt sql_debug = 0
+# cmdline opt gdk_nr_threads = 0
+# cmdline opt gdk_dbfarm =
/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-Jul2012-sofia.ins.cwi.nl/five/dbfarm
+# cmdline opt mapi_open = true
+# cmdline opt mapi_port = 33103
+# cmdline opt monet_prompt =
+# cmdline opt mal_listing = 2
+# cmdline opt gdk_dbname = mTests_extras_jaql
+
+# 21:11:06 >
+# 21:11:06 > "Done."
+# 21:11:06 >
+
diff --git a/monetdb5/extras/jaql/Tests/group02.stable.out
b/monetdb5/extras/jaql/Tests/group02.stable.out
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/group02.stable.out
@@ -0,0 +1,45 @@
+stdout of test 'group02` in directory 'extras/jaql` itself:
+
+
+# 21:11:05 >
+# 21:11:05 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set"
"gdk_dbfarm=/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-Jul2012-sofia.ins.cwi.nl/five/dbfarm"
"--set" "mapi_open=true" "--set" "mapi_port=33103" "--set" "monet_prompt="
"--trace" "--forcemito" "--set" "mal_listing=2" "--dbname=mTests_extras_jaql"
"group02.mal"
+# 21:11:05 >
+
+# MonetDB 5 server v11.11.6 "Jul2012-a04c99a2b437"
+# Serving database 'mTests_extras_jaql', using 8 threads
+# Compiled for x86_64-pc-linux-gnu/64bit with 64bit OIDs dynamically linked
+# Found 15.629 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
+# Visit http://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://sofia.ins.cwi.nl:33103/
+# MonetDB/GIS module loaded
+# MonetDB/JAQL module loaded
+# MonetDB/SQL module loaded
+# MonetDB/DataCell loaded
+function user.main():void;
+# various bugs in the group(functions) code
+#disabled: works in progress
+# should return 0
+ jaql.x("[]->group into count($);");
+# only works when the groupkey exists for all elements
+ jaql.x("[{\"a\": 1}, {\"a\": 2}]->group by d = $.a into count($);");
+#jaql.x("explain [{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into
count($);");
+# should return 0 for groups with no matching values
+#jaql.x("[{\"a\": 1, \"b\": 5}, {\"a\": 2}]->group by d = $.a into {d,
\"s\":sum($[*].b)};");
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d,
\"c\":count($[*].a)};");
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d,
\"c\":count($[*].b)};");
+ jaql.x("[[],[1,2]] -> transform {\"cnt\": count($)};");
+ jaql.x("[[],[1,2]] -> transform {\"sum\": sum($)};");
+ jaql.x("[[],[1,2]] -> transform {\"avg\": avg($)};");
+end main;
+[ 0 ]
+[ 1, 1 ]
+[ { "cnt": 0 }, { "cnt": 2 } ]
+[ { "sum": null }, { "sum": 3 } ]
+[ { "avg": null }, { "avg": 1.500000 } ]
+
+# 21:11:06 >
+# 21:11:06 > "Done."
+# 21:11:06 >
+
diff --git a/monetdb5/extras/jaql/Tests/join03.mal
b/monetdb5/extras/jaql/Tests/join03.mal
--- a/monetdb5/extras/jaql/Tests/join03.mal
+++ b/monetdb5/extras/jaql/Tests/join03.mal
@@ -1,8 +1,10 @@
# identified a bug in preserve, preserving all inputs lead to no
-# preservation at all
+# preservation at all, which in turn yielded in unexpected results for
+# the co-group function
jaql.x("A=[1,2];");
jaql.x("B=[{\"a\":1,\"b\":4},{\"a\":1,\"b\":5}];");
jaql.x("join preserve A, B where A == B.a into {B.*, A};");
jaql.x("join preserve A, preserve B where A == B.a into {B.*, A};");
jaql.x("join A, preserve B where A == B.a into {B.*, A};");
+jaql.x("group A by g = $ as ga, B by g = $.a as gb into { \"grp\" : g,
\"cnt\": count(gb) };");
diff --git a/monetdb5/extras/jaql/jaqlfunc.mal
b/monetdb5/extras/jaql/jaqlfunc.mal
--- a/monetdb5/extras/jaql/jaqlfunc.mal
+++ b/monetdb5/extras/jaql/jaqlfunc.mal
@@ -92,11 +92,15 @@ end shred
# perform sum over the input array
function sum(v:bat[:oid,:lng]):bat[:oid,:lng];
k := algebra.kunique(v);
+ p := algebra.uselect(v, nil:lng);
+ v := algebra.kdifference(v, p);
r:bat[:oid,:lng] := aggr.sum(v, k);
return r;
end sum;
function sum(v:bat[:oid,:dbl]):bat[:oid,:dbl];
k := algebra.kunique(v);
+ p := algebra.uselect(v, nil:dbl);
+ v := algebra.kdifference(v, p);
r:bat[:oid,:dbl] := aggr.sum(v, k);
return r;
end sum;
@@ -104,11 +108,15 @@ end sum;
# perform average over the input array
function avg(v:bat[:oid,:lng]):bat[:oid,:dbl];
k := algebra.kunique(v);
+ p := algebra.uselect(v, nil:lng);
+ v := algebra.kdifference(v, p);
r := aggr.avg(v, k);
return r;
end avg;
function avg(v:bat[:oid,:dbl]):bat[:oid,:dbl];
k := algebra.kunique(v);
+ p := algebra.uselect(v, nil:dbl);
+ v := algebra.kdifference(v, p);
r := aggr.avg(v, k);
return r;
end avg;
@@ -116,6 +124,7 @@ end avg;
# perform count over the input array
function count(v:bat[:oid,:any]):bat[:oid,:lng];
k := algebra.kunique(v);
+ v := algebra.antiuselect(v, nil);
x := aggr.count(v, k, false);
r := batcalc.lng(x);
return r;
diff --git a/monetdb5/extras/jaql/jaqlgencode.c
b/monetdb5/extras/jaql/jaqlgencode.c
--- a/monetdb5/extras/jaql/jaqlgencode.c
+++ b/monetdb5/extras/jaql/jaqlgencode.c
@@ -4441,6 +4441,14 @@ dumpvariabletransformation(jc *j, Client
return a;
} else {
InstrPtr r;
+ int k = 0, l = 0, re = 0, rf = 0;
+ int lvn[] = {j->j2, j->j3, j->j4, -1};
+ int lvx[] = {2, 3, 4};
+ int lvt[] = {TYPE_str, TYPE_lng, TYPE_dbl};
+ char *lvm[] = {"str", "lng", "dbl"};
+ char lvT[] = {'s', 'i', 'd'};
+ int i;
+
r = newInstruction(mb, ASSIGNsymbol);
a = newTmpVariable(mb, newBatType(TYPE_oid,
TYPE_any));
r = pushReturn(mb, r, a);
@@ -4471,142 +4479,117 @@ dumpvariabletransformation(jc *j, Client
q = newInstruction(mb, ASSIGNsymbol);
q = pushReturn(mb, q, newTmpVariable(mb, e));
q = pushNil(mb, q, e);
- e = getArg(q, 0);
- pushInstruction(mb, q);
-
- f = newBatType(TYPE_oid, TYPE_lng);
- q = newInstruction(mb, ASSIGNsymbol);
- q = pushReturn(mb, q, newTmpVariable(mb, f));
- q = pushNil(mb, q, f);
- f = getArg(q, 0);
- pushInstruction(mb, q);
- q = newInstruction(mb, ASSIGNsymbol);
- setModuleId(q, calcRef);
- setFunctionId(q, putName("==", 2));
- q->barrier = BARRIERsymbol;
- q = pushReturn(mb, q, newTmpVariable(mb,
TYPE_any));
- q = pushArgument(mb, q, d);
- q = pushStr(mb, q, "lng");
- g = getArg(q, 0);
- pushInstruction(mb, q);
- q = newInstruction(mb, ASSIGNsymbol);
- setModuleId(q, putName("jaql", 4));
- setFunctionId(q, putName("cast", 4));
- q = pushReturn(mb, q, f);
- q = pushArgument(mb, q, c);
- q = pushType(mb, q, TYPE_lng);
- pushInstruction(mb, q);
- q = newInstruction(mb, ASSIGNsymbol);
- setModuleId(q, algebraRef);
- setFunctionId(q, projectRef);
- q = pushReturn(mb, q, e);
- q = pushArgument(mb, q, f);
- q = pushBte(mb, q, 'i');
- pushInstruction(mb, q);
- dumpbatwritable(j, mb, 3);
+ re = getArg(q, 0);
+ pushInstruction(mb, q);
+
+ for (i = 0; lvn[i] != -1; i++) {
+ f = newBatType(TYPE_oid, lvt[i]);
+ q = newInstruction(mb, ASSIGNsymbol);
+ q = pushReturn(mb, q,
newTmpVariable(mb, f));
+ q = pushNil(mb, q, f);
+ rf = getArg(q, 0);
+ pushInstruction(mb, q);
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, calcRef);
+ setFunctionId(q, putName("==", 2));
+ q->barrier = BARRIERsymbol;
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, d);
+ q = pushStr(mb, q, lvm[i]);
+ g = getArg(q, 0);
+ pushInstruction(mb, q);
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, putName("jaql", 4));
+ setFunctionId(q, putName("cast", 4));
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, c);
+ q = pushType(mb, q, lvt[i]);
+ f = getArg(q, 0);
+ pushInstruction(mb, q);
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, algebraRef);
+ setFunctionId(q, projectRef);
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, f);
+ q = pushBte(mb, q, lvT[i]);
+ e = getArg(q, 0);
+ pushInstruction(mb, q);
+ /* replace nil by json null */
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, algebraRef);
+ setFunctionId(q, uselectRef);
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, f);
+ q = pushNil(mb, q, lvt[i]);
+ k = getArg(q, 0);
+ pushInstruction(mb, q);
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, algebraRef);
+ setFunctionId(q, projectRef);
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, k);
+ q = pushBte(mb, q, 'n');
+ l = getArg(q, 0);
+ pushInstruction(mb, q);
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, algebraRef);
+ setFunctionId(q, putName("kdifference",
11));
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, e);
+ q = pushArgument(mb, q, l);
+ e = getArg(q, 0);
+ pushInstruction(mb, q);
+ q = newInstruction(mb, ASSIGNsymbol);
+ setModuleId(q, batRef);
+ setFunctionId(q, insertRef);
+ q = pushReturn(mb, q,
newTmpVariable(mb, TYPE_any));
+ q = pushArgument(mb, q, e);
+ q = pushArgument(mb, q, l);
+ e = getArg(q, 0);
+ pushInstruction(mb, q);
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list