Changeset: 673b6b057051 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=673b6b057051
Added Files:
        monetdb5/extras/jaql/Tests/group02.mal
        monetdb5/extras/jaql/Tests/group02.stable.err
        monetdb5/extras/jaql/Tests/group02.stable.out
Modified Files:
        monetdb5/extras/jaql/Tests/All
        monetdb5/extras/jaql/Tests/join03.mal
        monetdb5/extras/jaql/jaqlfunc.mal
        monetdb5/extras/jaql/jaqlgencode.c
Branch: Jul2012
Log Message:

group/function: allow processing empty groups

Make sure empty lists don't disappear, but instead show up represented by
a special nil value.  This means the functions handling these lists need to
filter the nil values out after collecting the list of elements to aggregate.


diffs (truncated from 444 to 300 lines):

diff --git a/monetdb5/extras/jaql/Tests/All b/monetdb5/extras/jaql/Tests/All
--- a/monetdb5/extras/jaql/Tests/All
+++ b/monetdb5/extras/jaql/Tests/All
@@ -13,6 +13,7 @@ HAVE_JAQL?join00
 HAVE_JAQL?join02
 HAVE_JAQL?join03
 HAVE_JAQL?group00
+HAVE_JAQL?group02
 HAVE_JAQL?sort00
 HAVE_JAQL?top00
 HAVE_JAQL?variable00
diff --git a/monetdb5/extras/jaql/Tests/group02.mal 
b/monetdb5/extras/jaql/Tests/group02.mal
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/group02.mal
@@ -0,0 +1,17 @@
+# various bugs in the group(functions) code
+
+#disabled: works in progress
+
+# should return 0
+jaql.x("[]->group into count($);");
+# only works when the groupkey exists for all elements
+jaql.x("[{\"a\": 1}, {\"a\": 2}]->group by d = $.a into count($);");
+#jaql.x("explain [{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into 
count($);");
+# should return 0 for groups with no matching values
+#jaql.x("[{\"a\": 1, \"b\": 5}, {\"a\": 2}]->group by d = $.a into {d, 
\"s\":sum($[*].b)};");
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d, 
\"c\":count($[*].a)};");
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d, 
\"c\":count($[*].b)};");
+
+jaql.x("[[],[1,2]] -> transform {\"cnt\": count($)};");
+jaql.x("[[],[1,2]] -> transform {\"sum\": sum($)};");
+jaql.x("[[],[1,2]] -> transform {\"avg\": avg($)};");
diff --git a/monetdb5/extras/jaql/Tests/group02.stable.err 
b/monetdb5/extras/jaql/Tests/group02.stable.err
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/group02.stable.err
@@ -0,0 +1,31 @@
+stderr of test 'group02` in directory 'extras/jaql` itself:
+
+
+# 21:11:05 >  
+# 21:11:05 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"gdk_dbfarm=/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-Jul2012-sofia.ins.cwi.nl/five/dbfarm"
 "--set" "mapi_open=true" "--set" "mapi_port=33103" "--set" "monet_prompt=" 
"--trace" "--forcemito" "--set" "mal_listing=2" "--dbname=mTests_extras_jaql" 
"group02.mal"
+# 21:11:05 >  
+
+# builtin opt  gdk_dbname = demo
+# builtin opt  gdk_dbfarm = 
/ufs/fabian/scratch/ssd/monetdb/Jul2012/program-x86_64/var/lib/monetdb5/dbfarm
+# builtin opt  gdk_debug = 0
+# builtin opt  gdk_alloc_map = no
+# builtin opt  gdk_vmtrim = yes
+# builtin opt  monet_prompt = >
+# builtin opt  monet_daemon = no
+# builtin opt  mapi_port = 50000
+# builtin opt  mapi_open = false
+# builtin opt  mapi_autosense = false
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  gdk_dbfarm = 
/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-Jul2012-sofia.ins.cwi.nl/five/dbfarm
+# cmdline opt  mapi_open = true
+# cmdline opt  mapi_port = 33103
+# cmdline opt  monet_prompt = 
+# cmdline opt  mal_listing = 2
+# cmdline opt  gdk_dbname = mTests_extras_jaql
+
+# 21:11:06 >  
+# 21:11:06 >  "Done."
+# 21:11:06 >  
+
diff --git a/monetdb5/extras/jaql/Tests/group02.stable.out 
b/monetdb5/extras/jaql/Tests/group02.stable.out
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/group02.stable.out
@@ -0,0 +1,45 @@
+stdout of test 'group02` in directory 'extras/jaql` itself:
+
+
+# 21:11:05 >  
+# 21:11:05 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"gdk_dbfarm=/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-Jul2012-sofia.ins.cwi.nl/five/dbfarm"
 "--set" "mapi_open=true" "--set" "mapi_port=33103" "--set" "monet_prompt=" 
"--trace" "--forcemito" "--set" "mal_listing=2" "--dbname=mTests_extras_jaql" 
"group02.mal"
+# 21:11:05 >  
+
+# MonetDB 5 server v11.11.6 "Jul2012-a04c99a2b437"
+# Serving database 'mTests_extras_jaql', using 8 threads
+# Compiled for x86_64-pc-linux-gnu/64bit with 64bit OIDs dynamically linked
+# Found 15.629 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
+# Visit http://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://sofia.ins.cwi.nl:33103/
+# MonetDB/GIS module loaded
+# MonetDB/JAQL module loaded
+# MonetDB/SQL module loaded
+# MonetDB/DataCell loaded
+function user.main():void;
+# various bugs in the group(functions) code 
+#disabled: works in progress 
+# should return 0 
+    jaql.x("[]->group into count($);");
+# only works when the groupkey exists for all elements 
+    jaql.x("[{\"a\": 1}, {\"a\": 2}]->group by d = $.a into count($);");
+#jaql.x("explain [{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into 
count($);"); 
+# should return 0 for groups with no matching values 
+#jaql.x("[{\"a\": 1, \"b\": 5}, {\"a\": 2}]->group by d = $.a into {d, 
\"s\":sum($[*].b)};"); 
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d, 
\"c\":count($[*].a)};"); 
+#jaql.x("[{\"a\": 1}, {\"a\": 2}, {\"b\": 1}]->group by d = $.a into {d, 
\"c\":count($[*].b)};"); 
+    jaql.x("[[],[1,2]] -> transform {\"cnt\": count($)};");
+    jaql.x("[[],[1,2]] -> transform {\"sum\": sum($)};");
+    jaql.x("[[],[1,2]] -> transform {\"avg\": avg($)};");
+end main;
+[ 0 ]
+[ 1, 1 ]
+[ { "cnt": 0 }, { "cnt": 2 } ]
+[ { "sum": null }, { "sum": 3 } ]
+[ { "avg": null }, { "avg": 1.500000 } ]
+
+# 21:11:06 >  
+# 21:11:06 >  "Done."
+# 21:11:06 >  
+
diff --git a/monetdb5/extras/jaql/Tests/join03.mal 
b/monetdb5/extras/jaql/Tests/join03.mal
--- a/monetdb5/extras/jaql/Tests/join03.mal
+++ b/monetdb5/extras/jaql/Tests/join03.mal
@@ -1,8 +1,10 @@
 # identified a bug in preserve, preserving all inputs lead to no
-# preservation at all
+# preservation at all, which in turn yielded unexpected results for
+# the co-group function
 
 jaql.x("A=[1,2];");
 jaql.x("B=[{\"a\":1,\"b\":4},{\"a\":1,\"b\":5}];");
 jaql.x("join preserve A, B where A == B.a into {B.*, A};");
 jaql.x("join preserve A, preserve B where A == B.a into {B.*, A};");
 jaql.x("join A, preserve B where A == B.a into {B.*, A};");
+jaql.x("group A by g = $ as ga, B by g = $.a as gb into { \"grp\" : g, 
\"cnt\": count(gb) };");
diff --git a/monetdb5/extras/jaql/jaqlfunc.mal 
b/monetdb5/extras/jaql/jaqlfunc.mal
--- a/monetdb5/extras/jaql/jaqlfunc.mal
+++ b/monetdb5/extras/jaql/jaqlfunc.mal
@@ -92,11 +92,15 @@ end shred
 # perform sum over the input array
 function sum(v:bat[:oid,:lng]):bat[:oid,:lng];
        k := algebra.kunique(v);
+       p := algebra.uselect(v, nil:lng);
+       v := algebra.kdifference(v, p);
        r:bat[:oid,:lng] := aggr.sum(v, k);
        return r;
 end sum;
 function sum(v:bat[:oid,:dbl]):bat[:oid,:dbl];
        k := algebra.kunique(v);
+       p := algebra.uselect(v, nil:dbl);
+       v := algebra.kdifference(v, p);
        r:bat[:oid,:dbl] := aggr.sum(v, k);
        return r;
 end sum;
@@ -104,11 +108,15 @@ end sum;
 # perform average over the input array
 function avg(v:bat[:oid,:lng]):bat[:oid,:dbl];
        k := algebra.kunique(v);
+       p := algebra.uselect(v, nil:lng);
+       v := algebra.kdifference(v, p);
        r := aggr.avg(v, k);
        return r;
 end avg;
 function avg(v:bat[:oid,:dbl]):bat[:oid,:dbl];
        k := algebra.kunique(v);
+       p := algebra.uselect(v, nil:dbl);
+       v := algebra.kdifference(v, p);
        r := aggr.avg(v, k);
        return r;
 end avg;
@@ -116,6 +124,7 @@ end avg;
 # perform count over the input array
 function count(v:bat[:oid,:any]):bat[:oid,:lng];
        k := algebra.kunique(v);
+       v := algebra.antiuselect(v, nil);
        x := aggr.count(v, k, false);
        r := batcalc.lng(x);
        return r;
diff --git a/monetdb5/extras/jaql/jaqlgencode.c 
b/monetdb5/extras/jaql/jaqlgencode.c
--- a/monetdb5/extras/jaql/jaqlgencode.c
+++ b/monetdb5/extras/jaql/jaqlgencode.c
@@ -4441,6 +4441,14 @@ dumpvariabletransformation(jc *j, Client
                                return a;
                        } else {
                                InstrPtr r;
+                               int k = 0, l = 0, re = 0, rf = 0;
+                               int lvn[] = {j->j2, j->j3, j->j4, -1};
+                               int lvx[] = {2, 3, 4};
+                               int lvt[] = {TYPE_str, TYPE_lng, TYPE_dbl};
+                               char *lvm[] = {"str", "lng", "dbl"};
+                               char lvT[] = {'s', 'i', 'd'};
+                               int i;
+
                                r = newInstruction(mb, ASSIGNsymbol);
                                a = newTmpVariable(mb, newBatType(TYPE_oid, 
TYPE_any));
                                r = pushReturn(mb, r, a);
@@ -4471,142 +4479,117 @@ dumpvariabletransformation(jc *j, Client
                                q = newInstruction(mb, ASSIGNsymbol);
                                q = pushReturn(mb, q, newTmpVariable(mb, e));
                                q = pushNil(mb, q, e);
-                               e = getArg(q, 0);
-                               pushInstruction(mb, q);
-
-                               f = newBatType(TYPE_oid, TYPE_lng);
-                               q = newInstruction(mb, ASSIGNsymbol);
-                               q = pushReturn(mb, q, newTmpVariable(mb, f));
-                               q = pushNil(mb, q, f);
-                               f = getArg(q, 0);
-                               pushInstruction(mb, q);
-                               q = newInstruction(mb, ASSIGNsymbol);
-                               setModuleId(q, calcRef);
-                               setFunctionId(q, putName("==", 2));
-                               q->barrier = BARRIERsymbol;
-                               q = pushReturn(mb, q, newTmpVariable(mb, 
TYPE_any));
-                               q = pushArgument(mb, q, d);
-                               q = pushStr(mb, q, "lng");
-                               g = getArg(q, 0);
-                               pushInstruction(mb, q);
-                               q = newInstruction(mb, ASSIGNsymbol);
-                               setModuleId(q, putName("jaql", 4));
-                               setFunctionId(q, putName("cast", 4));
-                               q = pushReturn(mb, q, f);
-                               q = pushArgument(mb, q, c);
-                               q = pushType(mb, q, TYPE_lng);
-                               pushInstruction(mb, q);
-                               q = newInstruction(mb, ASSIGNsymbol);
-                               setModuleId(q, algebraRef);
-                               setFunctionId(q, projectRef);
-                               q = pushReturn(mb, q, e);
-                               q = pushArgument(mb, q, f);
-                               q = pushBte(mb, q, 'i');
-                               pushInstruction(mb, q);
-                               dumpbatwritable(j, mb, 3);
+                               re = getArg(q, 0);
+                               pushInstruction(mb, q);
+
+                               for (i = 0; lvn[i] != -1; i++) {
+                                       f = newBatType(TYPE_oid, lvt[i]);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, f));
+                                       q = pushNil(mb, q, f);
+                                       rf = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, calcRef);
+                                       setFunctionId(q, putName("==", 2));
+                                       q->barrier = BARRIERsymbol;
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, d);
+                                       q = pushStr(mb, q, lvm[i]);
+                                       g = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, putName("jaql", 4));
+                                       setFunctionId(q, putName("cast", 4));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, c);
+                                       q = pushType(mb, q, lvt[i]);
+                                       f = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, algebraRef);
+                                       setFunctionId(q, projectRef);
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, f);
+                                       q = pushBte(mb, q, lvT[i]);
+                                       e = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       /* replace nil by json null */
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, algebraRef);
+                                       setFunctionId(q, uselectRef);
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, f);
+                                       q = pushNil(mb, q, lvt[i]);
+                                       k = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, algebraRef);
+                                       setFunctionId(q, projectRef);
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, k);
+                                       q = pushBte(mb, q, 'n');
+                                       l = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, algebraRef);
+                                       setFunctionId(q, putName("kdifference", 
11));
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, e);
+                                       q = pushArgument(mb, q, l);
+                                       e = getArg(q, 0);
+                                       pushInstruction(mb, q);
+                                       q = newInstruction(mb, ASSIGNsymbol);
+                                       setModuleId(q, batRef);
+                                       setFunctionId(q, insertRef);
+                                       q = pushReturn(mb, q, 
newTmpVariable(mb, TYPE_any));
+                                       q = pushArgument(mb, q, e);
+                                       q = pushArgument(mb, q, l);
+                                       e = getArg(q, 0);
+                                       pushInstruction(mb, q);
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to