Changeset: 1acce398fc2c for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1acce398fc2c
Modified Files:
        monetdb5/extras/jaql/Tests/All
        monetdb5/extras/jaql/Tests/join00.mal
        monetdb5/extras/jaql/Tests/join00.stable.err
        monetdb5/extras/jaql/Tests/join00.stable.out
        monetdb5/extras/jaql/Tests/join01.mal
        monetdb5/extras/jaql/jaql.c
        monetdb5/extras/jaql/jaql.h
        monetdb5/extras/jaql/jaqlgencode.c
Branch: jacqueline
Log Message:

jaql: implemented join operation

currently the var.* notation cannot be used in the join output yet;
support for "preserve" is likewise left for future work


diffs (truncated from 1133 to 300 lines):

diff --git a/monetdb5/extras/jaql/Tests/All b/monetdb5/extras/jaql/Tests/All
--- a/monetdb5/extras/jaql/Tests/All
+++ b/monetdb5/extras/jaql/Tests/All
@@ -6,6 +6,7 @@ json03
 expand00
 filter00
 transform00
+join00
 sort00
 top00
 variable00
diff --git a/monetdb5/extras/jaql/Tests/join00.mal 
b/monetdb5/extras/jaql/Tests/join00.mal
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/join00.mal
@@ -0,0 +1,8 @@
+jaql.x("users = [ {\"name\": \"Jon Doe\", \"password\": \"asdf1234\", \"id\": 
1}, {\"name\": \"Jane Doe\", \"password\": \"qwertyui\", \"id\": 2}, {\"name\": 
\"Max Mustermann\", \"password\": \"q1w2e3r4\", \"id\": 3} ];");
+jaql.x("pages = [ {\"userid\": 1, \"url\":\"code.google.com/p/jaql/\"}, 
{\"userid\": 2, \"url\":\"www.cnn.com\"}, {\"userid\": 1, 
\"url\":\"java.sun.com/javase/6/docs/api/\"} ];");
+
+jaql.x("join users, pages where users.id == pages.userid into {users.name, 
pages.url};");
+jaql.x("join u in users, p in pages where u.id == p.userid into {u.name, 
p.url};");
+# TODO: preserve
+#jaql.x("join preserve u in users, p in pages where u.id == p.userid into 
{u.name, p.url};");
+
diff --git a/monetdb5/extras/jaql/Tests/join00.stable.err 
b/monetdb5/extras/jaql/Tests/join00.stable.err
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/join00.stable.err
@@ -0,0 +1,31 @@
+stderr of test 'join00` in directory 'extras/jaql` itself:
+
+
+# 17:33:31 >  
+# 17:33:31 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"gdk_dbfarm=/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-jacqueline-sofia.ins.cwi.nl/five/dbfarm"
 "--set" "mapi_open=true" "--set" "mapi_port=34627" "--set" "monet_prompt=" 
"--trace" "--forcemito" "--set" "mal_listing=2" "--dbname=mTests_extras_jaql" 
"join00.mal"
+# 17:33:31 >  
+
+# builtin opt  gdk_dbname = demo
+# builtin opt  gdk_dbfarm = 
/ufs/fabian/scratch/ssd/monetdb/jacqueline/program-x86_64/var/lib/monetdb5/dbfarm
+# builtin opt  gdk_debug = 0
+# builtin opt  gdk_alloc_map = no
+# builtin opt  gdk_vmtrim = yes
+# builtin opt  monet_prompt = >
+# builtin opt  monet_daemon = no
+# builtin opt  mapi_port = 50000
+# builtin opt  mapi_open = false
+# builtin opt  mapi_autosense = false
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  gdk_dbfarm = 
/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-jacqueline-sofia.ins.cwi.nl/five/dbfarm
+# cmdline opt  mapi_open = true
+# cmdline opt  mapi_port = 34627
+# cmdline opt  monet_prompt = 
+# cmdline opt  mal_listing = 2
+# cmdline opt  gdk_dbname = mTests_extras_jaql
+
+# 17:33:31 >  
+# 17:33:31 >  "Done."
+# 17:33:31 >  
+
diff --git a/monetdb5/extras/jaql/Tests/join00.stable.out 
b/monetdb5/extras/jaql/Tests/join00.stable.out
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/jaql/Tests/join00.stable.out
@@ -0,0 +1,33 @@
+stdout of test 'join00` in directory 'extras/jaql` itself:
+
+
+# 17:33:31 >  
+# 17:33:31 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"gdk_dbfarm=/net/sofia.ins.cwi.nl/export/scratch1/fabian/tmp/mtest-jacqueline-sofia.ins.cwi.nl/five/dbfarm"
 "--set" "mapi_open=true" "--set" "mapi_port=34627" "--set" "monet_prompt=" 
"--trace" "--forcemito" "--set" "mal_listing=2" "--dbname=mTests_extras_jaql" 
"join00.mal"
+# 17:33:31 >  
+
+# MonetDB 5 server v11.8.0 "jacqueline-0c67cb41e11b+"
+# Serving database 'mTests_extras_jaql', using 8 threads
+# Compiled for x86_64-pc-linux-gnu/64bit with 64bit OIDs dynamically linked
+# Found 15.662 GiB available main-memory.
+# Copyright (c) 1993-July 2008 CWI.
+# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
+# Visit http://www.monetdb.org/ for further information
+# Listening for connection requests on mapi:monetdb://sofia.ins.cwi.nl:34627/
+# MonetDB/GIS module loaded
+# MonetDB/SQL module loaded
+# MonetDB/DataCell module not loaded: MALException:jaql.context:JAQL 
environment not found
+function user.main():void;
+    jaql.x("users = [ {\"name\": \"Jon Doe\", \"password\": \"asdf1234\", 
\"id\": 1}, {\"name\": \"Jane Doe\", \"password\": \"qwertyui\", \"id\": 2}, 
{\"name\": \"Max Mustermann\", \"password\": \"q1w2e3r4\", \"id\": 3} ];");
+    jaql.x("pages = [ {\"userid\": 1, \"url\":\"code.google.com/p/jaql/\"}, 
{\"userid\": 2, \"url\":\"www.cnn.com\"}, {\"userid\": 1, 
\"url\":\"java.sun.com/javase/6/docs/api/\"} ];");
+    jaql.x("join users, pages where users.id == pages.userid into {users.name, 
pages.url};");
+    jaql.x("join u in users, p in pages where u.id == p.userid into {u.name, 
p.url};");
+# TODO: preserve 
+#jaql.x("join preserve u in users, p in pages where u.id == p.userid into 
{u.name, p.url};"); 
+end main;
+[ { "name": "Jon Doe", "url": "java.sun.com/javase/6/docs/api/" }, { "name": 
"Jon Doe", "url": "code.google.com/p/jaql/" }, { "name": "Jane Doe", "url": 
"www.cnn.com" } ]
+[ { "name": "Jon Doe", "url": "java.sun.com/javase/6/docs/api/" }, { "name": 
"Jon Doe", "url": "code.google.com/p/jaql/" }, { "name": "Jane Doe", "url": 
"www.cnn.com" } ]
+
+# 17:33:31 >  
+# 17:33:31 >  "Done."
+# 17:33:31 >  
+
diff --git a/monetdb5/extras/jaql/Tests/join01.mal 
b/monetdb5/extras/jaql/Tests/join01.mal
--- a/monetdb5/extras/jaql/Tests/join01.mal
+++ b/monetdb5/extras/jaql/Tests/join01.mal
@@ -1,6 +1,6 @@
 # from the docs
 
-jaql.x("users = [ {\"name\": "Jon Doe", \"password\": \"asdf1234\", \"id\": 
1}, {\"name\": "Jane Doe", \"password\": \"qwertyui\", \"id\": 2}, {\"name\": 
"Max Mustermann", \"password\": \"q1w2e3r4\", \"id\": 3} ];");
+jaql.x("users = [ {\"name\": \"Jon Doe\", \"password\": \"asdf1234\", \"id\": 
1}, {\"name\": \"Jane Doe\", \"password\": \"qwertyui\", \"id\": 2}, {\"name\": 
\"Max Mustermann\", \"password\": \"q1w2e3r4\", \"id\": 3} ];");
 jaql.x("pages = [ {\"userid\": 1, \"url\":\"code.google.com/p/jaql/\"}, 
{\"userid\": 2, \"url\":\"www.cnn.com\"}, {\"userid\": 1, 
\"url\":\"java.sun.com/javase/6/docs/api/\"} ];");
 
 jaql.x("join users, pages where users.id == pages.userid into {users.name, 
pages.*};");
diff --git a/monetdb5/extras/jaql/jaql.c b/monetdb5/extras/jaql/jaql.c
--- a/monetdb5/extras/jaql/jaql.c
+++ b/monetdb5/extras/jaql/jaql.c
@@ -326,15 +326,29 @@ _check_exp_equals_only(tree *t)
        if (t == NULL)
                return NULL;
 
-       if (t->type == j_pred && t->tval2->type == j_comp && (
-                               t->tval2->cval != j_and &&
-                               t->tval2->cval != j_or &&
-                               t->tval2->cval != j_equals))
-       {
-               res = GDKzalloc(sizeof(tree));
-               res->type = j_error;
-               res->sval = GDKstrdup("join: only equality tests are allowed");
-               return res;
+       if (t->type == j_pred && t->tval2->type == j_comp) {
+               if (t->tval2->cval != j_and && t->tval2->cval != j_equals) {
+                       res = GDKzalloc(sizeof(tree));
+                       res->type = j_error;
+                       res->sval = GDKstrdup("join: only (conjunctions of) 
equality "
+                                       "tests are allowed");
+                       return res;
+               }
+               if (t->tval2->cval == j_equals) {
+                       if (t->tval1->type != j_var || t->tval3->type != j_var) 
{
+                               res = GDKzalloc(sizeof(tree));
+                               res->type = j_error;
+                               res->sval = GDKstrdup("join: equality tests 
must be between "
+                                               "two variables");
+                               return res;
+                       }
+                       if (strcmp(t->tval1->sval, t->tval3->sval) == 0) {
+                               res = GDKzalloc(sizeof(tree));
+                               res->type = j_error;
+                               res->sval = GDKstrdup("join: self-joins not 
allowed");
+                               return res;
+                       }
+               }
        }
 
        if ((res = _check_exp_equals_only(t->tval1)) != NULL)
@@ -347,6 +361,30 @@ _check_exp_equals_only(tree *t)
        return NULL;
 }
 
+static tree *
+_extract_equals(tree *t)
+{
+       tree *p, *q;
+
+       assert(t->type == j_pred);
+       assert(t->tval2->type == j_comp);
+
+       if (t->tval2->cval == j_equals)
+               return t;
+
+       assert(t->tval2->cval == j_and);
+
+       p = q = _extract_equals(t->tval1);
+       while (p->next != NULL)
+               p = p->next;
+       p->next = _extract_equals(t->tval3);
+
+       t->tval1 = t->tval3 = t->next = NULL;
+       freetree(t);
+
+       return q;
+}
+
 /* create a join operation over 2 or more inputs, applying predicates,
  * producing output defined by tmpl */
 tree *
@@ -383,9 +421,40 @@ make_jaql_join(tree *inputs, tree *pred,
        if ((res = _check_exp_var("join", vars, tmpl)) != NULL)
                return res;
 
+       /* JAQL defines that only conjunctions of equality expressions may
+        * be used (and + ==), where self-joins are disallowed */
        if ((res = _check_exp_equals_only(pred)) != NULL)
                return res;
 
+       /* JAQL defines that each of the inputs must be linked through a
+        * join path, collect all equality tests and put them in a simple
+        * list */
+       pred = _extract_equals(pred);
+       for (i = 0; vars[i] != NULL; i++) {
+               for (res = pred; res != NULL; res = res->next) {
+                       if (strcmp(vars[i], res->tval1->sval) == 0 ||
+                                       strcmp(vars[i], res->tval3->sval) == 0)
+                       {
+                               vars[i] = "";
+                               break;
+                       }
+               }
+               if (vars[i][0] != '\0') {
+                       char buf[128];
+                       res = GDKzalloc(sizeof(tree));
+                       res->type = j_error;
+                       snprintf(buf, sizeof(buf), "join: input not referenced "
+                                       "in where: %s", vars[i]);
+                       res->sval = GDKstrdup(buf);
+                       freetree(inputs);
+                       freetree(pred);
+                       freetree(tmpl);
+                       return res;
+               }
+       }
+
+       /* FIXME: we skip the graph/path check for now */
+
        res = GDKzalloc(sizeof(tree));
        res->type = j_join;
        res->tval1 = inputs;
@@ -1035,6 +1104,9 @@ printtree(tree *t, int level, char op)
                                        case j_divide:
                                                printf("/ ");
                                                break;
+                                       case j_cinvalid:
+                                               printf("<<invalid compare 
node>>");
+                                               break;
                                }
                                break;
                        case j_join_input:
@@ -1064,12 +1136,16 @@ printtree(tree *t, int level, char op)
                                        printf(", ");
                                        printtree(t->tval3, level + step, op);
                                        printf(") ");
+                                       if (t->next != NULL)
+                                               printf("&& ");
                                } else {
                                        printf("( ");
                                        printtree(t->tval1, level + step, op);
                                        printtree(t->tval2, level + step, op);
                                        printtree(t->tval3, level + step, op);
                                        printf(") ");
+                                       if (t->next != NULL)
+                                               printf("and ");
                                }
                                break;
                        case j_operation:
@@ -1140,6 +1216,9 @@ printtree(tree *t, int level, char op)
                                        printf("!%s\n", t->sval);
                                }
                                break;
+                       case j_invalid:
+                               printf("<<invalid tree node>>");
+                               break;
                }
                if (t != NULL)
                        t = t->next;
diff --git a/monetdb5/extras/jaql/jaql.h b/monetdb5/extras/jaql/jaql.h
--- a/monetdb5/extras/jaql/jaql.h
+++ b/monetdb5/extras/jaql/jaql.h
@@ -56,6 +56,7 @@ typedef struct _jc {
 } jc;
 
 enum treetype {
+       j_invalid,
        j_output_var,
        j_output,
        j_json,
@@ -84,6 +85,7 @@ enum treetype {
 };
 
 enum comptype {
+       j_cinvalid,
        j_equals,
        j_nequal,
        j_greater,
@@ -142,6 +144,7 @@ tree *make_double(double d);
 tree *make_string(char *s);
 tree *make_bool(char b);
 void printtree (tree *t, int level, char op);
+void freetree(tree *t);
 
 
 jaql_export str JAQLexecute(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
diff --git a/monetdb5/extras/jaql/jaqlgencode.c 
b/monetdb5/extras/jaql/jaqlgencode.c
--- a/monetdb5/extras/jaql/jaqlgencode.c
+++ b/monetdb5/extras/jaql/jaqlgencode.c
@@ -21,7 +21,28 @@
 #include "jaqlgencode.h"
 #include "opt_prelude.h"
 
+typedef struct _json_var {
+       const char *name;
+       char preserve;
+       int j1;
+       int j2;
+       int j3;
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to