Changeset: f5e4fef2ba79 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f5e4fef2ba79
Branch: tvtree
Log Message:
merged with nested
diffs (272 lines):
diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c
--- a/sql/backends/monet5/sql_result.c
+++ b/sql/backends/monet5/sql_result.c
@@ -2194,14 +2194,23 @@ end:
}
#define skipspace(s) while(*s && isspace(*s)) s++;
+static char *
+FINDsep(char *s, char tsep, char rsep)
+{
+ for (; *s; s++) {
+ if (s[0] == tsep || s[0] == rsep) {
+ break;
+ }
+ }
+ return s;
+}
static str
VALUEparser(char **S, Column *cols, int elm, sql_subtype *t, char tsep, char
rsep)
{
/* handle literals */
char *s = *S;
- int skip = 0;
- char *ns = NULL;
+ char *ns = s;
if (t->type->localtype == TYPE_str) {
/* todo improve properly skip "" strings. */
if (*s != '"')
@@ -2212,13 +2221,10 @@ VALUEparser(char **S, Column *cols, int
ns++;
if (*ns != '"')
throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "missing
\" at end of string value");
- skip++;
- } else if (tsep) {
- ns = strchr(s, tsep);
+ *ns = 0;
+ ns++;
}
- if (!ns && rsep) {
- ns = strchr(s, rsep);
- }
+ ns = FINDsep(ns, tsep, rsep);
char sep = 0;
if (!ns)
throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "missing '%c' at
end of value", rsep?rsep:tsep);
@@ -2230,10 +2236,7 @@ VALUEparser(char **S, Column *cols, int
if (elm >= 0 && d && BUNappend(cols[elm].c, d, false) != GDK_SUCCEED)
throw(SQL, "SQLfrom_varchar", SQLSTATE(42000) "append failed");
*ns = sep;
- s = ns;
- if (skip)
- s++;
- *S = s;
+ *S = ns;
return NULL;
}
diff --git a/sql/backends/monet5/vaults/json/json.c
b/sql/backends/monet5/vaults/json/json.c
--- a/sql/backends/monet5/vaults/json/json.c
+++ b/sql/backends/monet5/vaults/json/json.c
@@ -68,6 +68,7 @@ json_open(const char *fname, allocator *
return res;
}
+
static void
json_close(JSONFileHandle *jfh)
{
@@ -93,6 +94,7 @@ read_json_file(JSONFileHandle *jfh)
return content;
}
+
static str
append_terms(allocator *sa, JSON *jt, BAT *b)
{
@@ -191,8 +193,10 @@ json_load(void *BE, sql_subfunc *f, char
return s;
}
+
int TYPE_json;
+
static str
JSONprelude(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
{
@@ -203,6 +207,7 @@ JSONprelude(Client cntxt, MalBlkPtr mb,
return MAL_SUCCEED;
}
+
static str
JSONepilogue(void *ret)
{
@@ -252,12 +257,79 @@ JSONread_json(Client cntxt, MalBlkPtr mb
return msg;
}
+
+static str
+JSONread_nd_json(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+ (void) cntxt; (void) mb;
+ char *msg = MAL_SUCCEED;
+ char *fname = *(str*)getArgReference(stk, pci, pci->retc);
+ allocator *sa = sa_create(NULL);
+ JSONFileHandle *jfh = json_open(fname, sa);
+ if (!jfh) {
+ sa_destroy(sa);
+ msg = createException(SQL, "json.read_nd_json", "Failed to open
file %s", fname);
+ return msg;
+ }
+ char *content = read_json_file(jfh);
+ json_close(jfh);
+ BAT *b = COLnew(0, TYPE_json, 0, TRANSIENT);
+ if (content) {
+ if (b) {
+ size_t cnt = 0;
+ char *head = content;
+ char *tail = content;
+ while (cnt < (jfh->size + 1)) {
+ if (head[0] == '\n' || (head[0] == '\r' &&
head[1] == '\n')) {
+ int skip = 1;
+ if (head[0] == '\r' && head[1] == '\n')
+ skip = 2;
+ head[0] = '\0';
+ JSON *jt = JSONparse(tail);
+ if (jt) {
+ // must be valid json obj str
+ if (BUNappend(b, tail, false)
!= GDK_SUCCEED) {
+ msg =
createException(SQL, "json.read_nd_json", "BUNappend failed!");
+ break;
+ }
+ } else {
+ msg =
createException(SQL, "json.read_nd_json", "Invalid json object, JSONparse
failed!");
+ break;
+ }
+ tail = head + skip;
+ while (tail[0] == '\n') // multiple
newlines e.g. \n\n
+ tail ++;
+ head = tail;
+ }
+ head ++;
+ cnt ++;
+ }
+ } else {
+ msg = createException(SQL, "json.read_nd_json", "Failed
to allocate bat");
+ }
+ } else {
+ msg = createException(SQL, "json.read_nd_json", "Failed to read
file %s", fname);
+ }
+ if (msg == MAL_SUCCEED) {
+ bat *res = getArgReference_bat(stk, pci, 0);
+ *res = b->batCacheid;
+ BBPkeepref(b);
+ } else {
+ BBPreclaim(b);
+ }
+
+ sa_destroy(sa);
+ return msg;
+}
+
+
#include "mel.h"
static mel_func json_init_funcs[] = {
pattern("json", "prelude", JSONprelude, false, "", noargs),
command("json", "epilogue", JSONepilogue, false, "", noargs),
- pattern("json", "read_json", JSONread_json, false, "Reads json file
into a table", args(1,2, batarg("", json), arg("filename", str))),
+ pattern("json", "read_json", JSONread_json, false, "Reads json file",
args(1,2, batarg("", json), arg("filename", str))),
+ pattern("json", "read_nd_json", JSONread_nd_json, false, "Reads new
line delimited json objects", args(1,2, batarg("", json), arg("filename",
str))),
{ .imp=NULL }
};
diff --git a/sql/scripts/40_json.sql b/sql/scripts/40_json.sql
--- a/sql/scripts/40_json.sql
+++ b/sql/scripts/40_json.sql
@@ -104,3 +104,9 @@ create aggregate json.tojsonarray( x str
GRANT EXECUTE ON AGGREGATE json.tojsonarray( string ) TO PUBLIC;
create aggregate json.tojsonarray( x double ) returns string external name
aggr.jsonaggr;
GRANT EXECUTE ON AGGREGATE json.tojsonarray( double ) TO PUBLIC;
+
+
+create function sys.read_nd_json(fname string)
+returns table(json JSON)
+external name json.read_nd_json;
+-- GRANT EXECUTE ON FUNCTION json.read_nd_json(string) TO PUBLIC;
diff --git a/sql/test/nested/Tests/bluesky.json
b/sql/test/nested/Tests/bluesky.json
new file mode 100644
--- /dev/null
+++ b/sql/test/nested/Tests/bluesky.json
@@ -0,0 +1,5 @@
+{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}
+{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}
+{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}
+{"did":"did:plc:cdsd346mwow7aj3tgfkwsct3","time_us":1732206349003461,"kind":"commit","commit":{"rev":"3lbhus5vior2t","operation":"create","collection":"app.bsky.feed.repost","rkey":"3lbhus5vbtz2t","record":{"$type":"app.bsky.feed.repost","createdAt":"2024-11-21T16:23:36.714Z","subject":{"cid":"bafyreieaacfiobnuqvjhhsndyi5s3fd6krbzdduxsyrzfv43kczpcmkl6y","uri":"at://did:plc:o5q6dynpme4ndolc3heztasm/app.bsky.feed.post/3lbfli3qsoc2o"}},"cid":"bafyreid5ycocp5zq2g7fcx2xxzxrbafuh7b5qhtwuwiomzo6vqila2cbpu"}}
+{"did":"did:plc:hbc74dlsxhq53kp5oxges6d7","time_us":1732206349004769,"kind":"commit","commit":{"rev":"3lbhuvzedg52j","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdyof2j","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.167Z","subject":{"cid":"bafyreiaumopip75nzx2xjbugtwemdppsyx54bd2odf6q45f3o7xkocgari","uri":"at://did:plc:ig2jv6gqup4t7gdq2pmanknw/app.bsky.feed.post/3lbhuvtlaec2c"}},"cid":"bafyreidjk2svg2fdjiiwohmfmvp3hdxhpb33ycnixzbkyib5m6cocindxq"}}
diff --git a/sql/test/nested/Tests/bluesky.test.in
b/sql/test/nested/Tests/bluesky.test.in
new file mode 100644
--- /dev/null
+++ b/sql/test/nested/Tests/bluesky.test.in
@@ -0,0 +1,34 @@
+statement ok
+create type "subject" as ("cid" varchar, "uri" varchar)
+
+statement ok
+create type "record" as ("$type" varchar, "createdAt" timestamp, "subject"
subject)
+
+statement ok
+create type "commit" as ("rev" varchar, "operation" varchar, "collection"
varchar, "rkey" varchar, "record" "record", "cid" varchar)
+
+statement ok
+create type event as ("did" varchar, "time_us" int, "kind" varchar, "commit"
"commit")
+
+statement ok
+create table bluesky ("event" event)
+
+query I
+select count(*) from read_nd_json(r'$TSTSRCDIR/bluesky.json')
+----
+5
+
+statement ok
+select cast(t.json as event) as e from (select json from
read_nd_json(r'$TSTSRCDIR/bluesky.json')) t
+
+statement ok
+drop table bluesky
+
+statement ok
+drop type event
+
+statement ok
+drop type "commit"
+
+statement ok
+drop type "record"
diff --git a/sql/test/nested/Tests/simple.test
b/sql/test/nested/Tests/simple.test
--- a/sql/test/nested/Tests/simple.test
+++ b/sql/test/nested/Tests/simple.test
@@ -199,3 +199,17 @@ left
1
l1
2
+
+statement ok
+create table tpi( p pair[2], i integer)
+
+statement ok
+insert into tpi values (array [ ('left','right'), ('l2','r2') ], 1), (array [
('left','right'), ('l2','r2') ], 2), (array [ ('l3', 'r3') ], 3)
+
+#statement ok
+#insert into tpi values ('{("l4", "r4"), ("l42", "r42")}', 4), ('{("l5",
"r5")}', 5);
+
+query I
+select count(*) from tpi
+----
+3
diff --git a/sql/test/nested/Tests/webclicks.test.in
b/sql/test/nested/Tests/webclicks.test.in
--- a/sql/test/nested/Tests/webclicks.test.in
+++ b/sql/test/nested/Tests/webclicks.test.in
@@ -46,7 +46,7 @@ select count(*) from r'$TSTSRCDIR/webcli
2
query TTTTTT
-select e.eventid, e.sessionid, e.userid, e.event, e.timestamp, e.user_agent
from ( select cast(t.json as event) as e from (select json from
r'$TSTSRCDIR/webclicks.json') t);
+select e.eventid, e.sessionid, e.userid, e.event, e.timestamp, e.user_agent
from ( select cast(t.json as event) as e from (select json from
r'$TSTSRCDIR/webclicks.json') t)
----
996257967-103007874
47f07c1399c9c6bd1012861f9c5c958e042732e259b909e077f7e8967b650c75
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]