Changeset: 2774b50a1a48 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2774b50a1a48
Modified Files:
sql/backends/monet5/sql.c
sql/include/sql_catalog.h
Branch: nested
Log Message:
Fix out-of-order and missing attributes in the JSON file loader
diffs (truncated from 334 to 300 lines):
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -5867,30 +5867,35 @@ static str
insert_json_value(JSONterm *jt, sql_subtype *t, BAT *b)
{
char *msg = MAL_SUCCEED;
- size_t vsize = jt->valuelen;
- char *val = (char *)jt->value;
-
- ValPtr v = NULL;
- ValRecord vr = (ValRecord) {.bat=false, .vtype=t->type->localtype};
- if (t->type->localtype == ATOMindex("json"))
- vr.vtype = TYPE_str;
- char eos = val[vsize];
- val[vsize] = '\0';
- v = jsonv2local(&vr, val);
- val[vsize] = eos;
- if (v) {
- if (BUNappend(b, VALget(v), false) != GDK_SUCCEED)
- msg = createException(SQL, "sql.insert_json_value",
"BUNappend failed");
+ if (jt) {
+ size_t vsize = jt->valuelen;
+ char *val = (char *)jt->value;
+
+ ValPtr v = NULL;
+ ValRecord vr = (ValRecord) {.bat=false,
.vtype=t->type->localtype};
+ if (t->type->localtype == ATOMindex("json"))
+ vr.vtype = TYPE_str;
+ char eos = val[vsize];
+ val[vsize] = '\0';
+ v = jsonv2local(&vr, val);
+ val[vsize] = eos;
+ if (v) {
+ if (BUNappend(b, VALget(v), false) != GDK_SUCCEED)
+ msg = createException(SQL,
"sql.insert_json_value", "Error appending value for type %d", v->vtype);
+ } else {
+ msg = createException(SQL, "sql.insert_json_value",
"jsonv2local failed");
+ }
+ if (v->vtype == TYPE_str)
+ GDKfree(v->val.sval);
} else {
- msg = createException(SQL, "sql.insert_json_value",
"jsonv2local failed");
- }
- if (v->vtype == TYPE_str)
- GDKfree(v->val.sval);
+ if (BUNappend(b, ATOMnilptr(t->type->localtype), false) !=
GDK_SUCCEED)
+ msg = createException(SQL, "sql.insert_json_value",
"Error appending NULL for %d", t->type->localtype);
+ }
return msg;
}
static sql_subtype*
-find_subtype_field(sql_subtype *t, const char *kname, size_t klen)
+find_subtype_field(sql_subtype *t, const char *kname, size_t klen, size_t
*offset, size_t *index)
{
sql_subtype *nt = NULL;
for(node *n = t->type->d.fields->h; n; n = n->next) {
@@ -5899,22 +5904,78 @@ find_subtype_field(sql_subtype *t, const
if (klen == alen && strncmp(kname, a->name, klen) == 0) {
nt = &a->type;
break;
+ } else {
+ *offset += composite_type_resultsize(&a->type);
+ *index += 1;
}
}
return nt;
}
+static sql_subtype*
+find_subtype_field_by_index(sql_subtype *t, size_t index, size_t *offset)
+{
+ size_t indx = 0;
+ sql_subtype *nt = NULL;
+ for(node *n = t->type->d.fields->h; n; n = n->next) {
+ sql_arg *a = n->data;
+ nt = &a->type;
+ if(indx == index)
+ break;
+ *offset += composite_type_resultsize(nt);
+ indx++;
+ }
+ return nt;
+}
+
static int
+fill_null(char **msg, sql_subtype *t, BAT **bats, int offset)
+{
+ if (t->multiset) {
+ offset += composite_type_resultsize(t) - 1;
+ BAT *b = bats[offset];
+ if (BUNappend(b, ATOMnilptr(TYPE_int), false) != GDK_SUCCEED) {
+ *msg = createException(SQL, "sql.fill_null", "Append
NULL for multiset failed!");
+ return -1;
+ }
+ offset +=1;
+ } else if (t->type->composite) {
+ for (node *n = t->type->d.fields->h; n; n = n->next) {
+ sql_arg *a = n->data;
+ offset = fill_null(msg, &a->type, bats, offset);
+ if (offset < 0)
+ return -1;
+ }
+ } else {
+ BAT *b = bats[offset];
+ if ((*msg = insert_json_value(NULL, t, b)) != MAL_SUCCEED)
+ return -1;
+ offset += 1;
+ }
+ return offset;
+}
+
+#define MAX_ATTR_SIZE 256
+#define ERROR_UNKNOWN_FIELD "unknown field"
+
+static int
insert_json_object(char **msg, JSON *js, BAT **bats, int *BO, int nr, int elm,
sql_subtype *t)
{
int bat_offset = *BO;
+ int start_offset = *BO;
JSONterm *ja = js->elm+elm;
if (ja->kind != JSON_OBJECT || !t->type->composite) {
- *msg = "missing object start";
+ *msg = createException(SQL, "sql.insert_json_object", "missing
object start");
return -1;
}
const char *name = NULL;
int nlen = 0;
+ size_t alen = list_length(t->type->d.fields); // num attributes
+ if (alen > MAX_ATTR_SIZE) {
+ *msg = createException(SQL, "sql.insert_json_object", "max
attribute size exceeded");
+ return -1;
+ }
+ int used_mask[MAX_ATTR_SIZE] = {0}; // assume up to that many attributes
/* TODO check if full object is there */
for (elm++; elm > 0 && elm <= ja->tail+1; elm++) {
JSONterm *jt = js->elm+elm;
@@ -5922,46 +5983,56 @@ insert_json_object(char **msg, JSON *js,
if (bat_offset > nr)
return -10;
switch (jt->kind) {
- case JSON_OBJECT:
- if (name && nlen) {
- sql_subtype *nt = find_subtype_field(t, name,
nlen);
- if (nt && nt->type->composite)
- elm = insert_json_object(msg, js, bats,
&bat_offset, nr, elm, nt);
- else if (nt && nt->type->localtype ==
ATOMindex("json")){
- // json string value
- if ((*msg = insert_json_value(jt, nt,
bats[bat_offset])) != MAL_SUCCEED)
- return -1;
- // set term offset
- elm = ((jt - 1)->next) - 1; // ? is
this right
- bat_offset ++;
+ case JSON_OBJECT: {
+ assert(name && nlen);
+ size_t offset = 0;
+ size_t index = 0;
+ sql_subtype *nt = find_subtype_field(t, name,
nlen, &offset, &index);
+ if (nt) {
+ bat_offset = start_offset + offset;
+ if (nt->type->composite) {
+ if ((elm =
insert_json_object(msg, js, bats, &bat_offset, nr, elm, nt)) < 0)
+ return elm;
+ } else if (nt->type->localtype ==
ATOMindex("json")){
+ // json string value
+ if ((*msg =
insert_json_value(jt, nt, bats[bat_offset])) != MAL_SUCCEED)
+ return -1;
+ // set term offset
+ elm = ((jt - 1)->next) - 1; //
? is this right
+ //bat_offset++;
+ }
+ used_mask[index] = 1;
} else {
- assert(0);
+ *msg = createException(SQL,
"sql.insert_json_object", ERROR_UNKNOWN_FIELD);
+ return -1;
}
- } else {
- assert(0);
+ break;
}
- break;
- case JSON_ARRAY:
- /* TODO get id for nested array from the a global
struct */
- if (name && nlen) {
- // find subtype matching field
- sql_subtype *nt = find_subtype_field(t, name,
nlen);
- if(nt && nt->multiset)
- elm = insert_json_array(msg, js, bats,
&bat_offset, nr, elm, nt);
- else if (nt && nt->type->localtype ==
ATOMindex("json")) {
- // json string value
- if ((*msg = insert_json_value(jt, nt,
bats[bat_offset])) != MAL_SUCCEED)
- return -1;
- // set term offset
- elm = ((jt - 1)->next) - 1; // ? is
this right
- bat_offset ++;
+ case JSON_ARRAY: {
+ assert(name && nlen);
+ size_t offset = 0;
+ size_t index = 0;
+ sql_subtype *nt = find_subtype_field(t, name,
nlen, &offset, &index);
+ if (nt) {
+ bat_offset = start_offset + offset;
+ if(nt->multiset) {
+ if ((elm =
insert_json_array(msg, js, bats, &bat_offset, nr, elm, nt)) < 0)
+ return elm;
+ } else if (nt->type->localtype ==
ATOMindex("json")) {
+ // json string value
+ if ((*msg =
insert_json_value(jt, nt, bats[bat_offset])) != MAL_SUCCEED)
+ return -1;
+ // set term offset
+ elm = ((jt - 1)->next) - 1; //
? is this right
+ //bat_offset++;
+ }
+ used_mask[index] = 1;
} else {
- assert(0);
+ *msg = createException(SQL,
"sql.insert_json_object", ERROR_UNKNOWN_FIELD);
+ return -1;
}
- } else {
- assert(0);
+ break;
}
- break;
case JSON_ELEMENT: // field
name = jt->value;
nlen = (int)jt->valuelen;
@@ -5976,14 +6047,19 @@ insert_json_object(char **msg, JSON *js,
case JSON_NUMBER:
case JSON_BOOL:
case JSON_NULL:
+ assert(name && nlen);
if (name && nlen) {
- sql_subtype *nt = find_subtype_field(t, name,
nlen);
+ size_t offset = 0;
+ size_t index = 0;
+ sql_subtype *nt = find_subtype_field(t, name,
nlen, &offset, &index);
if (nt) {
+ bat_offset = start_offset + offset;
if ((*msg = insert_json_value(jt, nt,
bats[bat_offset])) != MAL_SUCCEED)
return -1;
- bat_offset ++;
+ //bat_offset++;
+ used_mask[index] = 1;
} else {
- *msg = "field name missing";
+ *msg = createException(SQL,
"sql.insert_json_object", ERROR_UNKNOWN_FIELD);
return -1;
}
}
@@ -5991,6 +6067,31 @@ insert_json_object(char **msg, JSON *js,
}
if (bat_offset > nr)
return -10;
+ // append null for all unused fields
+ for (size_t i=0; i < alen; i++) {
+ if (used_mask[i] == 0) {
+ size_t offset = 0;
+ sql_subtype *nt = find_subtype_field_by_index(t, i,
&offset);
+ if (nt) {
+ int index = start_offset + offset;
+ if((index = fill_null(msg, nt, bats, index)) <
0) {
+ TRC_ERROR(SQL_EXECUTION, "fill_null
failed");
+ return -1;
+ }
+ //if (index > bat_offset)
+ // bat_offset = index;
+ } else {
+ *msg = createException(SQL,
"sql.insert_json_object", ERROR_UNKNOWN_FIELD);
+ return -1;
+ }
+ }
+ }
+ // fix offset
+ bat_offset = start_offset;
+ for(node *n = t->type->d.fields->h; n; n = n->next) {
+ sql_arg *a = n->data;
+ bat_offset += composite_type_resultsize(&a->type);
+ }
*BO = bat_offset;
return elm;
}
@@ -6002,7 +6103,7 @@ insert_json_array(char **msg, JSON *js,
JSONterm *ja = js->elm+elm;
int tail = ja->tail;
if (ja->kind != JSON_ARRAY) {
- *msg = "missing array start";
+ *msg = createException(SQL, "sql.insert_json_array", "missing
array start");
return -1;
}
int id = -1, anr = 1;
@@ -6035,8 +6136,10 @@ insert_json_array(char **msg, JSON *js,
}
if (bat_offset > nr)
return -10;
- if (id == -1)
- bat_offset += composite_type_resultsize(t) - 1;
+ if (id == -1) {
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-leave@monetdb.org