On Wed, 15 Aug 2012 13:45:43 -0500 Michael Roth <mdr...@linux.vnet.ibm.com> wrote:
> Currently, when parsing a stream of tokens we make a copy of the token > list at the beginning of each level of recursion so that we do not > modify the original list in cases where we need to fall back to an > earlier state. > > In the worst case, we will only read 1 or 2 tokens off the list before > recursing again, which means an upper bound of roughly N^2 token allocations. > > For a "reasonably" sized QMP request (in this a QMP representation of > cirrus_vga's device state, generated via QIDL, being passed in via > qom-set), this caused my 16GB's of memory to be exhausted before any > noticeable progress was made by the parser. > > This patch works around the issue by using single copy of the token list > in the form of an indexable array so that we can save/restore state by > manipulating indices. > > A subsequent commit adds a "large_dict" test case which exhibits the > same behavior as above. With this patch applied the test case successfully > completes in under a second. > > Tested with valgrind, make check, and QMP. > > Signed-off-by: Michael Roth <mdr...@linux.vnet.ibm.com> > --- > json-parser.c | 230 > +++++++++++++++++++++++++++++++++++---------------------- > 1 file changed, 142 insertions(+), 88 deletions(-) > > diff --git a/json-parser.c b/json-parser.c > index 849e215..457291b 100644 > --- a/json-parser.c > +++ b/json-parser.c > @@ -27,6 +27,11 @@ > typedef struct JSONParserContext > { > Error *err; > + struct { > + QObject **buf; > + size_t pos; > + size_t count; > + } tokens; > } JSONParserContext; > > #define BUG_ON(cond) assert(!(cond)) > @@ -40,7 +45,7 @@ typedef struct JSONParserContext > * 4) deal with premature EOI > */ > > -static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list > *ap); > +static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); > > /** > * Token manipulators > @@ -270,27 +275,111 @@ out: > return NULL; > } > > +static QObject *parser_context_pop_token(JSONParserContext *ctxt) > +{ > + QObject *token; > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > + token = ctxt->tokens.buf[ctxt->tokens.pos]; > + ctxt->tokens.pos++; Shouldn't pos be decremented instead? Haven't tried it yet to confirm, but if I'm right I can fix it myself (unless this requires fixes in other areas). > + return token; > +} > + > +/* Note: parser_context_{peek|pop}_token do not increment the > + * token object's refcount. In both cases the references will continue > + * to be tracked and cleaned up in parser_context_free(), so do not > + * attempt to free the token object. > + */ > +static QObject *parser_context_peek_token(JSONParserContext *ctxt) > +{ > + QObject *token; > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > + token = ctxt->tokens.buf[ctxt->tokens.pos]; > + return token; > +} > + > +static JSONParserContext parser_context_save(JSONParserContext *ctxt) > +{ > + JSONParserContext saved_ctxt = {0}; > + saved_ctxt.tokens.pos = ctxt->tokens.pos; > + saved_ctxt.tokens.count = ctxt->tokens.count; > + saved_ctxt.tokens.buf = ctxt->tokens.buf; > + return saved_ctxt; > +} > + > +static void parser_context_restore(JSONParserContext *ctxt, > + JSONParserContext saved_ctxt) > +{ > + ctxt->tokens.pos = saved_ctxt.tokens.pos; > + ctxt->tokens.count = saved_ctxt.tokens.count; > + ctxt->tokens.buf = saved_ctxt.tokens.buf; > +} > + > +static void tokens_append_from_iter(QObject *obj, void *opaque) > +{ > + JSONParserContext *ctxt = opaque; > + g_assert(ctxt->tokens.pos < ctxt->tokens.count); > + ctxt->tokens.buf[ctxt->tokens.pos++] = obj; > + qobject_incref(obj); > +} > + > +static JSONParserContext *parser_context_new(QList *tokens) > +{ > + JSONParserContext *ctxt; > + size_t count; > + > + if (!tokens) { > + return NULL; > + } > + > + count = qlist_size(tokens); > + if (count == 0) { > + return NULL; > + } > + > + ctxt = g_malloc0(sizeof(JSONParserContext)); > + ctxt->tokens.pos = 0; > + ctxt->tokens.count = count; > + ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); > + qlist_iter(tokens, tokens_append_from_iter, ctxt); > + ctxt->tokens.pos = 0; > + > + return ctxt; > +} > + > +/* to support error propagation, ctxt->err must be freed separately */ > +static void parser_context_free(JSONParserContext *ctxt) > +{ > + int i; > + if (ctxt) { > + for (i = 0; i < ctxt->tokens.count; i++) { > + qobject_decref(ctxt->tokens.buf[i]); > + } > + g_free(ctxt->tokens.buf); > + g_free(ctxt); > + } > +} > + > /** > * Parsing rules > */ > -static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, > va_list *ap) > +static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) > { > QObject *key = NULL, *token = NULL, *value, *peek; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - peek = qlist_peek(working); > + peek = parser_context_peek_token(ctxt); > if (peek == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > > - key = parse_value(ctxt, &working, ap); > + key = parse_value(ctxt, ap); > if (!key || qobject_type(key) != QTYPE_QSTRING) { > parse_error(ctxt, peek, "key is not a string in object"); > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -301,7 +390,7 @@ static int parse_pair(JSONParserContext *ctxt, QDict > *dict, QList **tokens, va_l > goto out; > } > > - value = parse_value(ctxt, &working, ap); > + value = parse_value(ctxt, ap); > if (value == NULL) { > parse_error(ctxt, token, "Missing value in dict"); > goto out; > @@ -309,28 +398,24 @@ static int parse_pair(JSONParserContext *ctxt, QDict > *dict, QList **tokens, va_l > > qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); > > - qobject_decref(token); > qobject_decref(key); > - QDECREF(*tokens); > - *tokens = working; > > return 0; > > out: > - qobject_decref(token); > + parser_context_restore(ctxt, saved_ctxt); > qobject_decref(key); > - QDECREF(working); > > return -1; > } > > -static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, > va_list *ap) > +static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) > { > QDict *dict = NULL; > QObject *token, *peek; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -338,23 +423,22 @@ static QObject *parse_object(JSONParserContext *ctxt, > QList **tokens, va_list *a > if (!token_is_operator(token, '{')) { > goto out; > } > - qobject_decref(token); > token = NULL; > > dict = qdict_new(); > > - peek = qlist_peek(working); > + peek = parser_context_peek_token(ctxt); > if (peek == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > > if (!token_is_operator(peek, '}')) { > - if (parse_pair(ctxt, dict, &working, ap) == -1) { > + if (parse_pair(ctxt, dict, ap) == -1) { > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -365,59 +449,52 @@ static QObject *parse_object(JSONParserContext *ctxt, > QList **tokens, va_list *a > parse_error(ctxt, token, "expected separator in dict"); > goto out; > } > - qobject_decref(token); > token = NULL; > > - if (parse_pair(ctxt, dict, &working, ap) == -1) { > + if (parse_pair(ctxt, dict, ap) == -1) { > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > } > - qobject_decref(token); > token = NULL; > } else { > - token = qlist_pop(working); > - qobject_decref(token); > + token = parser_context_pop_token(ctxt); > token = NULL; > } > > - QDECREF(*tokens); > - *tokens = working; > - > return QOBJECT(dict); > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > QDECREF(dict); > return NULL; > } > > -static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list > *ap) > +static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) > { > QList *list = NULL; > QObject *token, *peek; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > > if (!token_is_operator(token, '[')) { > + token = NULL; > goto out; > } > - qobject_decref(token); > token = NULL; > > list = qlist_new(); > > - peek = qlist_peek(working); > + peek = parser_context_peek_token(ctxt); > if (peek == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -426,7 +503,7 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > if (!token_is_operator(peek, ']')) { > QObject *obj; > > - obj = parse_value(ctxt, &working, ap); > + obj = parse_value(ctxt, ap); > if (obj == NULL) { > parse_error(ctxt, token, "expecting value"); > goto out; > @@ -434,7 +511,7 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > > qlist_append_obj(list, obj); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > @@ -446,10 +523,9 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > goto out; > } > > - qobject_decref(token); > token = NULL; > > - obj = parse_value(ctxt, &working, ap); > + obj = parse_value(ctxt, ap); > if (obj == NULL) { > parse_error(ctxt, token, "expecting value"); > goto out; > @@ -457,39 +533,33 @@ static QObject *parse_array(JSONParserContext *ctxt, > QList **tokens, va_list *ap > > qlist_append_obj(list, obj); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > parse_error(ctxt, NULL, "premature EOI"); > goto out; > } > } > > - qobject_decref(token); > token = NULL; > } else { > - token = qlist_pop(working); > - qobject_decref(token); > + token = parser_context_pop_token(ctxt); > token = NULL; > } > > - QDECREF(*tokens); > - *tokens = working; > - > return QOBJECT(list); > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > QDECREF(list); > return NULL; > } > > -static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens) > +static QObject *parse_keyword(JSONParserContext *ctxt) > { > QObject *token, *ret; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -507,29 +577,24 @@ static QObject *parse_keyword(JSONParserContext *ctxt, > QList **tokens) > goto out; > } > > - qobject_decref(token); > - QDECREF(*tokens); > - *tokens = working; > - > return ret; > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > > return NULL; > } > > -static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, > va_list *ap) > +static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) > { > QObject *token = NULL, *obj; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > if (ap == NULL) { > goto out; > } > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -553,25 +618,20 @@ static QObject *parse_escape(JSONParserContext *ctxt, > QList **tokens, va_list *a > goto out; > } > > - qobject_decref(token); > - QDECREF(*tokens); > - *tokens = working; > - > return obj; > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > > return NULL; > } > > -static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens) > +static QObject *parse_literal(JSONParserContext *ctxt) > { > QObject *token, *obj; > - QList *working = qlist_copy(*tokens); > + JSONParserContext saved_ctxt = parser_context_save(ctxt); > > - token = qlist_pop(working); > + token = parser_context_pop_token(ctxt); > if (token == NULL) { > goto out; > } > @@ -591,35 +651,30 @@ static QObject *parse_literal(JSONParserContext *ctxt, > QList **tokens) > goto out; > } > > - qobject_decref(token); > - QDECREF(*tokens); > - *tokens = working; > - > return obj; > > out: > - qobject_decref(token); > - QDECREF(working); > + parser_context_restore(ctxt, saved_ctxt); > > return NULL; > } > > -static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list > *ap) > +static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) > { > QObject *obj; > > - obj = parse_object(ctxt, tokens, ap); > + obj = parse_object(ctxt, ap); > if (obj == NULL) { > - obj = parse_array(ctxt, tokens, ap); > + obj = parse_array(ctxt, ap); > } > if (obj == NULL) { > - obj = parse_escape(ctxt, tokens, ap); > + obj = parse_escape(ctxt, ap); > } > if (obj == NULL) { > - obj = parse_keyword(ctxt, tokens); > + obj = parse_keyword(ctxt); > } > if (obj == NULL) { > - obj = parse_literal(ctxt, tokens); > + obj = parse_literal(ctxt); > } > > return obj; > @@ -632,19 +687,18 @@ QObject *json_parser_parse(QList *tokens, va_list *ap) > > QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) > { > - JSONParserContext ctxt = {}; > - QList *working; > + JSONParserContext *ctxt = parser_context_new(tokens); > QObject *result; > > - if (!tokens) { > + if (!ctxt) { > return NULL; > } > - working = qlist_copy(tokens); > - result = parse_value(&ctxt, &working, ap); > > - QDECREF(working); > + result = parse_value(ctxt, ap); > + > + error_propagate(errp, ctxt->err); > > - error_propagate(errp, ctxt.err); > + parser_context_free(ctxt); > > return result; > }