Eric Blake <ebl...@redhat.com> writes: > We want -Wformat to catch blatant programming errors in format > strings that we pass to qobject_from_jsonf(). But if someone > were to pass a JSON string "'%s'" as the format string, gcc would > insist that it be paired with a char* argument, even though our > lexer does not recognize % sequences inside a JSON string. You can > bypass -Wformat checking by passing the Unicode escape \u0025 for > %, but who wants to remember to type that? So the solution is that > anywhere we are relying on -Wformat checking, the caller should > pass the usual printf %% escape sequence where a literal % is > wanted on output. > > Note that since % can only appear in JSON inside a string, we don't > have to teach the lexer how to parse any new % sequences, but instead > only have to add code to the parser. Likewise, the parser is still > where we reject any attempt at mid-string % interpolation other than > %% (this is only a runtime failure, rather than compile-time), but > since we already document that qobject_from_jsonf() asserts on invalid > usage, presumably anyone that is adding a new usage will have tested > that their usage doesn't always fail. > > Simplify qstring_from_escaped_str() while touching it, by using > bool, a more compact conditional, and qstring_append_chr(). > Likewise, improve the error message when parse_escape() is reached > without interpolation (for example, if a client sends garbage > rather than JSON over a QMP connection). > > The testsuite additions pass under valgrind, proving that we are > indeed passing the reference of anything given through %p to the > returned containing object, even when more than one object is > interpolated. > > Signed-off-by: Eric Blake <ebl...@redhat.com> > --- > qobject/json-lexer.c | 6 ++++-- > qobject/json-parser.c | 49 ++++++++++++++++++++++++------------------------- > qobject/qjson.c | 4 ++-- > tests/check-qjson.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 80 insertions(+), 29 deletions(-) > > diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c > index b846d2852d..599b7446b7 100644 > --- a/qobject/json-lexer.c > +++ b/qobject/json-lexer.c > @@ -32,9 +32,11 @@ > * Extension for vararg handling in JSON construction, when using > * qobject_from_jsonf() instead of qobject_from_json() (this lexer > * actually accepts multiple forms of PRId64, but parse_escape() later > - * filters to only parse the current platform's spelling): > + * filters to only parse the current platform's spelling; meanwhile, > + * JSON only allows % inside strings, where the parser handles %%, so > + * we do not need to lex it here):
The parenthesis is becoming unwieldy. Turn it into a note... > * > - * %(PRI[du]64|(l|ll)?[ud]|[ipsf]) > + * %(PRI[du]64|(l|ll)?[ud]|[ipsf%]) > * ... here? > */ > > diff --git a/qobject/json-parser.c b/qobject/json-parser.c > index 388aa7a386..daafe77a0c 100644 > --- a/qobject/json-parser.c > +++ b/qobject/json-parser.c > @@ -120,25 +120,21 @@ static int hex2decimal(char ch) > * \n > * \r > * \t > - * \u four-hex-digits > + * \u four-hex-digits > + * > + * Additionally, if @percent is true, all % in @token must be doubled, > + * replaced by a single % will be in the result; this allows -Wformat > + * processing of qobject_from_jsonf(). > */ > static QString *qstring_from_escaped_str(JSONParserContext *ctxt, > - JSONToken *token) > + JSONToken *token, bool percent) > { > const char *ptr = token->str; > QString *str; > - int double_quote = 1; > - > - if (*ptr == '"') { > - double_quote = 1; > - } else { > - double_quote = 0; > - } > - ptr++; > + bool double_quote = *ptr++ == '"'; > > str = qstring_new(); > - while (*ptr && > - ((double_quote && *ptr != '"') || (!double_quote && *ptr != > '\''))) { > + while (*ptr && *ptr != "'\""[double_quote]) { Simpler: bool quote = *ptr++; and then while (*ptr && *ptr != quote) { Have you considered splitting the patch into one to simplify, and one to implement %%? > if (*ptr == '\\') { > ptr++; > > @@ -205,12 +201,13 @@ static QString > *qstring_from_escaped_str(JSONParserContext *ctxt, > goto out; > } > } else { > - char dummy[2]; > - > - dummy[0] = *ptr++; > - dummy[1] = 0; > - > - qstring_append(str, dummy); > + if (*ptr == '%' && percent) { > + if (*++ptr != '%') { > + parse_error(ctxt, token, "invalid %% sequence in > string"); > + goto out; > + } > + } > + qstring_append_chr(str, *ptr++); > } > } > > @@ -455,13 +452,15 @@ static QObject *parse_escape(JSONParserContext *ctxt, > va_list *ap) > { > JSONToken *token; > > - if (ap == NULL) { > - return NULL; > - } > - > token = parser_context_pop_token(ctxt); > assert(token && token->type == JSON_ESCAPE); > > + if (ap == NULL) { > + parse_error(ctxt, token, "escape parsing for '%s' not requested", > + token->str); > + return NULL; > + } > + When I manage to fat-finger a '%' into my QMP input, I now get this error message instead of "Invalid JSON syntax". Makes me go "what is escape parsing, and how do I request it?" Not an improvement, I'm afraid. > if (!strcmp(token->str, "%p")) { > return va_arg(*ap, QObject *); > } else if (!strcmp(token->str, "%i")) { > @@ -490,7 +489,7 @@ static QObject *parse_escape(JSONParserContext *ctxt, > va_list *ap) > return NULL; > } > > -static QObject *parse_literal(JSONParserContext *ctxt) > +static QObject *parse_literal(JSONParserContext *ctxt, bool percent) Let's make it take va_list *ap instead, for symmetry with the other parse_FOO(). > { > JSONToken *token; > > @@ -499,7 +498,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) > > switch (token->type) { > case JSON_STRING: > - return QOBJECT(qstring_from_escaped_str(ctxt, token)); > + return QOBJECT(qstring_from_escaped_str(ctxt, token, percent)); > case JSON_INTEGER: { > /* > * Represent JSON_INTEGER as QNUM_I64 if possible, else as > @@ -562,7 +561,7 @@ static QObject *parse_value(JSONParserContext *ctxt, > va_list *ap) > case JSON_INTEGER: > case JSON_FLOAT: > case JSON_STRING: > - return parse_literal(ctxt); > + return parse_literal(ctxt, ap); > case JSON_KEYWORD: > return parse_keyword(ctxt); > default: > diff --git a/qobject/qjson.c b/qobject/qjson.c > index 210c290aa9..2244292d1a 100644 > --- a/qobject/qjson.c > +++ b/qobject/qjson.c > @@ -66,8 +66,7 @@ QObject *qobject_from_json(const char *string, Error **errp) > * Parses JSON input with interpolation of % sequences. > * > * The set of sequences recognized is compatible with gcc's -Wformat > - * warnings, although not all printf sequences are valid. All use of > - * % must occur outside JSON strings. > + * warnings, although not all printf sequences are valid. Keep the "All use of %" sentence, but add ", except %% must occcur only within JSON strings". > * > * %i - treat corresponding integer value as JSON bool > * %[l[l]]d, %PRId64 - treat sized integer value as signed JSON number > @@ -75,6 +74,7 @@ QObject *qobject_from_json(const char *string, Error **errp) > * %f - treat double as JSON number (undefined for inf, NaN) > * %s - convert char * into JSON string (adds escapes, outer quotes) > * %p - embed QObject, transferring the reference to the returned object > + * %% - literal %, usable only within JSON string No need to repeat "only within JSON strings" then. > * > * IMPORTANT: This function aborts on error, thus it must not > * be used with untrusted arguments. > diff --git a/tests/check-qjson.c b/tests/check-qjson.c > index 1ad1f41a52..31815b2d04 100644 > --- a/tests/check-qjson.c > +++ b/tests/check-qjson.c > @@ -1408,6 +1408,55 @@ static void empty_input(void) > g_assert(obj == NULL); > } > > +static void percent_and_vararg(void) > +{ > + QObject *obj; > + QString *str; > + QList *list; > + Error *err = NULL; > + > + /* Use of % escapes requires vararg form */ > + obj = qobject_from_json("%d", &err); Since %d is not recognized, this is a lexical error. Okay. > + error_free_or_abort(&err); > + g_assert(!obj); > + > + /* In normal form, % in strings is literal */ > + obj = qobject_from_json("'%% %s \\u0025d'", &error_abort); > + str = qobject_to_qstring(obj); > + g_assert_cmpstr(qstring_get_str(str), ==, "%% %s %d"); > + qobject_decref(obj); > + > + /* > + * In vararg form, % in strings must be escaped (via the normal > + * printf-escaping, or via a \u escape). The returned value now > + * owns references to any %p counterpart. > + */ > + obj = qobject_from_jsonf("[ %p, '%% \\u0025s', %p ]", > + qstring_from_str("one"), > + qstring_from_str("three")); > + list = qobject_to_qlist(obj); > + str = qobject_to_qstring(qlist_pop(list)); > + g_assert_cmpstr(qstring_get_str(str), ==, "one"); > + QDECREF(str); > + str = qobject_to_qstring(qlist_pop(list)); > + g_assert_cmpstr(qstring_get_str(str), ==, "% %s"); > + QDECREF(str); > + str = qobject_to_qstring(qlist_pop(list)); > + g_assert_cmpstr(qstring_get_str(str), ==, "three"); > + QDECREF(str); > + g_assert(qlist_empty(list)); > + qobject_decref(obj); I get what you mean by "vararg form" and "normal form", but I'm afraid it's less than obvious for the uninitiated. What about /* * Check qobject_from_json() does not interpolate % */ /* outside JSON string */ obj = qobject_from_json("%d", &err); error_free_or_abort(&err); g_assert(!obj); /* within JSON string */ obj = qobject_from_json("'%% %s \\u0025d'", &error_abort); str = qobject_to_qstring(obj); g_assert_cmpstr(qstring_get_str(str), ==, "%% %s %d"); qobject_decref(obj); /* * Check how qobject_from_jsonf() interpolates % */ obj = qobject_from_jsonf("[ %p, '%% \\u0025s', %p ]", qstring_from_str("one"), qstring_from_str("three")); > + > + /* The following intentionally cause assertion failures */ > + > + /* In vararg form, %% must occur in strings */ > + /* qobject_from_jsonf("%%"); */ > + /* qobject_from_jsonf("{%%}"); */ > + > + /* In vararg form, strings must not use % except for %% */ > + /* qobject_from_jsonf("'%s'", "unpaired"); */ Could use g_test_trap_subprocess(). Not sure it's worth the bother. I hate code in comments. Better: /* The following intentionally cause assertion failures */ #if 0 /* In vararg form, %% must occur in strings */ qobject_from_jsonf("%%"); qobject_from_jsonf("{%%}"); /* In vararg form, strings must not use % except for %% */ qobject_from_jsonf("'%s'", "unpaired"); #endif > +} > + > static void unterminated_string(void) > { > Error *err = NULL; > @@ -1540,6 +1589,7 @@ int main(int argc, char **argv) > g_test_add_func("/varargs/simple_varargs", simple_varargs); > > g_test_add_func("/errors/empty_input", empty_input); > + g_test_add_func("/errors/percent_and_vararg", percent_and_vararg); > g_test_add_func("/errors/unterminated/string", unterminated_string); > g_test_add_func("/errors/unterminated/escape", unterminated_escape); > g_test_add_func("/errors/unterminated/sq_string", > unterminated_sq_string);