We want -Wformat to catch blatant programming errors in format strings that we pass to qobject_from_jsonf(). But if someone were to pass a JSON string "'%s'" as the format string, gcc would insist that it be paired with a char* argument, even though our lexer does not recognize % sequences inside a JSON string. You can bypass -Wformat checking by passing the Unicode escape \u0025 for %, but who wants to remember to type that? So the solution is that anywhere we are relying on -Wformat checking, the caller should pass the usual printf %% escape sequence where a literal % is wanted on output.
Note that since % can only appear in JSON inside a string, we don't have to teach the lexer how to parse any new % sequences, but instead only have to add code to the parser. Likewise, the parser is still where we reject any attempt at mid-string % interpolation other than %% (this is only a runtime failure, rather than compile-time), but since we already document that qobject_from_jsonf() asserts on invalid usage, presumably anyone that is adding a new usage will have tested that their usage doesn't always fail. Simplify qstring_from_escaped_str() while touching it, by using bool, a more compact conditional, and qstring_append_chr(). Likewise, improve the error message when parse_escape() is reached without interpolation (for example, if a client sends garbage rather than JSON over a QMP connection). The testsuite additions pass under valgrind, proving that we are indeed passing the reference of anything given through %p to the returned containing object, even when more than one object is interpolated. Signed-off-by: Eric Blake <ebl...@redhat.com> --- qobject/json-lexer.c | 6 ++++-- qobject/json-parser.c | 49 ++++++++++++++++++++++++------------------------- qobject/qjson.c | 4 ++-- tests/check-qjson.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 29 deletions(-) diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index b846d2852d..599b7446b7 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -32,9 +32,11 @@ * Extension for vararg handling in JSON construction, when using * qobject_from_jsonf() instead of qobject_from_json() (this lexer * actually accepts multiple forms of PRId64, but parse_escape() later - * filters to only parse the current platform's spelling): + * filters to only parse the current platform's spelling; meanwhile, + * JSON only allows % inside strings, where the parser handles %%, so + * we do not need to lex it here): * - * %(PRI[du]64|(l|ll)?[ud]|[ipsf]) + * %(PRI[du]64|(l|ll)?[ud]|[ipsf%]) * */ diff --git a/qobject/json-parser.c b/qobject/json-parser.c index 388aa7a386..daafe77a0c 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -120,25 +120,21 @@ static int hex2decimal(char ch) * \n * \r * \t - * \u four-hex-digits + * \u four-hex-digits + * + * Additionally, if @percent is true, all % in @token must be doubled, + * replaced by a single % will be in the result; this allows -Wformat + * processing of qobject_from_jsonf(). */ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, - JSONToken *token) + JSONToken *token, bool percent) { const char *ptr = token->str; QString *str; - int double_quote = 1; - - if (*ptr == '"') { - double_quote = 1; - } else { - double_quote = 0; - } - ptr++; + bool double_quote = *ptr++ == '"'; str = qstring_new(); - while (*ptr && - ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { + while (*ptr && *ptr != "'\""[double_quote]) { if (*ptr == '\\') { ptr++; @@ -205,12 +201,13 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, goto out; } } else { - char dummy[2]; - - dummy[0] = *ptr++; - dummy[1] = 0; - - qstring_append(str, dummy); + if (*ptr == '%' && percent) { + if (*++ptr != '%') { + parse_error(ctxt, token, "invalid %% sequence in string"); + goto out; + } + } + qstring_append_chr(str, *ptr++); } } @@ -455,13 +452,15 @@ static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) { JSONToken *token; - if (ap == NULL) { - return NULL; - } - token = parser_context_pop_token(ctxt); assert(token && token->type == JSON_ESCAPE); + if (ap == NULL) { + parse_error(ctxt, token, "escape parsing for '%s' not requested", + token->str); + return NULL; + } + if (!strcmp(token->str, "%p")) { return va_arg(*ap, QObject *); } else if (!strcmp(token->str, "%i")) { @@ -490,7 +489,7 @@ static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) return NULL; } -static QObject *parse_literal(JSONParserContext *ctxt) +static QObject *parse_literal(JSONParserContext *ctxt, bool percent) { JSONToken *token; @@ -499,7 +498,7 @@ static QObject *parse_literal(JSONParserContext *ctxt) switch (token->type) { case JSON_STRING: - return QOBJECT(qstring_from_escaped_str(ctxt, token)); + return QOBJECT(qstring_from_escaped_str(ctxt, token, percent)); case JSON_INTEGER: { /* * Represent JSON_INTEGER as QNUM_I64 if possible, else as @@ -562,7 +561,7 @@ static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) case JSON_INTEGER: case JSON_FLOAT: case JSON_STRING: - return parse_literal(ctxt); + return parse_literal(ctxt, ap); case JSON_KEYWORD: return parse_keyword(ctxt); default: diff --git a/qobject/qjson.c b/qobject/qjson.c index 210c290aa9..2244292d1a 100644 --- a/qobject/qjson.c +++ b/qobject/qjson.c @@ -66,8 +66,7 @@ QObject *qobject_from_json(const char *string, Error **errp) * Parses JSON input with interpolation of % sequences. * * The set of sequences recognized is compatible with gcc's -Wformat - * warnings, although not all printf sequences are valid. All use of - * % must occur outside JSON strings. + * warnings, although not all printf sequences are valid. * * %i - treat corresponding integer value as JSON bool * %[l[l]]d, %PRId64 - treat sized integer value as signed JSON number @@ -75,6 +74,7 @@ QObject *qobject_from_json(const char *string, Error **errp) * %f - treat double as JSON number (undefined for inf, NaN) * %s - convert char * into JSON string (adds escapes, outer quotes) * %p - embed QObject, transferring the reference to the returned object + * %% - literal %, usable only within JSON string * * IMPORTANT: This function aborts on error, thus it must not * be used with untrusted arguments. diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 1ad1f41a52..31815b2d04 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -1408,6 +1408,55 @@ static void empty_input(void) g_assert(obj == NULL); } +static void percent_and_vararg(void) +{ + QObject *obj; + QString *str; + QList *list; + Error *err = NULL; + + /* Use of % escapes requires vararg form */ + obj = qobject_from_json("%d", &err); + error_free_or_abort(&err); + g_assert(!obj); + + /* In normal form, % in strings is literal */ + obj = qobject_from_json("'%% %s \\u0025d'", &error_abort); + str = qobject_to_qstring(obj); + g_assert_cmpstr(qstring_get_str(str), ==, "%% %s %d"); + qobject_decref(obj); + + /* + * In vararg form, % in strings must be escaped (via the normal + * printf-escaping, or via a \u escape). The returned value now + * owns references to any %p counterpart. + */ + obj = qobject_from_jsonf("[ %p, '%% \\u0025s', %p ]", + qstring_from_str("one"), + qstring_from_str("three")); + list = qobject_to_qlist(obj); + str = qobject_to_qstring(qlist_pop(list)); + g_assert_cmpstr(qstring_get_str(str), ==, "one"); + QDECREF(str); + str = qobject_to_qstring(qlist_pop(list)); + g_assert_cmpstr(qstring_get_str(str), ==, "% %s"); + QDECREF(str); + str = qobject_to_qstring(qlist_pop(list)); + g_assert_cmpstr(qstring_get_str(str), ==, "three"); + QDECREF(str); + g_assert(qlist_empty(list)); + qobject_decref(obj); + + /* The following intentionally cause assertion failures */ + + /* In vararg form, %% must occur in strings */ + /* qobject_from_jsonf("%%"); */ + /* qobject_from_jsonf("{%%}"); */ + + /* In vararg form, strings must not use % except for %% */ + /* qobject_from_jsonf("'%s'", "unpaired"); */ +} + static void unterminated_string(void) { Error *err = NULL; @@ -1540,6 +1589,7 @@ int main(int argc, char **argv) g_test_add_func("/varargs/simple_varargs", simple_varargs); g_test_add_func("/errors/empty_input", empty_input); + g_test_add_func("/errors/percent_and_vararg", percent_and_vararg); g_test_add_func("/errors/unterminated/string", unterminated_string); g_test_add_func("/errors/unterminated/escape", unterminated_escape); g_test_add_func("/errors/unterminated/sq_string", unterminated_sq_string); -- 2.13.3