To parse a json string prior to this change, json_lex_input is called with each character of the string. If the character needs to be copied to the buffer, it is copied individually. This is an expensive operation, as often there are multiple characters in a row that need to be copied, and copying memory in blocks is more efficient than byte by byte. To improve this, the string is now copied in blocks with an offset counter. A copy is performed when the parser state equals done.
Functions that are called for each character use a lot of CPU cycles. Making these functions inline greatly reduces the cycles used and improves overall performance. Since json_lex_input was only needed in one place, it doesn't have to be its own function. There is also a conditional that checks if the current character is a new line, which is quite unlikely. When this was examined with perf, the comparison had a very high CPU cycle usage. To improve this, the OVS_UNLIKELY macro was used, which forces the compiler to switch the order of the instructions. Here is the improvement seen in the json-string-benchmark test: SIZE Q S BEFORE AFTER CHANGE -------------------------------------------------------- 100000 0 0 : 0.842 ms 0.489 ms -41.9 % 100000 2 1 : 0.917 ms 0.535 ms -41.7 % 100000 10 1 : 1.063 ms 0.656 ms -38.3 % 10000000 0 0 : 85.328 ms 49.878 ms -41.5 % 10000000 2 1 : 92.555 ms 54.778 ms -40.8 % 10000000 10 1 : 106.728 ms 66.735 ms -37.5 % 100000000 0 0 : 955.375 ms 621.950 ms -34.9 % 100000000 2 1 : 1031.700 ms 665.200 ms -35.5 % 100000000 10 1 : 1189.300 ms 796.050 ms -33.0 % Here Q is probability (%) for a character to be a '\"' and S is probability (%) to be a special character ( < 32). Signed-off-by: Rosemarie O'Riorden <[email protected]> --- Adds a local counter so p->start is no longer needed Moves contents of json_lex_input into json_parser_feed Adds function json_parser_account_byte Performs copy when parser state equals done lib/json.c | 200 +++++++++++++++++++++++++++++------------------------ 1 file changed, 110 insertions(+), 90 deletions(-) diff --git a/lib/json.c b/lib/json.c index 720c73d94..2b18ec550 100644 --- a/lib/json.c +++ b/lib/json.c @@ -976,88 +976,6 @@ json_lex_string(struct json_parser *p) } } -static bool -json_lex_input(struct json_parser *p, unsigned char c) -{ - struct json_token token; - - switch (p->lex_state) { - case JSON_LEX_START: - switch (c) { - case ' ': case '\t': case '\n': case '\r': - /* Nothing to do. */ - return true; - - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - p->lex_state = JSON_LEX_KEYWORD; - break; - - case '[': case '{': case ']': case '}': case ':': case ',': - token.type = c; - json_parser_input(p, &token); - return true; - - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - p->lex_state = JSON_LEX_NUMBER; - break; - - case '"': - p->lex_state = JSON_LEX_STRING; - return true; - - default: - if (isprint(c)) { - json_error(p, "invalid character '%c'", c); - } else { - json_error(p, "invalid character U+%04x", c); - } - return true; - } - break; - - case JSON_LEX_KEYWORD: - if (!isalpha((unsigned char) c)) { - json_lex_keyword(p); - return false; - } - break; - - case JSON_LEX_NUMBER: - if (!strchr(".0123456789eE-+", c)) { - json_lex_number(p); - return false; - } - break; - - case JSON_LEX_STRING: - if (c == '\\') { - p->lex_state = JSON_LEX_ESCAPE; - } else if (c == '"') { - json_lex_string(p); - return true; - } else if (c < 0x20) { - json_error(p, "U+%04X must be escaped in quoted string", c); - return true; - } - break; - - case JSON_LEX_ESCAPE: - p->lex_state = JSON_LEX_STRING; - break; - - default: - abort(); - } - ds_put_char(&p->buffer, c); - return true; -} /* Parsing. */ @@ -1160,22 +1078,124 @@ json_parser_create(int flags) return p; } +static inline void ALWAYS_INLINE +json_parser_account_byte(struct json_parser *p, unsigned char c) +{ + p->byte_number++; + if (OVS_UNLIKELY(c == '\n')) { + p->column_number = 0; + p->line_number++; + } else { + p->column_number++; + } +} + size_t json_parser_feed(struct json_parser *p, const char *input, size_t n) { + size_t token_start = 0; size_t i; + for (i = 0; !p->done && i < n; ) { - if (json_lex_input(p, input[i])) { - p->byte_number++; - if (input[i] == '\n') { - p->column_number = 0; - p->line_number++; - } else { - p->column_number++; + bool consumed = true; + + const char *start_p = &input[token_start]; + unsigned char c = input[i]; + struct json_token token; + + switch (p->lex_state) { + case JSON_LEX_START: + switch (c) { + case ' ': case '\t': case '\n': case '\r': + /* Nothing to do. */ + + token_start = i + 1; + break; + + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + p->lex_state = JSON_LEX_KEYWORD; + token_start = i; + break; + + case '[': case '{': case ']': case '}': case ':': case ',': + token.type = c; + json_parser_input(p, &token); + token_start = i + 1; + break; + + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + p->lex_state = JSON_LEX_NUMBER; + token_start = i; + break; + + case '"': + p->lex_state = JSON_LEX_STRING; + token_start = i + 1; + break; + + default: + if (isprint(c)) { + json_error(p, "invalid character '%c'", c); + } else { + json_error(p, "invalid character U+%04x", c); + } + break; + } + break; + + case JSON_LEX_KEYWORD: + if (!isalpha((unsigned char) c)) { + ds_put_buffer(&p->buffer, start_p, i - token_start); + json_lex_keyword(p); + consumed = false; + break; } + break; + + case JSON_LEX_NUMBER: + if (!strchr(".0123456789eE-+", c)) { + ds_put_buffer(&p->buffer, start_p, i - token_start); + json_lex_number(p); + consumed = false; + break; + } + break; + + case JSON_LEX_STRING: + if (c == '\\') { + p->lex_state = JSON_LEX_ESCAPE; + } else if (c == '"') { + ds_put_buffer(&p->buffer, start_p, i - token_start); + json_lex_string(p); + } else if (c < 0x20) { + json_error(p, "U+%04X must be escaped in quoted string", c); + } + break; + + case JSON_LEX_ESCAPE: + p->lex_state = JSON_LEX_STRING; + break; + + default: + ovs_abort(0, "unexpected lexer state"); + } + + if (consumed) { + json_parser_account_byte(p, c); i++; } } + + if (!p->done) { + ds_put_buffer(&p->buffer, &input[token_start], i - token_start); + } return i; } @@ -1201,7 +1221,7 @@ json_parser_finish(struct json_parser *p) case JSON_LEX_NUMBER: case JSON_LEX_KEYWORD: - json_lex_input(p, ' '); + json_parser_feed(p, " ", 1); break; } -- 2.35.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
