On 3/22/22 21:17, Rosemarie O'Riorden wrote:
> Hi Dumitru,
>
> Thank you for the review! I'm going to send a v2 shortly with some changes.
>
>> I'm not sure what the benefit is to have 'start' as 'const unsigned char
>> *'; it could easily be 'const char *' and we wouldn't have to cast later
>> on. Also, a comment might be useful, describing that this points to the
>> start of the input that has been preprocessed but not yet copied to
>> 'buffer'.
>
> I agree with this and have updated 'start' to be a const char *, which
> took away the need for all those casts. I have also added a comment to
> explain what p->start is.
>
>
>> On the other hand, json_lex_keyword(p) calls json_parser_input(p, ..)
>> which clears p->buffer at the end.
>>
>> So I thought that we should set p->start = NULL when we clear p->buffer.
>>
>> But json_parser_input() changes the lexer state back to JSON_LEX_START
>> so p->start will be reset when json_lex_input() is called next.
>>
>> I don't really have a good suggestion about how to improve this, it just
>> took me a bit to figure out why it's working correctly now.
>
> Maybe I can add a few comments to better explain what is going on.
> SInce p->start is updated at the next json_lex_input call, I guess
> this logic doesn't need to be changed, but if you think it does, let
> me know.
>
>
>> I keep wondering if there's a way to avoid the p->start all together.
>> What if we instead pass 'start' as an argument to json_lex_input() along
>> with an offset to indicate the next character to process?
>
> I thought about this for a while. I originally had this idea too, but
> I found that it made less sense to me. If I removed start and replaced
> it with an offset count, I'd need to track the offset with a pointer
> that needs to be incremented for every character that is processed.
> The way I have it now, p->start is only assigned once at every state
> change, so it makes it quite simple in my opinion. If there is more
> input on how I can improve this, please let me know.
>
>
>>> - json_lex_input(p, ' ');
>>> + p->start = space;
>>> + json_lex_input(p, space);
>>
>> This can be:
>>
>> p->start = " ";
>> json_lex_input(p, " ");
>>
>
> I've updated this the way you suggested.
>
>
> Thank you,
> Rosemarie O'Riorden
>
Hi Rosemarie,
What about the alternative approach below (incremental on top of your
patch)? It might be a bit intrusive because we explicitly inline
json_lex_input() but that function was anyway used in a single place
(well, actually two places but the call in json_parser_finish()
can be replaced with a call to json_parser_feed()).
Like this we avoid the 'start' field in 'struct json_parser' and make it
all contained, in a single place, in json_parser_feed(). No need to
explain how p->start is used and what it does.
Thanks,
Dumitru
diff --git a/lib/json.c b/lib/json.c
index f48cb32415bf..2b18ec550d67 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -101,7 +101,6 @@ struct json_parser {
int line_number;
int column_number;
int byte_number;
- const unsigned char *start;
/* Parsing. */
enum json_parse_state parse_state;
@@ -977,96 +976,6 @@ json_lex_string(struct json_parser *p)
}
}
-static inline ALWAYS_INLINE bool
-json_lex_input(struct json_parser *p, const unsigned char *ch)
-{
- struct json_token token;
- unsigned char c = *ch;
-
- switch (p->lex_state) {
- case JSON_LEX_START:
- switch (c) {
- case ' ': case '\t': case '\n': case '\r':
- /* Nothing to do. */
- p->start = ch + 1;
- return true;
-
- case 'a': case 'b': case 'c': case 'd': case 'e':
- case 'f': case 'g': case 'h': case 'i': case 'j':
- case 'k': case 'l': case 'm': case 'n': case 'o':
- case 'p': case 'q': case 'r': case 's': case 't':
- case 'u': case 'v': case 'w': case 'x': case 'y':
- case 'z':
- p->lex_state = JSON_LEX_KEYWORD;
- p->start = ch;
- break;
-
- case '[': case '{': case ']': case '}': case ':': case ',':
- token.type = c;
- json_parser_input(p, &token);
- p->start = ch + 1;
- return true;
-
- case '-':
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- p->lex_state = JSON_LEX_NUMBER;
- p->start = ch;
- break;
-
- case '"':
- p->lex_state = JSON_LEX_STRING;
- p->start = ch + 1;
- return true;
-
- default:
- if (isprint(c)) {
- json_error(p, "invalid character '%c'", c);
- } else {
- json_error(p, "invalid character U+%04x", c);
- }
- return true;
- }
- break;
-
- case JSON_LEX_KEYWORD:
- if (!isalpha((unsigned char) c)) {
- ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start);
- json_lex_keyword(p);
- return false;
- }
- break;
-
- case JSON_LEX_NUMBER:
- if (!strchr(".0123456789eE-+", c)) {
- ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start);
- json_lex_number(p);
- return false;
- }
- break;
-
- case JSON_LEX_STRING:
- if (c == '\\') {
- p->lex_state = JSON_LEX_ESCAPE;
- } else if (c == '"') {
- ds_put_buffer(&p->buffer, (const char *) p->start, ch - p->start);
- json_lex_string(p);
- return true;
- } else if (c < 0x20) {
- json_error(p, "U+%04X must be escaped in quoted string", c);
- return true;
- }
- break;
-
- case JSON_LEX_ESCAPE:
- p->lex_state = JSON_LEX_STRING;
- break;
-
- default:
- abort();
- }
- return true;
-}
/* Parsing. */
@@ -1169,26 +1078,123 @@ json_parser_create(int flags)
return p;
}
+static inline void ALWAYS_INLINE
+json_parser_account_byte(struct json_parser *p, unsigned char c)
+{
+ p->byte_number++;
+ if (OVS_UNLIKELY(c == '\n')) {
+ p->column_number = 0;
+ p->line_number++;
+ } else {
+ p->column_number++;
+ }
+}
+
size_t
json_parser_feed(struct json_parser *p, const char *input, size_t n)
{
+ size_t token_start = 0;
size_t i;
- p->start = (const unsigned char *) input;
+
for (i = 0; !p->done && i < n; ) {
- if (json_lex_input(p, (unsigned const char *) &input[i])) {
- p->byte_number++;
- if (OVS_UNLIKELY(input[i] == '\n')) {
- p->column_number = 0;
- p->line_number++;
- } else {
- p->column_number++;
+ bool consumed = true;
+
+ const char *start_p = &input[token_start];
+ unsigned char c = input[i];
+ struct json_token token;
+
+ switch (p->lex_state) {
+ case JSON_LEX_START:
+ switch (c) {
+ case ' ': case '\t': case '\n': case '\r':
+ /* Nothing to do. */
+
+ token_start = i + 1;
+ break;
+
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f': case 'g': case 'h': case 'i': case 'j':
+ case 'k': case 'l': case 'm': case 'n': case 'o':
+ case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y':
+ case 'z':
+ p->lex_state = JSON_LEX_KEYWORD;
+ token_start = i;
+ break;
+
+ case '[': case '{': case ']': case '}': case ':': case ',':
+ token.type = c;
+ json_parser_input(p, &token);
+ token_start = i + 1;
+ break;
+
+ case '-':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ p->lex_state = JSON_LEX_NUMBER;
+ token_start = i;
+ break;
+
+ case '"':
+ p->lex_state = JSON_LEX_STRING;
+ token_start = i + 1;
+ break;
+
+ default:
+ if (isprint(c)) {
+ json_error(p, "invalid character '%c'", c);
+ } else {
+ json_error(p, "invalid character U+%04x", c);
+ }
+ break;
}
+ break;
+
+ case JSON_LEX_KEYWORD:
+ if (!isalpha((unsigned char) c)) {
+ ds_put_buffer(&p->buffer, start_p, i - token_start);
+ json_lex_keyword(p);
+ consumed = false;
+ break;
+ }
+ break;
+
+ case JSON_LEX_NUMBER:
+ if (!strchr(".0123456789eE-+", c)) {
+ ds_put_buffer(&p->buffer, start_p, i - token_start);
+ json_lex_number(p);
+ consumed = false;
+ break;
+ }
+ break;
+
+ case JSON_LEX_STRING:
+ if (c == '\\') {
+ p->lex_state = JSON_LEX_ESCAPE;
+ } else if (c == '"') {
+ ds_put_buffer(&p->buffer, start_p, i - token_start);
+ json_lex_string(p);
+ } else if (c < 0x20) {
+ json_error(p, "U+%04X must be escaped in quoted string", c);
+ }
+ break;
+
+ case JSON_LEX_ESCAPE:
+ p->lex_state = JSON_LEX_STRING;
+ break;
+
+ default:
+ ovs_abort(0, "unexpected lexer state");
+ }
+
+ if (consumed) {
+ json_parser_account_byte(p, c);
i++;
}
}
+
if (!p->done) {
- ds_put_buffer(&p->buffer, (const char *) p->start,
- (const unsigned char *) &input[i] - p->start);
+ ds_put_buffer(&p->buffer, &input[token_start], i - token_start);
}
return i;
}
@@ -1203,7 +1209,6 @@ struct json *
json_parser_finish(struct json_parser *p)
{
struct json *json;
- const unsigned char *space = (const unsigned char *) " ";
switch (p->lex_state) {
case JSON_LEX_START:
@@ -1216,8 +1221,7 @@ json_parser_finish(struct json_parser *p)
case JSON_LEX_NUMBER:
case JSON_LEX_KEYWORD:
- p->start = space;
- json_lex_input(p, space);
+ json_parser_feed(p, " ", 1);
break;
}
---
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev