On Wed, Oct 2, 2024 at 10:45 AM Andrew Dunstan <[email protected]> wrote: > Generally looks good. Should we have a check in > setJsonLexContextOwnsTokens() that we haven't started parsing yet, for > the incremental case?
Good catch. Added in v4. > > At the moment, we have a test matrix consisting of "standard frontend" > > and "shlib frontend" tests for the incremental parser. I'm planning > > for the v4 patch to extend that with a "owned/not owned" dimension; > > any objections? > > > > Sounds reasonable. This is also done, along with runs of pgindent/perltidy. I've added a 0002 as well. While running tests under Valgrind, it complained about the uninitialized dummy_lex on the stack, which is now fixed. (That bug was introduced with my patch in 0785d1b8b and is technically orthogonal to 0001, but I figured I could track it here for now.) This is also how I found out that my existing fuzzers weren't checking for uninitialized memory, like I thought they were. Grumble... Thanks, --Jacob
1: 81b1d7cdb6 ! 1: 6a0d088cbe jsonapi: add lexer option to keep token
ownership
@@ Commit message
of any tokens it wants to persist past the callback lifetime, but the
lexer will handle necessary cleanup on failure.
+ A -o option has been added to test_json_parser_incremental to exercise
+ the new setJsonLexContextOwnsTokens() API, and the test_json_parser TAP
+ tests make use of it. (The test program now cleans up allocated memory,
+ so that tests can be usefully run under leak sanitizers.)
+
## src/common/jsonapi.c ##
+@@ src/common/jsonapi.c: struct JsonParserStack
+ */
+ struct JsonIncrementalState
+ {
++ bool started;
+ bool is_last_chunk;
+ bool partial_completed;
+ jsonapi_StrValType partial_token;
@@ src/common/jsonapi.c: static JsonParseErrorType
parse_array_element(JsonLexContext *lex, const JsonSem
static JsonParseErrorType parse_array(JsonLexContext *lex, const
JsonSemAction *sem);
static JsonParseErrorType report_parse_error(JsonParseContext ctx,
JsonLexContext *lex);
@@ src/common/jsonapi.c: makeJsonLexContextIncremental(JsonLexContext *lex,
int enc
+void
+setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
+{
++ if (lex->incremental && lex->inc_state->started)
++ {
++ /*
++ * Switching this flag after parsing has already started is a
++ * programming error.
++ */
++ Assert(false);
++ return;
++ }
++
+ if (owned_by_context)
+ lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
+ else
@@ src/common/jsonapi.c: inc_lex_level(JsonLexContext *lex)
+ if (lex->incremental)
+ {
+ /*
-+ * Ensure freeJsonLexContext() remains safe even if no fname is
assigned
-+ * at this level.
++ * Ensure freeJsonLexContext() remains safe even if no fname is
++ * assigned at this level.
+ */
+ lex->pstack->fnames[lex->lex_level] = NULL;
+ }
@@ src/common/jsonapi.c: inc_lex_level(JsonLexContext *lex)
static inline void
dec_lex_level(JsonLexContext *lex)
{
-+ set_fname(lex, NULL); /* free the current level's fname, if needed */
++ set_fname(lex, NULL); /* free the current level's fname, if
needed */
lex->lex_level -= 1;
}
@@ src/common/jsonapi.c: freeJsonLexContext(JsonLexContext *lex)
FREE(lex->pstack);
}
+@@ src/common/jsonapi.c: pg_parse_json_incremental(JsonLexContext *lex,
+ lex->input = lex->token_terminator = lex->line_start = json;
+ lex->input_length = len;
+ lex->inc_state->is_last_chunk = is_last;
++ lex->inc_state->started = true;
+
+ /* get the initial token */
+ result = json_lex(lex);
@@ src/common/jsonapi.c: pg_parse_json_incremental(JsonLexContext *lex,
if (sfunc != NULL)
{
@@ src/common/jsonapi.c: pg_parse_json_incremental(JsonLexContext *lex,
+
+ /*
+ * Either ownership of
the token passed to the
-+ * callback, or we need
to free it now. Either way,
-+ * clear our pointer to
it so it doesn't get freed
-+ * in the future.
++ * callback, or we need
to free it now. Either
++ * way, clear our
pointer to it so it doesn't get
++ * freed in the future.
+ */
+ if (lex->flags &
JSONLEX_CTX_OWNS_TOKENS)
+
FREE(pstack->scalar_val);
@@ src/include/common/jsonapi.h: extern JsonLexContext
*makeJsonLexContextIncrement
extern void freeJsonLexContext(JsonLexContext *lex);
/* lex one token */
+
+ ##
src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl ##
+@@
src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl: use
FindBin;
+
+ my $test_file = "$FindBin::RealBin/../tiny.json";
+
+-my @exes =
+- ("test_json_parser_incremental", "test_json_parser_incremental_shlib");
++my @exes = (
++ [ "test_json_parser_incremental", ],
++ [ "test_json_parser_incremental", "-o", ],
++ [ "test_json_parser_incremental_shlib", ],
++ [ "test_json_parser_incremental_shlib", "-o", ]);
+
+ foreach my $exe (@exes)
+ {
+- note "testing executable $exe";
++ note "testing executable @$exe";
+
+ # Test the usage error
+- my ($stdout, $stderr) = run_command([ $exe, "-c", 10 ]);
++ my ($stdout, $stderr) = run_command([ @$exe, "-c", 10 ]);
+ like($stderr, qr/Usage:/, 'error message if not enough arguments');
+
+ # Test that we get success for small chunk sizes from 64 down to 1.
+ for (my $size = 64; $size > 0; $size--)
+ {
+- ($stdout, $stderr) = run_command([ $exe, "-c", $size,
$test_file ]);
++ ($stdout, $stderr) = run_command([ @$exe, "-c", $size,
$test_file ]);
+
+ like($stdout, qr/SUCCESS/, "chunk size $size: test succeeds");
+ is($stderr, "", "chunk size $size: no error output");
+
+ ## src/test/modules/test_json_parser/t/002_inline.pl ##
+@@ src/test/modules/test_json_parser/t/002_inline.pl: use Test::More;
+ use File::Temp qw(tempfile);
+
+ my $dir = PostgreSQL::Test::Utils::tempdir;
+-my $exe;
++my @exe;
+
+ sub test
+ {
+@@ src/test/modules/test_json_parser/t/002_inline.pl: sub test
+
+ foreach my $size (reverse(1 .. $chunk))
+ {
+- my ($stdout, $stderr) = run_command([ $exe, "-c", $size, $fname
]);
++ my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname
]);
+
+ if (defined($params{error}))
+ {
+@@ src/test/modules/test_json_parser/t/002_inline.pl: sub test
+ }
+ }
+
+-my @exes =
+- ("test_json_parser_incremental", "test_json_parser_incremental_shlib");
++my @exes = (
++ [ "test_json_parser_incremental", ],
++ [ "test_json_parser_incremental", "-o", ],
++ [ "test_json_parser_incremental_shlib", ],
++ [ "test_json_parser_incremental_shlib", "-o", ]);
+
+ foreach (@exes)
+ {
+- $exe = $_;
+- note "testing executable $exe";
++ @exe = @$_;
++ note "testing executable @exe";
+
+ test("number", "12345");
+ test("string", '"hello"');
+
+ ## src/test/modules/test_json_parser/t/003_test_semantic.pl ##
+@@ src/test/modules/test_json_parser/t/003_test_semantic.pl: use
File::Temp qw(tempfile);
+ my $test_file = "$FindBin::RealBin/../tiny.json";
+ my $test_out = "$FindBin::RealBin/../tiny.out";
+
+-my @exes =
+- ("test_json_parser_incremental", "test_json_parser_incremental_shlib");
++my @exes = (
++ [ "test_json_parser_incremental", ],
++ [ "test_json_parser_incremental", "-o", ],
++ [ "test_json_parser_incremental_shlib", ],
++ [ "test_json_parser_incremental_shlib", "-o", ]);
+
+ foreach my $exe (@exes)
+ {
+- note "testing executable $exe";
++ note "testing executable @$exe";
+
+- my ($stdout, $stderr) = run_command([ $exe, "-s", $test_file ]);
++ my ($stdout, $stderr) = run_command([ @$exe, "-s", $test_file ]);
+
+ is($stderr, "", "no error output");
+
+
+ ## src/test/modules/test_json_parser/test_json_parser_incremental.c ##
+@@
+ * If the -s flag is given, the program does semantic processing. This
should
+ * just mirror back the json, albeit with white space changes.
+ *
++ * If the -o flag is given, the JSONLEX_CTX_OWNS_TOKENS flag is set.
(This can
++ * be used in combination with a leak sanitizer; without the option, the
parser
++ * may leak memory with invalid JSON.)
++ *
+ * The argument specifies the file containing the JSON input.
+ *
+
*-------------------------------------------------------------------------
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
static JsonSemAction sem = {
+ .scalar = do_scalar
+ };
+
++static bool lex_owns_tokens = false;
++
+ int
+ main(int argc, char **argv)
+ {
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
main(int argc, char **argv)
+ char *testfile;
+ int c;
+ bool need_strings = false;
++ int ret = 0;
+
+ pg_logging_init(argv[0]);
+
+- while ((c = getopt(argc, argv, "c:s")) != -1)
++ while ((c = getopt(argc, argv, "c:os")) != -1)
+ {
+ switch (c)
+ {
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
main(int argc, char **argv)
+ if (chunk_size > BUFSIZE)
+ pg_fatal("chunk size cannot exceed %d",
BUFSIZE);
+ break;
++ case 'o': /* switch token
ownership */
++ lex_owns_tokens = true;
++ break;
+ case 's': /* do semantic
processing */
+ testsem = &sem;
+ sem.semstate = palloc(sizeof(struct DoState));
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
main(int argc, char **argv)
+
+ if (optind < argc)
+ {
+- testfile = pg_strdup(argv[optind]);
++ testfile = argv[optind];
+ optind++;
+ }
+ else
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
main(int argc, char **argv)
+ }
+
+ makeJsonLexContextIncremental(&lex, PG_UTF8, need_strings);
++ setJsonLexContextOwnsTokens(&lex, lex_owns_tokens);
+ initStringInfo(&json);
+
+ if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL)
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
main(int argc, char **argv)
+ if (result != JSON_INCOMPLETE)
+ {
+ fprintf(stderr, "%s\n", json_errdetail(result,
&lex));
+- exit(1);
++ ret = 1;
++ goto cleanup;
+ }
+ resetStringInfo(&json);
+ }
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
main(int argc, char **argv)
+ if (result != JSON_SUCCESS)
+ {
+ fprintf(stderr, "%s\n", json_errdetail(result,
&lex));
+- exit(1);
++ ret = 1;
++ goto cleanup;
+ }
+ if (!need_strings)
+ printf("SUCCESS!\n");
+ break;
+ }
+ }
++
++cleanup:
+ fclose(json_file);
+- exit(0);
++ freeJsonLexContext(&lex);
++ free(json.data);
++
++ return ret;
+ }
+
+ /*
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
do_object_field_start(void *state, char *fname, bool isnull)
+ static JsonParseErrorType
+ do_object_field_end(void *state, char *fname, bool isnull)
+ {
+- /* nothing to do really */
++ if (!lex_owns_tokens)
++ free(fname);
+
+ return JSON_SUCCESS;
+ }
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
do_scalar(void *state, char *token, JsonTokenType tokentype)
+ else
+ printf("%s", token);
+
++ if (!lex_owns_tokens)
++ free(token);
++
+ return JSON_SUCCESS;
+ }
+
+@@ src/test/modules/test_json_parser/test_json_parser_incremental.c:
usage(const char *progname)
+ {
+ fprintf(stderr, "Usage: %s [OPTION ...] testfile\n", progname);
+ fprintf(stderr, "Options:\n");
+- fprintf(stderr, " -c chunksize size of piece fed to parser
(default 64)n");
++ fprintf(stderr, " -c chunksize size of piece fed to parser
(default 64)\n");
++ fprintf(stderr, " -o set JSONLEX_CTX_OWNS_TOKENS for
leak checking\n");
+ fprintf(stderr, " -s do semantic processing\n");
+
+ }
-: ---------- > 2: d3e639ba2b jsonapi: fully initialize dummy lexer
v4-0001-jsonapi-add-lexer-option-to-keep-token-ownership.patch
Description: Binary data
v4-0002-jsonapi-fully-initialize-dummy-lexer.patch
Description: Binary data
