From 50e6953141feb0a9c2913070090a27ea02db9d6b Mon Sep 17 00:00:00 2001
From: Jacob Champion <jacob.champion@enterprisedb.com>
Date: Fri, 26 Sep 2025 16:31:31 -0700
Subject: [PATCH] WIP: test_json_parser_incremental: add a chunk-size range mode (-r)

---
 src/test/modules/test_json_parser/README      |  10 +-
 .../modules/test_json_parser/t/002_inline.pl  |  22 +++-
 .../test_json_parser_incremental.c            | 117 +++++++++++-------
 3 files changed, 93 insertions(+), 56 deletions(-)

diff --git a/src/test/modules/test_json_parser/README b/src/test/modules/test_json_parser/README
index ceccd499f43..61e7c78d588 100644
--- a/src/test/modules/test_json_parser/README
+++ b/src/test/modules/test_json_parser/README
@@ -6,10 +6,12 @@ This module contains two programs for testing the json parsers.
 - `test_json_parser_incremental` is for testing the incremental parser, It
   reads in a file and passes it in very small chunks (default is 60 bytes at a
   time) to the incremental parser. It's not meant to be a speed test but to
-  test the accuracy of the incremental parser.  There are two option arguments,
-  "-c nn" specifies an alternative chunk size, and "-s" specifies using
-  semantic routines. The semantic routines re-output the json, although not in
-  a very pretty form. The required non-option argument is the input file name.
+  test the accuracy of the incremental parser. The option "-c nn" specifies an
+  alternative chunk size, "-r nn" runs a range of chunk sizes down to one byte
+  on the same input (with output separated by null bytes), and "-s" specifies
+  using semantic routines. The semantic routines re-output the json, although
+  not in a very pretty form. The required non-option argument is the input file
+  name.
 - `test_json_parser_perf` is for speed testing both the standard
   recursive descent parser and the non-recursive incremental
   parser. If given the `-i` flag it uses the non-recursive parser,
diff --git a/src/test/modules/test_json_parser/t/002_inline.pl b/src/test/modules/test_json_parser/t/002_inline.pl
index 7c8b64977ec..285feb87797 100644
--- a/src/test/modules/test_json_parser/t/002_inline.pl
+++ b/src/test/modules/test_json_parser/t/002_inline.pl
@@ -33,23 +33,33 @@ sub test
 	print $fh "$json";
 	close($fh);
 
+	my ($all_stdout, $all_stderr) = run_command([ @exe, "-r", $chunk, $fname ]);
+
+	my @stdout = unpack("(Z*)*", $all_stdout);
+	my @stderr = unpack("(Z*)*", $all_stderr);
+
+	is(scalar @stdout, $chunk, "$name: stdout has correct number of entries");
+	is(scalar @stderr, $chunk, "$name: stderr has correct number of entries");
+
+	my $i = 0;
+
 	foreach my $size (reverse(1 .. $chunk))
 	{
-		my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname ]);
-
 		if (defined($params{error}))
 		{
-			unlike($stdout, qr/SUCCESS/,
+			unlike($stdout[$i], qr/SUCCESS/,
 				"$name, chunk size $size: test fails");
-			like($stderr, $params{error},
+			like($stderr[$i], $params{error},
 				"$name, chunk size $size: correct error output");
 		}
 		else
 		{
-			like($stdout, qr/SUCCESS/,
+			like($stdout[$i], qr/SUCCESS/,
 				"$name, chunk size $size: test succeeds");
-			is($stderr, "", "$name, chunk size $size: no error output");
+			is($stderr[$i], "", "$name, chunk size $size: no error output");
 		}
+
+		$i++;
 	}
 }
 
diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c
index d1e3e4ab4ea..cd83cd22a76 100644
--- a/src/test/modules/test_json_parser/test_json_parser_incremental.c
+++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c
@@ -12,9 +12,14 @@
  * the parser in very small chunks. In practice you would normally use
  * much larger chunks, but doing this makes it more likely that the
  * full range of increment handling, especially in the lexer, is exercised.
+ *
  * If the "-c SIZE" option is provided, that chunk size is used instead
  * of the default of 60.
  *
+ * If the "-r SIZE" option is provided, chunk sizes ranging from SIZE down to 1
+ * are run sequentially on the same input. A null byte is printed to both
+ * output streams after each iteration.
+ *
  * If the -s flag is given, the program does semantic processing. This should
  * just mirror back the json, albeit with white space changes.
  *
@@ -88,8 +93,8 @@ main(int argc, char **argv)
 	StringInfoData json;
 	int			n_read;
 	size_t		chunk_size = DEFAULT_CHUNK_SIZE;
+	bool		run_chunk_ranges = false;
 	struct stat statbuf;
-	off_t		bytes_left;
 	const JsonSemAction *testsem = &nullSemAction;
 	char	   *testfile;
 	int			c;
@@ -102,14 +107,19 @@ main(int argc, char **argv)
 	if (!lex)
 		pg_fatal("out of memory");
 
-	while ((c = getopt(argc, argv, "c:os")) != -1)
+	while ((c = getopt(argc, argv, "c:r:os")) != -1)
 	{
 		switch (c)
 		{
 			case 'c':			/* chunksize */
+			case 'r':			/* chunk range */
 				chunk_size = strtou64(optarg, NULL, 10);
 				if (chunk_size > BUFSIZE)
 					pg_fatal("chunk size cannot exceed %d", BUFSIZE);
+
+				if (c == 'r')
+					run_chunk_ranges = true;
+
 				break;
 			case 'o':			/* switch token ownership */
 				lex_owns_tokens = true;
@@ -135,8 +145,6 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
-	makeJsonLexContextIncremental(lex, PG_UTF8, need_strings);
-	setJsonLexContextOwnsTokens(lex, lex_owns_tokens);
 	initStringInfo(&json);
 
 	if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL)
@@ -145,61 +153,78 @@ main(int argc, char **argv)
 	if (fstat(fileno(json_file), &statbuf) != 0)
 		pg_fatal("error statting input: %m");
 
-	bytes_left = statbuf.st_size;
-
-	for (;;)
+	do
 	{
-		/* We will break when there's nothing left to read */
+		off_t		bytes_left = statbuf.st_size;
+		size_t		to_read = chunk_size;
 
-		if (bytes_left < chunk_size)
-			chunk_size = bytes_left;
+		makeJsonLexContextIncremental(lex, PG_UTF8, need_strings);
+		setJsonLexContextOwnsTokens(lex, lex_owns_tokens);
 
-		n_read = fread(buff, 1, chunk_size, json_file);
-		if (n_read < chunk_size)
-			pg_fatal("error reading input file: %d", ferror(json_file));
+		rewind(json_file);
+		resetStringInfo(&json);
 
-		appendBinaryStringInfo(&json, buff, n_read);
-
-		/*
-		 * Append some trailing junk to the buffer passed to the parser. This
-		 * helps us ensure that the parser does the right thing even if the
-		 * chunk isn't terminated with a '\0'.
-		 */
-		appendStringInfoString(&json, "1+23 trailing junk");
-		bytes_left -= n_read;
-		if (bytes_left > 0)
+		for (;;)
 		{
-			result = pg_parse_json_incremental(lex, testsem,
-											   json.data, n_read,
-											   false);
-			if (result != JSON_INCOMPLETE)
+			/* We will break when there's nothing left to read */
+
+			if (bytes_left < to_read)
+				to_read = bytes_left;
+
+			n_read = fread(buff, 1, to_read, json_file);
+			if (n_read < to_read)
+				pg_fatal("error reading input file: %d", ferror(json_file));
+
+			appendBinaryStringInfo(&json, buff, n_read);
+
+			/*
+			 * Append some trailing junk to the buffer passed to the parser.
+			 * This helps us ensure that the parser does the right thing even
+			 * if the chunk isn't terminated with a '\0'.
+			 */
+			appendStringInfoString(&json, "1+23 trailing junk");
+			bytes_left -= n_read;
+			if (bytes_left > 0)
 			{
-				fprintf(stderr, "%s\n", json_errdetail(result, lex));
-				ret = 1;
-				goto cleanup;
+				result = pg_parse_json_incremental(lex, testsem,
+												   json.data, n_read,
+												   false);
+				if (result != JSON_INCOMPLETE)
+				{
+					fprintf(stderr, "%s\n", json_errdetail(result, lex));
+					ret = 1;
+					goto cleanup;
+				}
+				resetStringInfo(&json);
 			}
-			resetStringInfo(&json);
-		}
-		else
-		{
-			result = pg_parse_json_incremental(lex, testsem,
-											   json.data, n_read,
-											   true);
-			if (result != JSON_SUCCESS)
+			else
 			{
-				fprintf(stderr, "%s\n", json_errdetail(result, lex));
-				ret = 1;
-				goto cleanup;
+				result = pg_parse_json_incremental(lex, testsem,
+												   json.data, n_read,
+												   true);
+				if (result != JSON_SUCCESS)
+				{
+					fprintf(stderr, "%s\n", json_errdetail(result, lex));
+					ret = 1;
+					goto cleanup;
+				}
+				if (!need_strings)
+					printf("SUCCESS!\n");
+				break;
 			}
-			if (!need_strings)
-				printf("SUCCESS!\n");
-			break;
 		}
-	}
 
 cleanup:
+		if (run_chunk_ranges)
+		{
+			fputc('\0', stdout);
+			fputc('\0', stderr);
+		}
+
+		freeJsonLexContext(lex);
+	} while (run_chunk_ranges && (--chunk_size > 0));
+
 	fclose(json_file);
-	freeJsonLexContext(lex);
 	free(json.data);
 	free(lex);
 
-- 
2.34.1

