Hello Alvaro,

I revive this patch because controlling the seed is useful for tap testing pgbench.

The output should include the random seed used, whether it was passed with --random-seed, environment variable or randomly determined. That way, the user that later wants to verify why a particular run caused some particular misbehavior knows what seed to use to reproduce that run.

Yep.

Here is a new version which outputs the seed used when a seed is explicitly set with an option or from the environment.

However, the default (current) behavior remains silent, so no visible changes unless tinkering with it.

The patch also allows using a "strong" random source for seeding the PRNG, thanks to pg_strong_random().

The tests assume that the stdlib random/srandom behavior is standard, and thus deterministic across platforms.

--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 1519fe7..49dda81 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -761,6 +761,37 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
        </para>
       </listitem>
      </varlistentry>
+
+     <varlistentry>
+      <term><option>--random-seed=</option><replaceable>SEED</replaceable></term>
+      <listitem>
+       <para>
+        Set random generator seed.  This random generator is used to initialize
+        per-thread random generator states.
+        Expected values for <replaceable>SEED</replaceable> are:
+        <literal>time</literal> (the default, the seed is based on the current time),
+        <literal>rand</literal> (use a strong random source if available),
+        or any unsigned integer value.
+        The random generator is invoked explicitly from a pgbench script
+        (<literal>random...</literal> functions) or implicitly (for instance option
+        <option>--rate</option> uses random to schedule transactions).
+        The random generator seed may also be provided through environment variable
+        <literal>PGBENCH_RANDOM_SEED</literal>.
+        To ensure that the provided seed impacts all possible uses, put this option
+        first or use the environment variable.
+       </para>
+       <para>
+        Setting the seed explicitly allows reproducing a <command>pgbench</command>
+        run exactly, as far as random numbers are concerned.
+        From a statistical viewpoint this is a bad idea because it can hide the
+        performance variability or improve performance unduly, e.g. by hitting
+        the same pages as a previous run.
+        However it may also be of great help for debugging, for instance
+        re-running a tricky case which leads to an error.
+        Use wisely.
+       </para>
+      </listitem>
+     </varlistentry>
     </variablelist>
    </para>
 
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index e065f7b..fdd731d 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -557,6 +557,7 @@ usage(void)
 		   "  --log-prefix=PREFIX      prefix for transaction time log file\n"
 		   "                           (default: \"pgbench_log\")\n"
 		   "  --progress-timestamp     use Unix epoch timestamps for progress\n"
+		   "  --random-seed=SEED       set random seed (\"time\", \"rand\", integer)\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g., 0.01 for 1%%)\n"
 		   "\nCommon options:\n"
 		   "  -d, --debug              print debugging output\n"
@@ -4010,6 +4011,49 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
 	}
 }
 
+/* Call srandom() with a seed from "seed", else PGBENCH_RANDOM_SEED, else time. */
+static void
+set_random_seed(const char *seed)
+{
+	unsigned int iseed;
+
+	if (seed == NULL)
+		seed = getenv("PGBENCH_RANDOM_SEED");
+
+	if (seed == NULL || *seed == '\0' || strcmp(seed, "time") == 0)
+	{
+		/* rely on current time, in microseconds truncated to unsigned int */
+		instr_time	now;
+		INSTR_TIME_SET_CURRENT(now);
+		iseed = (unsigned int) INSTR_TIME_GET_MICROSEC(now);
+	}
+	else if (strcmp(seed, "rand") == 0)
+	{
+		/* use some "strong" random source, exit if it cannot provide one */
+		if (!pg_strong_random(&iseed, sizeof(iseed)))
+		{
+			fprintf(stderr, "cannot seed random from a strong source\n");
+			exit(1);
+		}
+	}
+	else
+	{
+		/* parse seed from option or environment; "garbage" detects trailing characters */
+		char garbage;
+		if (sscanf(seed, "%u%c", &iseed, &garbage) != 1)
+		{
+			fprintf(stderr,
+					"error while scanning '%s', expecting an unsigned integer\n",
+					seed);
+			exit(1);
+		}
+	}
+
+	if (seed != NULL && *seed != '\0')	/* report only when a seed was given */
+		fprintf(stderr, "setting random seed to %u\n", iseed);
+	srandom(iseed);
+}
+
 
 int
 main(int argc, char **argv)
@@ -4052,6 +4096,7 @@ main(int argc, char **argv)
 		{"progress-timestamp", no_argument, NULL, 6},
 		{"log-prefix", required_argument, NULL, 7},
 		{"foreign-keys", no_argument, NULL, 8},
+		{"random-seed", required_argument, NULL, 9},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -4120,6 +4165,9 @@ main(int argc, char **argv)
 	state = (CState *) pg_malloc(sizeof(CState));
 	memset(state, 0, sizeof(CState));
 
+	/* set random seed early, because it may be used while parsing scripts. */
+	set_random_seed(NULL);
+
 	while ((c = getopt_long(argc, argv, "iI:h:nvp:dqb:SNc:j:Crs:t:T:U:lf:D:F:M:P:R:L:", long_options, &optindex)) != -1)
 	{
 		char	   *script;
@@ -4392,6 +4440,10 @@ main(int argc, char **argv)
 				initialization_option_set = true;
 				foreign_keys = true;
 				break;
+			case 9:				/* random-seed */
+				benchmarking_option_set = true;
+				set_random_seed(optarg);
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -4698,10 +4750,6 @@ main(int argc, char **argv)
 	}
 	PQfinish(con);
 
-	/* set random seed */
-	INSTR_TIME_SET_CURRENT(start_time);
-	srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
-
 	/* set up thread data structures */
 	threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
 	nclients_dealt = 0;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 3dd080e..f50c8d2 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -210,11 +210,16 @@ COMMIT;
 } });
 
 # test expressions
+# commands 1..3 and 23 depend on the random seed, which is used to call srandom.
 pgbench(
-	'-t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808',
+	'--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808',
 	0,
 	[ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ],
-	[   qr{command=4.: int 4\b},
+	[   qr{setting random seed to 5432\b},
+        qr{command=1.: int 28\b},
+	    qr{command=2.: int 7\b},
+	    qr{command=3.: int 47\b},
+	    qr{command=4.: int 4\b},
 		qr{command=5.: int 5\b},
 		qr{command=6.: int 6\b},
 		qr{command=7.: int 7\b},
@@ -232,7 +237,7 @@ pgbench(
 		qr{command=19.: double 19\b},
 		qr{command=20.: double 20\b},
 		qr{command=21.: int 9223372036854775807\b},
-		qr{command=23.: int [1-9]\b},
+		qr{command=23.: int 1\b},
 		qr{command=24.: double -27\b},
 		qr{command=25.: double 1024\b},
 		qr{command=26.: double 1\b},

Reply via email to