Hello Tom,
# progress: 2.6 s, 6.9 tps, lat 0.000 ms stddev 0.000, lag 0.000 ms, 18 skipped # progress: 3.0 s, 0.0 tps, lat -nan ms stddev -nan, lag -nan ms, 0 skipped # progress: 4.0 s, 1.0 tps, lat 2682.730 ms stddev 0.000, lag 985.509 ms, 0 skipped
(BTW, the "-nan" bits suggest an actual pgbench bug, independently of anything else.)
From my point of view, NaN is expected when no test were executed in theinterval: if there was no transaction, it does not make sense to talk about its latency, so NaN is the right answer.
However, the above "6.9 tps, lat 0.000, stddev 0.000, lag 0.000" is inconsistent. As "6.9 = 18 / 2.6", it means that progress tps calculation should remove skipped transactions...
Attached patch attempts to report more consistent figures in the progress and in final report when transactions are skipped.
sh> cat sleep-100.sql \sleep 100 ms SELECT 1; sh> ./pgbench -P 1 -t 100 -f sleep-100.sql -R 20 -L 1 [...] progress: 1.0 s, 7.0 tps, lat 100.145 ms stddev 0.042, lag 0.000 ms, 16 skipped progress: 2.0 s, 6.0 tps, lat 100.133 ms stddev 0.040, lag 0.021 ms, 7 skipped progress: 3.0 s, 9.0 tps, lat 100.115 ms stddev 0.016, lag 0.000 ms, 11 skipped [...] number of transactions actually processed: 38/100 number of transactions skipped: 62 (62.000 %) number of transactions above the 1.0 ms latency limit: 38 (38.000 %) latency average = 100.142 ms tps = 7.091010 (including connections establishing) tps = 7.094144 (excluding connections establishing) script statistics: - number of transactions skipped: 62 (62.000%) -- Fabien.
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index e37496c..9ca9734 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -2584,7 +2584,7 @@ processXactStats(TState *thread, CState *st, instr_time *now, doLog(thread, st, agg, skipped, latency, lag); /* XXX could use a mutex here, but we choose not to */ - if (per_script_stats) + if (per_script_stats || latency_limit) accumStats(&sql_script[st->use_file].stats, skipped, latency, lag); } @@ -3522,11 +3522,14 @@ printResults(TState *threads, StatsData *total, instr_time total_time, double time_include, tps_include, tps_exclude; + int64 ntx = total->cnt - total->skipped; time_include = INSTR_TIME_GET_DOUBLE(total_time); - tps_include = total->cnt / time_include; - tps_exclude = total->cnt / (time_include - - (INSTR_TIME_GET_DOUBLE(conn_total_time) / nclients)); + + /* tps is about actually executed transactions */ + tps_include = ntx / time_include; + tps_exclude = ntx / + (time_include - (INSTR_TIME_GET_DOUBLE(conn_total_time) / nclients)); /* Report test parameters. */ printf("transaction type: %s\n", @@ -3539,7 +3542,7 @@ printResults(TState *threads, StatsData *total, instr_time total_time, { printf("number of transactions per client: %d\n", nxacts); printf("number of transactions actually processed: " INT64_FORMAT "/%d\n", - total->cnt - total->skipped, nxacts * nclients); + total->cnt, nxacts * nclients); } else { @@ -4660,7 +4663,8 @@ threadRun(void *arg) { /* generate and show report */ StatsData cur; - int64 run = now - last_report; + int64 run = now - last_report, + ntx; double tps, total_run, latency, @@ -4675,7 +4679,7 @@ threadRun(void *arg) * XXX: No locking. There is no guarantee that we get an * atomic snapshot of the transaction count and latencies, so * these figures can well be off by a small amount. The - * progress is report's purpose is to give a quick overview of + * progress report's purpose is to give a quick overview of * how the test is going, so that shouldn't matter too much. * (If a read from a 64-bit integer is not atomic, you might * get a "torn" read and completely bogus latencies though!) @@ -4689,15 +4693,14 @@ threadRun(void *arg) cur.skipped += thread[i].stats.skipped; } + /* we count only actually executed transactions */ + ntx = (cur.cnt - cur.skipped) - (last.cnt - last.skipped); total_run = (now - thread_start) / 1000000.0; - tps = 1000000.0 * (cur.cnt - last.cnt) / run; - latency = 0.001 * (cur.latency.sum - last.latency.sum) / - (cur.cnt - last.cnt); - sqlat = 1.0 * (cur.latency.sum2 - last.latency.sum2) - / (cur.cnt - last.cnt); + tps = 1000000.0 * ntx / run; + latency = 0.001 * (cur.latency.sum - last.latency.sum) / ntx; + sqlat = 1.0 * (cur.latency.sum2 - last.latency.sum2) / ntx; stdev = 0.001 * sqrt(sqlat - 1000000.0 * latency * latency); - lag = 0.001 * (cur.lag.sum - last.lag.sum) / - (cur.cnt - last.cnt); + lag = 0.001 * (cur.lag.sum - last.lag.sum) / ntx; if (progress_timestamp) { @@ -4714,6 +4717,7 @@ threadRun(void *arg) (long) tv.tv_sec, (long) (tv.tv_usec / 1000)); } else + /* round seconds are expected, nut the thread may be late */ snprintf(tbuf, sizeof(tbuf), "%.1f s", total_run); fprintf(stderr,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers