Hi, Hackers.

I've attached the patch that I failed to include in my previous email.
(I'm still a bit confused about how to attach files using the standard Mail client on macOS.)

Best Regards,
Rintaro Ikeda
From fca20d18dbedc8a9c66408da3e7139cd4192ff5b Mon Sep 17 00:00:00 2001
From: "Rintaro.Ikeda" <ikedarinta...@oss.nttdata.com>
Date: Mon, 12 May 2025 21:57:39 +0900
Subject: [PATCH] Add continue-on-error option to pgbench

When the option is set, client rolls back the failed transaction and starts a
new one when its transaction fails due to the reason other than the deadlock and
serialization failure.
---
 doc/src/sgml/ref/pgbench.sgml | 12 +++++++++++
 src/bin/pgbench/pgbench.c     | 39 +++++++++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index ab252d9fc74..dcb8c1c487c 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -914,6 +914,18 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry id="pgbench-option-continue-on-error">
+      <term><option>--continue-on-error</option></term>
+      <listitem>
+       <para>
+        Client rolls back the failed transaction and starts a new one when its
+        transaction fails due to the reason other than the deadlock and
+        serialization failure. This allows all clients specified with -c option
+        to continuously apply load to the server, even if some transactions fail.
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
    </para>
 
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 497a936c141..5db222f2c1e 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -440,6 +440,10 @@ typedef struct StatsData
 	int64		deadlock_failures;	/* number of transactions that were not
 									 * successfully retried after a deadlock
 									 * error */
+	int64		other_sql_failures;	/* number of failed transactions for reasons
+									 * other than serialization/deadlock failure
+									 * , which is enabled if --continue-on-error
+									 *  is used */
 	SimpleStats latency;
 	SimpleStats lag;
 } StatsData;
@@ -770,6 +774,7 @@ static int64 total_weight = 0;
 static bool verbose_errors = false; /* print verbose messages of all errors */
 
 static bool exit_on_abort = false;	/* exit when any client is aborted */
+static bool continue_on_error = false;	/* continue after errors */
 
 /* Builtin test scripts */
 typedef struct BuiltinScript
@@ -954,6 +959,8 @@ usage(void)
 		   "  --log-prefix=PREFIX      prefix for transaction time log file\n"
 		   "                           (default: \"pgbench_log\")\n"
 		   "  --max-tries=NUM          max number of tries to run transaction (default: 1)\n"
+		   "  --continue-on-error\n"
+		   "                           Continue and retry transactions that failed due to errors other than serialization or deadlocks.\n"
 		   "  --progress-timestamp     use Unix epoch timestamps for progress\n"
 		   "  --random-seed=SEED       set random seed (\"time\", \"rand\", integer)\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g., 0.01 for 1%%)\n"
@@ -1467,6 +1474,7 @@ initStats(StatsData *sd, pg_time_usec_t start)
 	sd->retried = 0;
 	sd->serialization_failures = 0;
 	sd->deadlock_failures = 0;
+	sd->other_sql_failures = 0;
 	initSimpleStats(&sd->latency);
 	initSimpleStats(&sd->lag);
 }
@@ -1516,6 +1524,9 @@ accumStats(StatsData *stats, bool skipped, double lat, double lag,
 		case ESTATUS_DEADLOCK_ERROR:
 			stats->deadlock_failures++;
 			break;
+		case ESTATUS_OTHER_SQL_ERROR:
+			stats->other_sql_failures++;
+			break;
 		default:
 			/* internal error which should never occur */
 			pg_fatal("unexpected error status: %d", estatus);
@@ -3247,7 +3258,8 @@ static bool
 canRetryError(EStatus estatus)
 {
 	return (estatus == ESTATUS_SERIALIZATION_ERROR ||
-			estatus == ESTATUS_DEADLOCK_ERROR);
+			estatus == ESTATUS_DEADLOCK_ERROR ||
+			(continue_on_error && estatus == ESTATUS_OTHER_SQL_ERROR));
 }
 
 /*
@@ -4528,7 +4540,8 @@ static int64
 getFailures(const StatsData *stats)
 {
 	return (stats->serialization_failures +
-			stats->deadlock_failures);
+			stats->deadlock_failures +
+			stats->other_sql_failures);
 }
 
 /*
@@ -4548,6 +4561,8 @@ getResultString(bool skipped, EStatus estatus)
 				return "serialization";
 			case ESTATUS_DEADLOCK_ERROR:
 				return "deadlock";
+			case ESTATUS_OTHER_SQL_ERROR:
+				return "error (except serialization/deadlock)";
 			default:
 				/* internal error which should never occur */
 				pg_fatal("unexpected error status: %d", estatus);
@@ -4603,6 +4618,7 @@ doLog(TState *thread, CState *st,
 			int64		skipped = 0;
 			int64		serialization_failures = 0;
 			int64		deadlock_failures = 0;
+			int64		other_sql_failures = 0;
 			int64		retried = 0;
 			int64		retries = 0;
 
@@ -4643,10 +4659,12 @@ doLog(TState *thread, CState *st,
 			{
 				serialization_failures = agg->serialization_failures;
 				deadlock_failures = agg->deadlock_failures;
+				other_sql_failures = agg->other_sql_failures;
 			}
-			fprintf(logfile, " " INT64_FORMAT " " INT64_FORMAT,
+			fprintf(logfile, " " INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT,
 					serialization_failures,
-					deadlock_failures);
+					deadlock_failures,
+					other_sql_failures);
 
 			fputc('\n', logfile);
 
@@ -6285,6 +6303,7 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
 		cur.serialization_failures +=
 			threads[i].stats.serialization_failures;
 		cur.deadlock_failures += threads[i].stats.deadlock_failures;
+		cur.other_sql_failures += threads[i].stats.other_sql_failures;
 	}
 
 	/* we count only actually executed transactions */
@@ -6443,6 +6462,9 @@ printResults(StatsData *total,
 		printf("number of deadlock failures: " INT64_FORMAT " (%.3f%%)\n",
 			   total->deadlock_failures,
 			   100.0 * total->deadlock_failures / total_cnt);
+		printf("number of other failures: " INT64_FORMAT " (%.3f%%)\n",
+			   total->other_sql_failures,
+			   100.0 * total->other_sql_failures / total_cnt);
 	}
 
 	/* it can be non-zero only if max_tries is not equal to one */
@@ -6546,6 +6568,10 @@ printResults(StatsData *total,
 							   sstats->deadlock_failures,
 							   (100.0 * sstats->deadlock_failures /
 								script_total_cnt));
+						printf(" - number of other failures: " INT64_FORMAT " (%.3f%%)\n",
+							sstats->other_sql_failures,
+							(100.0 * sstats->other_sql_failures /
+								script_total_cnt));
 					}
 
 					/*
@@ -6705,6 +6731,7 @@ main(int argc, char **argv)
 		{"verbose-errors", no_argument, NULL, 15},
 		{"exit-on-abort", no_argument, NULL, 16},
 		{"debug", no_argument, NULL, 17},
+		{"continue-on-error", no_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -7058,6 +7085,9 @@ main(int argc, char **argv)
 			case 17:			/* debug */
 				pg_logging_increase_verbosity();
 				break;
+			case 18:			/* continue-on-error */
+				continue_on_error = true;
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -7413,6 +7443,7 @@ main(int argc, char **argv)
 		stats.retried += thread->stats.retried;
 		stats.serialization_failures += thread->stats.serialization_failures;
 		stats.deadlock_failures += thread->stats.deadlock_failures;
+		stats.other_sql_failures += thread->stats.other_sql_failures;
 		latency_late += thread->latency_late;
 		conn_total_duration += thread->conn_duration;
 
-- 
2.39.5 (Apple Git-154)

Reply via email to