Hi,

On Sun, 13 Jul 2025 23:15:24 +0900
Rintaro Ikeda <ikedarinta...@oss.nttdata.com> wrote:

> I noticed one small thing I’d like to discuss. I'm not sure that users clearly
> understand which aborted in the following error message, the client or the 
> script.
> > pgbench: error: client 0 script 0 aborted in command ... query ...
> 
> Since the code path always results in a client abort, I wonder if the 
> following
> message might be clearer:
> > pgbench: error: client 0 aborted in script 0 command ... query ...

Indeed, it seems clearer to explicitly state that it is the client that
was aborted.

I've attached an updated patch that replaces the remaining message mentioned
above with a call to commandFailed(). With this change, the output in such
situations will now be:

 "client 0 aborted in command 0 (SQL) of script 0; ...."

Regards,
Yugo Nagata

-- 
Yugo Nagata <nag...@sraoss.co.jp>
>From 19c5ee6c077091eaf99e133b26a3e822a39f3964 Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nag...@sraoss.co.jp>
Date: Thu, 10 Jul 2025 17:21:05 +0900
Subject: [PATCH v8 3/3] Improve error messages for errors that cause client
 abortion

This commit modifies relevant error messages to explicitly indicate that the
client was aborted. As part of this change, pg_log_error was replaced with
commandFailed().
---
 src/bin/pgbench/pgbench.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 7dbeb79ca8d..41a7c19fff5 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -3309,8 +3309,7 @@ readCommandResponse(CState *st, MetaCommand meta, char *varprefix)
 			case PGRES_EMPTY_QUERY: /* may be used for testing no-op overhead */
 				if (is_last && meta == META_GSET)
 				{
-					pg_log_error("client %d script %d command %d query %d: expected one row, got %d",
-								 st->id, st->use_file, st->command, qrynum, 0);
+					commandFailed(st, "gset", psprintf("expected one row, got %d", 0));
 					st->estatus = ESTATUS_META_COMMAND_ERROR;
 					goto error;
 				}
@@ -3324,8 +3323,7 @@ readCommandResponse(CState *st, MetaCommand meta, char *varprefix)
 					if (meta == META_GSET && ntuples != 1)
 					{
 						/* under \gset, report the error */
-						pg_log_error("client %d script %d command %d query %d: expected one row, got %d",
-									 st->id, st->use_file, st->command, qrynum, PQntuples(res));
+						commandFailed(st, "gset", psprintf("expected one row, got %d", PQntuples(res)));
 						st->estatus = ESTATUS_META_COMMAND_ERROR;
 						goto error;
 					}
@@ -3339,18 +3337,18 @@ readCommandResponse(CState *st, MetaCommand meta, char *varprefix)
 					for (int fld = 0; fld < PQnfields(res); fld++)
 					{
 						char	   *varname = PQfname(res, fld);
+						char	   *cmd = (meta == META_ASET ? "aset" : "gset");
 
 						/* allocate varname only if necessary, freed below */
 						if (*varprefix != '\0')
 							varname = psprintf("%s%s", varprefix, varname);
 
 						/* store last row result as a string */
-						if (!putVariable(&st->variables, meta == META_ASET ? "aset" : "gset", varname,
+						if (!putVariable(&st->variables, cmd, varname,
 										 PQgetvalue(res, ntuples - 1, fld)))
 						{
 							/* internal error */
-							pg_log_error("client %d script %d command %d query %d: error storing into variable %s",
-										 st->id, st->use_file, st->command, qrynum, varname);
+							commandFailed(st, cmd, psprintf("error storing into variable %s", varname));
 							st->estatus = ESTATUS_META_COMMAND_ERROR;
 							goto error;
 						}
-- 
2.43.0

>From 8a1583068ee4737ba82664a359638902c93e56a3 Mon Sep 17 00:00:00 2001
From: Rintaro Ikeda <ikedarinta...@oss.nttdata.com>
Date: Wed, 9 Jul 2025 23:50:36 +0900
Subject: [PATCH v8 2/3] Rename a confusing enumerator

Rename the confusing enumerator which may be mistakenly assumed to be related to
other_sql_errors
---
 src/bin/pgbench/pgbench.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index edd8b01f794..7dbeb79ca8d 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -485,7 +485,7 @@ typedef enum TStatus
 	TSTATUS_IDLE,
 	TSTATUS_IN_BLOCK,
 	TSTATUS_CONN_ERROR,
-	TSTATUS_OTHER_ERROR,
+	TSTATUS_UNKNOWN_ERROR,
 } TStatus;
 
 /* Various random sequences are initialized from this one. */
@@ -3577,12 +3577,12 @@ getTransactionStatus(PGconn *con)
 			 * not. Internal error which should never occur.
 			 */
 			pg_log_error("unexpected transaction status %d", tx_status);
-			return TSTATUS_OTHER_ERROR;
+			return TSTATUS_UNKNOWN_ERROR;
 	}
 
 	/* not reached */
 	Assert(false);
-	return TSTATUS_OTHER_ERROR;
+	return TSTATUS_UNKNOWN_ERROR;
 }
 
 /*
-- 
2.43.0

>From a5d4081648105990d1ce9085ea9ffe23f09e01f9 Mon Sep 17 00:00:00 2001
From: Rintaro Ikeda <ikedarinta...@oss.nttdata.com>
Date: Wed, 9 Jul 2025 23:36:37 +0900
Subject: [PATCH v8 1/3] Add --continue-on-error option

When the option is set, client rolls back the failed transaction and starts a
new one when its transaction fails due to the reason other than the deadlock and
serialization failure.
---
 doc/src/sgml/ref/pgbench.sgml                | 71 +++++++++++++++-----
 src/bin/pgbench/pgbench.c                    | 57 +++++++++++++---
 src/bin/pgbench/t/001_pgbench_with_server.pl | 22 ++++++
 3 files changed, 125 insertions(+), 25 deletions(-)

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index ab252d9fc74..15fcb45e223 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -76,9 +76,8 @@ tps = 896.967014 (without initial connection time)
   and number of transactions per client); these will be equal unless the run
   failed before completion or some SQL command(s) failed.  (In
   <option>-T</option> mode, only the actual number of transactions is printed.)
-  The next line reports the number of failed transactions due to
-  serialization or deadlock errors (see <xref linkend="failures-and-retries"/>
-  for more information).
+  The next line reports the number of failed transactions (see
+  <xref linkend="failures-and-retries"/> for more information).
   The last line reports the number of transactions per second.
  </para>
 
@@ -790,6 +789,9 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
          <listitem>
           <para>deadlock failures;</para>
          </listitem>
+         <listitem>
+          <para>other failures;</para>
+         </listitem>
         </itemizedlist>
         See <xref linkend="failures-and-retries"/> for more information.
        </para>
@@ -914,6 +916,26 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
       </listitem>
      </varlistentry>
 
+     <varlistentry id="pgbench-option-continue-on-error">
+      <term><option>--continue-on-error</option></term>
+      <listitem>
+       <para>
+        Allows clients to continue their run even if an SQL statement fails due to
+        errors other than serialization or deadlock. Unlike serialization and deadlock
+        failures, clients do not retry the same transactions but start new transaction.
+        This option is useful when your custom script may raise errors due to some
+        reason like unique constraints violation. Without this option, the client is
+        aborted after such errors.
+       </para>
+       <para>
+        Note that serialization and deadlock failures never cause the client to be
+        aborted even after clients retries <option>--max-tries</option> times by
+        default, so they are not affected by this option.
+        See <xref linkend="failures-and-retries"/> for more information.
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
    </para>
 
@@ -2409,8 +2431,8 @@ END;
    will be reported as <literal>failed</literal>. If you use the
    <option>--failures-detailed</option> option, the
    <replaceable>time</replaceable> of the failed transaction will be reported as
-   <literal>serialization</literal> or
-   <literal>deadlock</literal> depending on the type of failure (see
+   <literal>serialization</literal>, <literal>deadlock</literal>, or
+   <literal>other</literal> depending on the type of failure (see
    <xref linkend="failures-and-retries"/> for more information).
   </para>
 
@@ -2638,6 +2660,16 @@ END;
       </para>
      </listitem>
     </varlistentry>
+
+    <varlistentry>
+     <term><replaceable>other_sql_failures</replaceable></term>
+     <listitem>
+      <para>
+       number of transactions that got a SQL error
+       (zero unless <option>--failures-detailed</option> is specified)
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
   </para>
 
@@ -2646,8 +2678,8 @@ END;
 <screen>
 <userinput>pgbench --aggregate-interval=10 --time=20 --client=10 --log --rate=1000 --latency-limit=10 --failures-detailed --max-tries=10 test</userinput>
 
-1650260552 5178 26171317 177284491527 1136 44462 2647617 7321113867 0 9866 64 7564 28340 4148 0
-1650260562 4808 25573984 220121792172 1171 62083 3037380 9666800914 0 9998 598 7392 26621 4527 0
+1650260552 5178 26171317 177284491527 1136 44462 2647617 7321113867 0 9866 64 7564 28340 4148 0 0
+1650260562 4808 25573984 220121792172 1171 62083 3037380 9666800914 0 9998 598 7392 26621 4527 0 0
 </screen>
   </para>
 
@@ -2839,9 +2871,11 @@ statement latencies in milliseconds, failures and retries:
          <option>--exit-on-abort</option> is specified. Otherwise in the worst
          case they only lead to the abortion of the failed client while other
          clients continue their run (but some client errors are handled without
-         an abortion of the client and reported separately, see below). Later in
-         this section it is assumed that the discussed errors are only the
-         direct client errors and they are not internal
+         an abortion of the client and reported separately, see below). When
+         <option>--continue-on-error</option> is specified, the client
+         continues to process new transactions even if it encounters an error.
+         Later in this section it is assumed that the discussed errors are only
+         the direct client errors and they are not internal
          <application>pgbench</application> errors.
        </para>
      </listitem>
@@ -2851,14 +2885,17 @@ statement latencies in milliseconds, failures and retries:
   <para>
    A client's run is aborted in case of a serious error; for example, the
    connection with the database server was lost or the end of script was reached
-   without completing the last transaction. In addition, if execution of an SQL
+   without completing the last transaction. By default, if execution of an SQL
    or meta command fails for reasons other than serialization or deadlock errors,
-   the client is aborted. Otherwise, if an SQL command fails with serialization or
-   deadlock errors, the client is not aborted. In such cases, the current
-   transaction is rolled back, which also includes setting the client variables
-   as they were before the run of this transaction (it is assumed that one
-   transaction script contains only one transaction; see
-   <xref linkend="transactions-and-scripts"/> for more information).
+   the client is aborted. However, if the --continue-on-error option is specified,
+   the client does not abort and proceeds to the next transaction regardless of
+   the error. These cases are reported as "other failures" in the output.
+   On contrast, if an SQL command fails with serialization or deadlock errors, the
+   client is not aborted even without  <option>--continue-on-error</option>.
+   Instead, the current transaction is rolled back, which also includes setting
+   the client variables as they were before the run of this transaction
+   (it is assumed that one transaction script contains only one transaction;
+   see <xref linkend="transactions-and-scripts"/> for more information).
    Transactions with serialization or deadlock errors are repeated after
    rollbacks until they complete successfully or reach the maximum
    number of tries (specified by the <option>--max-tries</option> option) / the maximum
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 497a936c141..edd8b01f794 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -402,15 +402,23 @@ typedef struct StatsData
 	 *   directly successful transactions (they were successfully completed on
 	 *                                     the first try).
 	 *
-	 * A failed transaction is defined as unsuccessfully retried transactions.
-	 * It can be one of two types:
+	 * A failed transaction is counted differently depending on whether
+	 * the --continue-on-error option is specified.
 	 *
+	 * Without --continue-on-error:
 	 * failed (the number of failed transactions) =
 	 *   'serialization_failures' (they got a serialization error and were not
 	 *                             successfully retried) +
 	 *   'deadlock_failures' (they got a deadlock error and were not
 	 *                        successfully retried).
 	 *
+	 * When --continue-on-error is specified:
+	 *
+	 * failed (number of failed transactions) =
+	 *   'serialization_failures' + 'deadlock_failures' +
+	 *   'other_sql_failures'  (they got some other SQL error; the transaction was
+	 * not retried and counted as failed due to --continue-on-error).
+	 *
 	 * If the transaction was retried after a serialization or a deadlock
 	 * error this does not guarantee that this retry was successful. Thus
 	 *
@@ -440,6 +448,11 @@ typedef struct StatsData
 	int64		deadlock_failures;	/* number of transactions that were not
 									 * successfully retried after a deadlock
 									 * error */
+	int64		other_sql_failures; /* number of failed transactions for
+									 * reasons other than
+									 * serialization/deadlock failure, which
+									 * is counted if --continue-on-error is
+									 * specified */
 	SimpleStats latency;
 	SimpleStats lag;
 } StatsData;
@@ -770,6 +783,7 @@ static int64 total_weight = 0;
 static bool verbose_errors = false; /* print verbose messages of all errors */
 
 static bool exit_on_abort = false;	/* exit when any client is aborted */
+static bool continue_on_error = false;	/* continue after errors */
 
 /* Builtin test scripts */
 typedef struct BuiltinScript
@@ -954,6 +968,7 @@ usage(void)
 		   "  --log-prefix=PREFIX      prefix for transaction time log file\n"
 		   "                           (default: \"pgbench_log\")\n"
 		   "  --max-tries=NUM          max number of tries to run transaction (default: 1)\n"
+		   "  --continue-on-error      continue running after an SQL error\n"
 		   "  --progress-timestamp     use Unix epoch timestamps for progress\n"
 		   "  --random-seed=SEED       set random seed (\"time\", \"rand\", integer)\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g., 0.01 for 1%%)\n"
@@ -1467,6 +1482,7 @@ initStats(StatsData *sd, pg_time_usec_t start)
 	sd->retried = 0;
 	sd->serialization_failures = 0;
 	sd->deadlock_failures = 0;
+	sd->other_sql_failures = 0;
 	initSimpleStats(&sd->latency);
 	initSimpleStats(&sd->lag);
 }
@@ -1516,6 +1532,9 @@ accumStats(StatsData *stats, bool skipped, double lat, double lag,
 		case ESTATUS_DEADLOCK_ERROR:
 			stats->deadlock_failures++;
 			break;
+		case ESTATUS_OTHER_SQL_ERROR:
+			stats->other_sql_failures++;
+			break;
 		default:
 			/* internal error which should never occur */
 			pg_fatal("unexpected error status: %d", estatus);
@@ -3356,7 +3375,7 @@ readCommandResponse(CState *st, MetaCommand meta, char *varprefix)
 			case PGRES_FATAL_ERROR:
 				st->estatus = getSQLErrorStatus(PQresultErrorField(res,
 																   PG_DIAG_SQLSTATE));
-				if (canRetryError(st->estatus))
+				if (continue_on_error || canRetryError(st->estatus))
 				{
 					if (verbose_errors)
 						commandError(st, PQerrorMessage(st->con));
@@ -4007,7 +4026,8 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 					if (PQpipelineStatus(st->con) != PQ_PIPELINE_ON)
 						st->state = CSTATE_END_COMMAND;
 				}
-				else if (canRetryError(st->estatus))
+				else if ((st->estatus == ESTATUS_OTHER_SQL_ERROR && continue_on_error) ||
+						 canRetryError(st->estatus))
 					st->state = CSTATE_ERROR;
 				else
 					st->state = CSTATE_ABORTED;
@@ -4528,7 +4548,8 @@ static int64
 getFailures(const StatsData *stats)
 {
 	return (stats->serialization_failures +
-			stats->deadlock_failures);
+			stats->deadlock_failures +
+			stats->other_sql_failures);
 }
 
 /*
@@ -4548,6 +4569,8 @@ getResultString(bool skipped, EStatus estatus)
 				return "serialization";
 			case ESTATUS_DEADLOCK_ERROR:
 				return "deadlock";
+			case ESTATUS_OTHER_SQL_ERROR:
+				return "other";
 			default:
 				/* internal error which should never occur */
 				pg_fatal("unexpected error status: %d", estatus);
@@ -4603,6 +4626,7 @@ doLog(TState *thread, CState *st,
 			int64		skipped = 0;
 			int64		serialization_failures = 0;
 			int64		deadlock_failures = 0;
+			int64		other_sql_failures = 0;
 			int64		retried = 0;
 			int64		retries = 0;
 
@@ -4643,10 +4667,12 @@ doLog(TState *thread, CState *st,
 			{
 				serialization_failures = agg->serialization_failures;
 				deadlock_failures = agg->deadlock_failures;
+				other_sql_failures = agg->other_sql_failures;
 			}
-			fprintf(logfile, " " INT64_FORMAT " " INT64_FORMAT,
+			fprintf(logfile, " " INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT,
 					serialization_failures,
-					deadlock_failures);
+					deadlock_failures,
+					other_sql_failures);
 
 			fputc('\n', logfile);
 
@@ -6285,6 +6311,7 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
 		cur.serialization_failures +=
 			threads[i].stats.serialization_failures;
 		cur.deadlock_failures += threads[i].stats.deadlock_failures;
+		cur.other_sql_failures += threads[i].stats.other_sql_failures;
 	}
 
 	/* we count only actually executed transactions */
@@ -6427,7 +6454,8 @@ printResults(StatsData *total,
 
 	/*
 	 * Remaining stats are nonsensical if we failed to execute any xacts due
-	 * to others than serialization or deadlock errors
+	 * to other than serialization or deadlock errors and --continue-on-error
+	 * is not set.
 	 */
 	if (total_cnt <= 0)
 		return;
@@ -6443,6 +6471,9 @@ printResults(StatsData *total,
 		printf("number of deadlock failures: " INT64_FORMAT " (%.3f%%)\n",
 			   total->deadlock_failures,
 			   100.0 * total->deadlock_failures / total_cnt);
+		printf("number of other failures: " INT64_FORMAT " (%.3f%%)\n",
+			   total->other_sql_failures,
+			   100.0 * total->other_sql_failures / total_cnt);
 	}
 
 	/* it can be non-zero only if max_tries is not equal to one */
@@ -6546,6 +6577,10 @@ printResults(StatsData *total,
 							   sstats->deadlock_failures,
 							   (100.0 * sstats->deadlock_failures /
 								script_total_cnt));
+						printf(" - number of other failures: " INT64_FORMAT " (%.3f%%)\n",
+							   sstats->other_sql_failures,
+							   (100.0 * sstats->other_sql_failures /
+								script_total_cnt));
 					}
 
 					/*
@@ -6705,6 +6740,7 @@ main(int argc, char **argv)
 		{"verbose-errors", no_argument, NULL, 15},
 		{"exit-on-abort", no_argument, NULL, 16},
 		{"debug", no_argument, NULL, 17},
+		{"continue-on-error", no_argument, NULL, 18},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -7058,6 +7094,10 @@ main(int argc, char **argv)
 			case 17:			/* debug */
 				pg_logging_increase_verbosity();
 				break;
+			case 18:			/* continue-on-error */
+				benchmarking_option_set = true;
+				continue_on_error = true;
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -7413,6 +7453,7 @@ main(int argc, char **argv)
 		stats.retried += thread->stats.retried;
 		stats.serialization_failures += thread->stats.serialization_failures;
 		stats.deadlock_failures += thread->stats.deadlock_failures;
+		stats.other_sql_failures += thread->stats.other_sql_failures;
 		latency_late += thread->latency_late;
 		conn_total_duration += thread->conn_duration;
 
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 7dd78940300..8bb35dda5f7 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -1813,6 +1813,28 @@ update counter set i = i+1 returning i \gset
 # Clean up
 $node->safe_psql('postgres', 'DROP TABLE counter;');
 
+# Test --continue-on-error
+$node->safe_psql('postgres',
+	'CREATE TABLE unique_table(i int unique);' . 'INSERT INTO unique_table VALUES (0);');
+
+$node->pgbench(
+	'-t 10 --continue-on-error --failures-detailed',
+	0,
+	[
+		qr{processed: 0/10\b},
+		qr{other failures: 10\b}
+	],
+	[],
+	'test --continue-on-error',
+	{
+		'002_continue_on_error' => q{
+		insert into unique_table values 0;
+		}
+	});
+
+# Clean up
+$node->safe_psql('postgres', 'DROP TABLE unique_table;');
+
 # done
 $node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
-- 
2.43.0

Reply via email to