To simulate real-world clog contention, we need benchmarks that
reflect the way data arrives in real-world databases.

Currently, pgbench pre-loads data using COPY and executes a VACUUM so
that all hint bits are set on every row of every page of every table.
Thus, as pgbench runs, it sees zero clog accesses for historical
data. As a result, clog access is minimised and the real-world
effects of clog contention go unnoticed.
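
As a quick sanity check (a sketch, assuming the contrib/pageinspect
module is available), you can verify that the COPY+VACUUM
initialization really does leave the HEAP_XMIN_COMMITTED hint (0x0100
in t_infomask) set on every tuple:

  -- requires contrib/pageinspect; 256 = 0x0100 = HEAP_XMIN_COMMITTED
  SELECT lp, (t_infomask & 256) <> 0 AS xmin_hinted
    FROM heap_page_items(get_raw_page('pgbench_accounts', 0));

After a -i initialization every row on page 0 should show
xmin_hinted = t; after an INSERT-based load none should, until the
rows are touched again.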

The following patch adds a pgbench option, -I, to load data using
INSERTs, so that benchmark runs begin with rows carrying large
numbers of distinct, un-hinted transaction ids. With a database
pre-created this way, we can better simulate, and thus more easily
measure, clog contention. Note that the current clog has space for 1
million xids, so a scale factor greater than 10 is required to really
stress it.
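
To see how many distinct xids the load left behind (a sketch; xmin is
cast to text because type xid has no default ordering operator), you
can run:

  SELECT count(DISTINCT xmin::text) FROM pgbench_accounts;

After a COPY load this returns 1; after an INSERT load it should
equal naccounts * scale, one transaction per row.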

The patch uses multiple connections to load data using a predefined
script similar to the -N or -S logic.
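
For reference, the predefined script (insert_accounts in the patch
below) issues one INSERT per transaction, drawing aid values from a
sequence:

  \set naccounts 100000
  INSERT INTO pgbench_accounts (aid, bid, abalance)
    VALUES (nextval('pgbench_accounts_load_seq'),
            1 + (lastval()/(:naccounts)), 0);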

$ pgbench --help
pgbench is a benchmarking tool for PostgreSQL.

Usage:
  pgbench [OPTIONS]... [DBNAME]

Initialization options:
  -i           invokes initialization mode using COPY
  -I           invokes initialization mode using INSERTs
...


$ pgbench -I -c 4 -t 10000
creating tables...
filling accounts table with 100000 rows using inserts
set primary key...
NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index
"pgbench_branches_pkey" for table "pgbench_branches"
NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index
"pgbench_tellers_pkey" for table "pgbench_tellers"
NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index
"pgbench_accounts_pkey" for table "pgbench_accounts"
done.
transactions option ignored
transaction type: Load pgbench_accounts using INSERTs
scaling factor: 1
query mode: simple
number of clients: 4
number of threads: 1
number of transactions per client: 25000
number of transactions actually processed: 100000/100000
tps = 828.194854 (including connections establishing)
tps = 828.440330 (excluding connections establishing)
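
(The "transactions option ignored" message is expected: in -I mode
the transaction count is derived from the data, with each client
running ceil(naccounts * scale / nclients) transactions, here
100000 / 4 = 25000, so the accounts table ends up with exactly
naccounts * scale rows.)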

Yes, my laptop really is that slow. Contributions to improve that
situation gratefully received.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 9081f09..7423532 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -282,6 +282,12 @@ static char *select_only = {
 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
 };
 
+/* -I case */
+static char *insert_accounts = {
+	"\\set naccounts " CppAsString2(naccounts) "\n"
+	"INSERT INTO pgbench_accounts (aid, bid, abalance) VALUES (nextval('pgbench_accounts_load_seq'), 1 + (lastval()/(:naccounts)), 0);\n"
+};
+
 /* Function prototypes */
 static void setalarm(int seconds);
 static void *threadRun(void *arg);
@@ -340,7 +346,8 @@ usage(const char *progname)
 		   "Usage:\n"
 		   "  %s [OPTIONS]... [DBNAME]\n"
 		   "\nInitialization options:\n"
-		   "  -i           invokes initialization mode\n"
+		   "  -i           invokes initialization mode using COPY\n"
+		   "  -I           invokes initialization mode using INSERTs\n"
 		   "  -F NUM       fill factor\n"
 		   "  -s NUM       scaling factor\n"
 		   "  --index-tablespace=TABLESPACE\n"
@@ -1256,7 +1263,7 @@ disconnect_all(CState *state, int length)
 
 /* create tables and setup data */
 static void
-init(void)
+init(bool init_uses_copy)
 {
 	/*
 	 * Note: TPC-B requires at least 100 bytes per row, and the "filler"
@@ -1308,6 +1315,8 @@ init(void)
 	if ((con = doConnect()) == NULL)
 		exit(1);
 
+	fprintf(stderr, "creating tables...\n");
+
 	for (i = 0; i < lengthof(DDLs); i++)
 	{
 		char		opts[256];
@@ -1356,50 +1365,73 @@ init(void)
 
 	executeStatement(con, "commit");
 
-	/*
-	 * fill the pgbench_accounts table with some data
-	 */
-	fprintf(stderr, "creating tables...\n");
+	if (init_uses_copy)
+	{
+		/*
+		 * fill the pgbench_accounts table with some data
+		 */
+		fprintf(stderr, "loading accounts table with %d rows using COPY\n",
+							naccounts * scale);
 
-	executeStatement(con, "begin");
-	executeStatement(con, "truncate pgbench_accounts");
+		executeStatement(con, "begin");
+		executeStatement(con, "truncate pgbench_accounts");
 
-	res = PQexec(con, "copy pgbench_accounts from stdin");
-	if (PQresultStatus(res) != PGRES_COPY_IN)
-	{
-		fprintf(stderr, "%s", PQerrorMessage(con));
-		exit(1);
-	}
-	PQclear(res);
+		res = PQexec(con, "copy pgbench_accounts from stdin");
+		if (PQresultStatus(res) != PGRES_COPY_IN)
+		{
+			fprintf(stderr, "%s", PQerrorMessage(con));
+			exit(1);
+		}
+		PQclear(res);
 
-	for (i = 0; i < naccounts * scale; i++)
-	{
-		int			j = i + 1;
+		for (i = 0; i < naccounts * scale; i++)
+		{
+			int			j = i + 1;
+
+			snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0);
+			if (PQputline(con, sql))
+			{
+				fprintf(stderr, "PQputline failed\n");
+				exit(1);
+			}
+
+			if (j % 10000 == 0)
+				fprintf(stderr, "%d tuples done.\n", j);
+		}
 
-		snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0);
-		if (PQputline(con, sql))
+		if (PQputline(con, "\\.\n"))
 		{
-			fprintf(stderr, "PQputline failed\n");
+			fprintf(stderr, "very last PQputline failed\n");
 			exit(1);
 		}
+		if (PQendcopy(con))
+		{
+			fprintf(stderr, "PQendcopy failed\n");
+			exit(1);
+		}
+		executeStatement(con, "commit");
 
-		if (j % 10000 == 0)
-			fprintf(stderr, "%d tuples done.\n", j);
 	}
-	if (PQputline(con, "\\.\n"))
-	{
-		fprintf(stderr, "very last PQputline failed\n");
-		exit(1);
-	}
-	if (PQendcopy(con))
+	else
 	{
-		fprintf(stderr, "PQendcopy failed\n");
-		exit(1);
+		/*
+		 * create sequence for use during execution of INSERTS into accounts
+		 *
+		 * Rows are loaded using statically defined script "insert_accounts"
+		 */
+		executeStatement(con, "drop sequence if exists pgbench_accounts_load_seq");
+		executeStatement(con, "create sequence pgbench_accounts_load_seq");
 	}
-	executeStatement(con, "commit");
 
 	/*
 	 * create indexes
+	 *
+	 * We do this after a load by COPY, but before a load via INSERT.
+	 *
+	 * This is done deliberately to ensure that no heap or index hints are
+	 * set before we start running the benchmark. This emulates the case
+	 * where data has arrived row at a time by INSERT, rather than being
+	 * bulk-loaded prior to update.
 	 */
 	fprintf(stderr, "set primary key...\n");
 	for (i = 0; i < lengthof(DDLAFTERs); i++)
@@ -1765,6 +1797,8 @@ printResults(int ttype, int normal_xacts, int nclients,
 		s = "Update only pgbench_accounts";
 	else if (ttype == 1)
 		s = "SELECT only";
+	else if (ttype == 3)
+		s = "Load pgbench_accounts using INSERTs";
 	else
 		s = "Custom query";
 
@@ -1842,10 +1876,12 @@ main(int argc, char **argv)
 	int			nclients = 1;	/* default number of simulated clients */
 	int			nthreads = 1;	/* default number of threads */
 	int			is_init_mode = 0;		/* initialize mode? */
+	int			init_uses_copy = 0;		/* does initialize use copy? */
 	int			is_no_vacuum = 0;		/* no vacuum at all before testing? */
 	int			do_vacuum_accounts = 0; /* do vacuum accounts before testing? */
 	int			ttype = 0;		/* transaction type. 0: TPC-B, 1: SELECT only,
-								 * 2: skip update of branches and tellers */
+								 * 2: skip update of branches and tellers
+								 * 3: load data into accounts using INSERTs */
 	int			optindex;
 	char	   *filename = NULL;
 	bool		scale_given = false;
@@ -1910,12 +1946,18 @@ main(int argc, char **argv)
 	state = (CState *) xmalloc(sizeof(CState));
 	memset(state, 0, sizeof(CState));
 
-	while ((c = getopt_long(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:", long_options, &optindex)) != -1)
+	while ((c = getopt_long(argc, argv, "iIh:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:", long_options, &optindex)) != -1)
 	{
 		switch (c)
 		{
 			case 'i':
 				is_init_mode++;
+				init_uses_copy = true;
+				break;
+			case 'I':
+				is_init_mode++;
+				init_uses_copy = false;
+				ttype = 3;
 				break;
 			case 'h':
 				pghost = optarg;
@@ -2092,8 +2134,19 @@ main(int argc, char **argv)
 
 	if (is_init_mode)
 	{
-		init();
-		exit(0);
+		init(init_uses_copy);
+		if (init_uses_copy)
+			exit(0);
+		else
+		{
+			if (nxacts != 0)
+				fprintf(stderr, "transactions option ignored\n");
+			if (duration > 0)
+				fprintf(stderr, "duration option ignored\n");
+			nxacts = (naccounts * scale) / nclients;
+			if (((naccounts * scale) % nclients) != 0)
+				nxacts++;
+		}
 	}
 
 	/* Use DEFAULT_NXACTS if neither nxacts nor duration is specified. */
@@ -2169,7 +2222,7 @@ main(int argc, char **argv)
 		exit(1);
 	}
 
-	if (ttype != 3)
+	if (ttype != 4)
 	{
 		/*
 		 * get the scaling factor that should be same as count(*) from
@@ -2249,6 +2302,11 @@ main(int argc, char **argv)
 			num_files = 1;
 			break;
 
+		case 3:
+			sql_files[0] = process_builtin(insert_accounts);
+			num_files = 1;
+			break;
+
 		default:
 			break;
 	}
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml
index 2ae3b9e..49b2904 100644
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@@ -131,7 +131,22 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       <term><option>-i</option></term>
       <listitem>
        <para>
-        Required to invoke initialization mode.
+        Required to invoke initialization mode. Initialization uses COPY
+        to load data by default and performs a VACUUM afterwards, unless
+        <option>-n</option> is specified.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><option>-I</option></term>
+      <listitem>
+       <para>
+        Required to invoke initialization mode using INSERTs. Each INSERT
+        is performed inside its own transaction. Multiple connections
+        should be specified to increase loading speed. One purpose of
+        this option is to simulate a database containing data loaded by
+        individual transactions rather than in bulk.
        </para>
       </listitem>
      </varlistentry>