In order to simulate real-world clog contention, we need to use benchmarks that deal with real world situations.
Currently, pgbench pre-loads data using COPY and executes a VACUUM so that all hint bits are set on every row of every page of every table. Thus, as pgbench runs it sees zero clog accesses from historical data. As a result, clog access is minimised and the effects of clog contention in the real world go unnoticed. The following patch adds a pgbench option -I to load data using INSERTs, so that we can begin benchmark testing with rows that have large numbers of distinct un-hinted transaction ids. With a database pre-created using this we will be better able to simulate and thus more easily measure clog contention. Note that current clog has space for 1 million xids, so a scale factor of greater than 10 is required to really stress the clog. The patch uses multiple connections to load data using a predefined script similar to the -N or -S logic. $ pgbench --help pgbench is a benchmarking tool for PostgreSQL. Usage: pgbench [OPTIONS]... [DBNAME] Initialization options: -i invokes initialization mode using COPY -I invokes initialization mode using INSERTs ... $ pgbench -I -c 4 -t 10000 creating tables... filling accounts table with 100000 rows using inserts set primary key... NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "pgbench_branches_pkey" for table "pgbench_branches" NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "pgbench_tellers_pkey" for table "pgbench_tellers" NOTICE: ALTER TABLE / ADD PRIMARY KEY will create implicit index "pgbench_accounts_pkey" for table "pgbench_accounts" done. transactions option ignored transaction type: Load pgbench_accounts using INSERTs scaling factor: 1 query mode: simple number of clients: 4 number of threads: 1 number of transactions per client: 25000 number of transactions actually processed: 100000/100000 tps = 828.194854 (including connections establishing) tps = 828.440330 (excluding connections establishing) Yes, my laptop really is that slow. 
Contributions to improve that situation gratefully received. -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c index 9081f09..7423532 100644 --- a/contrib/pgbench/pgbench.c +++ b/contrib/pgbench/pgbench.c @@ -282,6 +282,12 @@ static char *select_only = { "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n" }; +/* -I case */ +static char *insert_accounts = { + "\\set naccounts " CppAsString2(naccounts) "\n" + "INSERT INTO pgbench_accounts (aid, bid, abalance) VALUES (nextval('pgbench_accounts_load_seq'), 1 + (lastval()/(:naccounts)), 0);\n" +}; + /* Function prototypes */ static void setalarm(int seconds); static void *threadRun(void *arg); @@ -340,7 +346,8 @@ usage(const char *progname) "Usage:\n" " %s [OPTIONS]... [DBNAME]\n" "\nInitialization options:\n" - " -i invokes initialization mode\n" + " -i invokes initialization mode using COPY\n" + " -I invokes initialization mode using INSERTs\n" " -F NUM fill factor\n" " -s NUM scaling factor\n" " --index-tablespace=TABLESPACE\n" @@ -1256,7 +1263,7 @@ disconnect_all(CState *state, int length) /* create tables and setup data */ static void -init(void) +init(bool init_uses_copy) { /* * Note: TPC-B requires at least 100 bytes per row, and the "filler" @@ -1308,6 +1315,8 @@ init(void) if ((con = doConnect()) == NULL) exit(1); + fprintf(stderr, "creating tables...\n"); + for (i = 0; i < lengthof(DDLs); i++) { char opts[256]; @@ -1356,50 +1365,73 @@ init(void) executeStatement(con, "commit"); - /* - * fill the pgbench_accounts table with some data - */ - fprintf(stderr, "creating tables...\n"); + if (init_uses_copy) + { + /* + * fill the pgbench_accounts table with some data + */ + fprintf(stderr, "loading accounts table with %d rows using COPY\n", + naccounts * scale); - executeStatement(con, "begin"); - executeStatement(con, "truncate pgbench_accounts"); + executeStatement(con, "begin"); + executeStatement(con, "truncate pgbench_accounts"); - res = PQexec(con, "copy pgbench_accounts from stdin"); - if (PQresultStatus(res) != PGRES_COPY_IN) - { - 
fprintf(stderr, "%s", PQerrorMessage(con)); - exit(1); - } - PQclear(res); + res = PQexec(con, "copy pgbench_accounts from stdin"); + if (PQresultStatus(res) != PGRES_COPY_IN) + { + fprintf(stderr, "%s", PQerrorMessage(con)); + exit(1); + } + PQclear(res); - for (i = 0; i < naccounts * scale; i++) - { - int j = i + 1; + for (i = 0; i < naccounts * scale; i++) + { + int j = i + 1; + + snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0); + if (PQputline(con, sql)) + { + fprintf(stderr, "PQputline failed\n"); + exit(1); + } + + if (j % 10000 == 0) + fprintf(stderr, "%d tuples done.\n", j); + } - snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0); - if (PQputline(con, sql)) + if (PQputline(con, "\\.\n")) { - fprintf(stderr, "PQputline failed\n"); + fprintf(stderr, "very last PQputline failed\n"); exit(1); } + if (PQendcopy(con)) + { + fprintf(stderr, "PQendcopy failed\n"); + exit(1); + } + executeStatement(con, "commit"); - if (j % 10000 == 0) - fprintf(stderr, "%d tuples done.\n", j); } - if (PQputline(con, "\\.\n")) - { - fprintf(stderr, "very last PQputline failed\n"); - exit(1); - } - if (PQendcopy(con)) + else { - fprintf(stderr, "PQendcopy failed\n"); - exit(1); + /* + * create sequence for use during execution of INSERTS into accounts + * + * Rows are loaded using statically defined script "insert_accounts" + */ + executeStatement(con, "drop sequence if exists pgbench_accounts_load_seq"); + executeStatement(con, "create sequence pgbench_accounts_load_seq"); } - executeStatement(con, "commit"); /* * create indexes + * + * We do this after a load by COPY, but before a load via INSERT + * + * This is done deliberately to ensure that no heap or index hints are + * set before we start running the benchmark. This emulates the case + * where data has arrived row at a time by INSERT, rather than being + * bulkloaded prior to update. 
*/ fprintf(stderr, "set primary key...\n"); for (i = 0; i < lengthof(DDLAFTERs); i++) @@ -1765,6 +1797,8 @@ printResults(int ttype, int normal_xacts, int nclients, s = "Update only pgbench_accounts"; else if (ttype == 1) s = "SELECT only"; + else if (ttype == 3) + s = "Load pgbench_accounts using INSERTs"; else s = "Custom query"; @@ -1842,10 +1876,12 @@ main(int argc, char **argv) int nclients = 1; /* default number of simulated clients */ int nthreads = 1; /* default number of threads */ int is_init_mode = 0; /* initialize mode? */ + int init_uses_copy = 0; /* does initialize use copy? */ int is_no_vacuum = 0; /* no vacuum at all before testing? */ int do_vacuum_accounts = 0; /* do vacuum accounts before testing? */ int ttype = 0; /* transaction type. 0: TPC-B, 1: SELECT only, - * 2: skip update of branches and tellers */ + * 2: skip update of branches and tellers + * 3: load data into accounts using INSERTs */ int optindex; char *filename = NULL; bool scale_given = false; @@ -1910,12 +1946,18 @@ main(int argc, char **argv) state = (CState *) xmalloc(sizeof(CState)); memset(state, 0, sizeof(CState)); - while ((c = getopt_long(argc, argv, "ih:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "iIh:nvp:dSNc:j:Crs:t:T:U:lf:D:F:M:", long_options, &optindex)) != -1) { switch (c) { case 'i': is_init_mode++; + init_uses_copy = true; + break; + case 'I': + is_init_mode++; + init_uses_copy = false; + ttype = 3; break; case 'h': pghost = optarg; @@ -2092,8 +2134,19 @@ main(int argc, char **argv) if (is_init_mode) { - init(); - exit(0); + init(init_uses_copy); + if (init_uses_copy) + exit(0); + else + { + if (nxacts != 0) + fprintf(stderr, "transactions option ignored\n"); + if (duration > 0) + fprintf(stderr, "duration option ignored\n"); + nxacts = (naccounts * scale) / nclients; + if (((naccounts * scale) % nclients) != 0) + nxacts++; + } } /* Use DEFAULT_NXACTS if neither nxacts nor duration is specified. 
*/ @@ -2169,7 +2222,7 @@ main(int argc, char **argv) exit(1); } - if (ttype != 3) + if (ttype != 4) { /* * get the scaling factor that should be same as count(*) from @@ -2249,6 +2302,11 @@ main(int argc, char **argv) num_files = 1; break; + case 3: + sql_files[0] = process_builtin(insert_accounts); + num_files = 1; + break; + default: break; } diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml index 2ae3b9e..49b2904 100644 --- a/doc/src/sgml/pgbench.sgml +++ b/doc/src/sgml/pgbench.sgml @@ -131,7 +131,22 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> <term><option>-i</option></term> <listitem> <para> - Required to invoke initialization mode. + Required to invoke initialization mode. Initialization uses COPY + to load data by default and performs a VACUUM afterwards, unless + <option>-n</option> is specified. </para> </listitem> </varlistentry> + + <varlistentry> + <term><option>-I</option></term> + <listitem> + <para> + Required to invoke initialization mode using INSERTs. Each INSERT + is performed inside its own transaction. Multiple connections should + be specified to increase the speed of loading using this option. + One purpose of using this option is to simulate a database that + contains data loaded by individual transactions. </para> </listitem> </varlistentry>
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers