Sorry, the previously attached patch has a small bug.
Please use the latest one.
> 134 - return min + (int64) (max - min + 1) * rand;
> 134 + return min + (int64)((max - min + 1) * rand);
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
*** a/contrib/pgbench/pgbench.c
--- b/contrib/pgbench/pgbench.c
***************
*** 176,181 **** int progress_nthreads = 0; /* number of threads for progress report */
--- 176,183 ----
bool is_connect; /* establish connection for each transaction */
bool is_latencies; /* report per-command latencies */
int main_pid; /* main process id used in log filename */
+ double stdev_threshold = 5; /* standard deviation threshold */
+ bool gaussian_option = false; /* use gaussian distribution random generator */
char *pghost = "";
char *pgport = "";
***************
*** 338,346 **** static char *select_only = {
--- 340,390 ----
"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
};
+ /*
+  * --gaussian case: built-in script for the default TPC-B-like test.
+  * :aid is drawn with \setgaussian using :stdev_threshold, while :bid, :tid
+  * and :delta are still drawn with \setrandom.
+  */
+ static char *gaussian_tpc_b = {
+ "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ "\\setrandom bid 1 :nbranches\n"
+ "\\setrandom tid 1 :ntellers\n"
+ "\\setrandom delta -5000 5000\n"
+ "BEGIN;\n"
+ "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ "UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;\n"
+ "UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;\n"
+ "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ "END;\n"
+ };
+
+ /*
+  * --gaussian with -N case: same statements as gaussian_tpc_b minus the
+  * UPDATEs of pgbench_tellers and pgbench_branches.  :aid is drawn with
+  * \setgaussian using :stdev_threshold.
+  */
+ static char *gaussian_simple_update = {
+ "\\set nbranches " CppAsString2(nbranches) " * :scale\n"
+ "\\set ntellers " CppAsString2(ntellers) " * :scale\n"
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ "\\setrandom bid 1 :nbranches\n"
+ "\\setrandom tid 1 :ntellers\n"
+ "\\setrandom delta -5000 5000\n"
+ "BEGIN;\n"
+ "UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ "INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);\n"
+ "END;\n"
+ };
+
+ /*
+  * --gaussian with -S case: draws :aid with \setgaussian using
+  * :stdev_threshold and runs a single SELECT on pgbench_accounts.
+  */
+ static char *gaussian_select_only = {
+ "\\set naccounts " CppAsString2(naccounts) " * :scale\n"
+ "\\setgaussian aid 1 :naccounts :stdev_threshold\n"
+ "SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
+ };
+
/* Function prototypes */
static void setalarm(int seconds);
static void *threadRun(void *arg);
+ static inline double sqrtd(const double x);
static void
usage(void)
***************
*** 381,386 **** usage(void)
--- 425,431 ----
" -v, --vacuum-all vacuum all four standard tables before tests\n"
" --aggregate-interval=NUM aggregate data over NUM seconds\n"
" --sampling-rate=NUM fraction of transactions to log (e.g. 0.01 for 1%%)\n"
+ " --gaussian=NUM gaussian distribution with NUM standard deviation threshold\n"
"\nCommon options:\n"
" -d, --debug print debugging output\n"
" -h, --host=HOSTNAME database server host or socket directory\n"
***************
*** 477,482 **** getrand(TState *thread, int64 min, int64 max)
--- 522,597 ----
return min + (int64) ((max - min + 1) * pg_erand48(thread->random_state));
}
+ /*
+  * Random number generator: Gaussian (normal) distribution truncated to
+  * [-stdev_threshold, stdev_threshold) and mapped onto min..max inclusive.
+  *
+  * thread->random_state is the pg_erand48() state that is consumed.
+  * stdev_threshold is the number of standard deviations that the min..max
+  * range spans on each side of its middle, so larger values concentrate the
+  * results around the middle of the range.  Callers are expected to pass a
+  * finite threshold of at least 2 and a range for which max - min + 1 does
+  * not overflow.
+  *
+  * No state is kept between calls: a function-level cache of the second
+  * Box-Muller value would be shared by all threads and would mix the random
+  * streams of different clients.
+  */
+ static int64
+ getGaussianrand(TState *thread, int64 min, int64 max, double stdev_threshold)
+ {
+ 	double		stdev;
+ 	double		rand;
+ 	int64		result;
+
+ 	/*
+ 	 * Draw standard normal values until one falls inside
+ 	 * [-stdev_threshold, stdev_threshold).  With the minimum threshold of 2
+ 	 * a draw is rejected less than 5 percent of the time, so the loop rarely
+ 	 * iterates more than once.
+ 	 */
+ 	do
+ 	{
+ 		/*
+ 		 * pg_erand48 generates [0,1) random numbers.  log(0) is not finite,
+ 		 * so the first one is flipped into (0,1].  The second one only
+ 		 * feeds sin(), for which [0,1) is fine.
+ 		 */
+ 		double		rand1 = 1.0 - pg_erand48(thread->random_state);
+ 		double		rand2 = pg_erand48(thread->random_state);
+
+ 		/* Box-Muller transform */
+ 		stdev = sqrtd(-2.0 * log(rand1)) * sin(2.0 * M_PI * rand2);
+ 	} while (stdev < -stdev_threshold || stdev >= stdev_threshold);
+
+ 	/* normalization to [0,1) */
+ 	rand = (stdev + stdev_threshold) / (stdev_threshold * 2.0);
+
+ 	/*
+ 	 * Scale to min..max.  Floating-point rounding can push rand up to
+ 	 * exactly 1.0, so clamp to keep the result from reaching max + 1.
+ 	 */
+ 	result = min + (int64) ((max - min + 1) * rand);
+ 	if (result > max)
+ 		result = max;
+
+ 	return result;
+ }
+
+ /*
+  * Square root of x, kept as a separate helper so that callers and the
+  * prototype do not change.
+  *
+  * Delegates to the C library's sqrt().  A hand-rolled "fast inverse square
+  * root" would have to reinterpret the bits of a double through an integer
+  * pointer, which is undefined behavior under C's aliasing rules, and would
+  * only yield an approximation.
+  */
+ static inline double
+ sqrtd(const double x)
+ {
+ 	return sqrt(x);
+ }
+
/* call PQexec() and exit() on failure */
static void
executeStatement(PGconn *con, const char *sql)
***************
*** 1391,1396 **** top:
--- 1506,1601 ----
st->listen = 1;
}
+ else if (pg_strcasecmp(argv[0], "setgaussian") == 0)
+ {
+ 	/*
+ 	 * \setgaussian varname min max threshold
+ 	 *
+ 	 * argv[1] is the variable to set; argv[2], argv[3] and argv[4] are
+ 	 * each either a constant or a :variable reference.  On any error the
+ 	 * client's error count is bumped and true is returned.
+ 	 */
+ 	char	   *var;
+ 	int64		min;
+ 	int64		max;
+ 	int64		value;
+ 	double		stdev_threshold;
+ 	char		res[64];
+
+ 	/* minimum */
+ 	if (*argv[2] == ':')
+ 	{
+ 		if ((var = getVariable(st, argv[2] + 1)) == NULL)
+ 		{
+ 			fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[2]);
+ 			st->ecnt++;
+ 			return true;
+ 		}
+ 		min = strtoint64(var);
+ 	}
+ 	else
+ 		min = strtoint64(argv[2]);
+
+ 	/* maximum */
+ 	if (*argv[3] == ':')
+ 	{
+ 		if ((var = getVariable(st, argv[3] + 1)) == NULL)
+ 		{
+ 			fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[3]);
+ 			st->ecnt++;
+ 			return true;
+ 		}
+ 		max = strtoint64(var);
+ 	}
+ 	else
+ 		max = strtoint64(argv[3]);
+
+ 	/* check if min and max are appropriate value */
+ 	if (max < min)
+ 	{
+ 		fprintf(stderr, "%s: maximum is less than minimum\n", argv[0]);
+ 		st->ecnt++;
+ 		return true;
+ 	}
+
+ 	/* for not overflowing when generating random number */
+ 	if (max - min < 0 || (max - min) + 1 < 0)
+ 	{
+ 		fprintf(stderr, "%s: range too large\n", argv[0]);
+ 		st->ecnt++;
+ 		return true;
+ 	}
+
+ 	/* standard deviation threshold */
+ 	if (*argv[4] == ':')
+ 	{
+ 		if ((var = getVariable(st, argv[4] + 1)) == NULL)
+ 		{
+ 			fprintf(stderr, "%s: undefined variable %s\n", argv[0], argv[4]);
+ 			st->ecnt++;
+ 			return true;
+ 		}
+ 		stdev_threshold = strtod(var, NULL);
+ 	}
+ 	else
+ 		stdev_threshold = strtod(argv[4], NULL);
+
+ 	/* written as a negated >= so that NaN is rejected as well */
+ 	if (!(stdev_threshold >= 2))
+ 	{
+ 		fprintf(stderr, "%s: gaussian threshold must be at least 2\n", argv[0]);
+ 		st->ecnt++;
+ 		return true;
+ 	}
+
+ 	/* draw once so that the DEBUG output shows the value actually stored */
+ 	value = getGaussianrand(thread, min, max, stdev_threshold);
+ #ifdef DEBUG
+ 	printf("min: " INT64_FORMAT " max: " INT64_FORMAT " random: " INT64_FORMAT "\n", min, max, value);
+ #endif
+ 	snprintf(res, sizeof(res), INT64_FORMAT, value);
+
+ 	if (!putVariable(st, argv[0], argv[1], res))
+ 	{
+ 		st->ecnt++;
+ 		return true;
+ 	}
+
+ 	st->listen = 1;
+ }
else if (pg_strcasecmp(argv[0], "set") == 0)
{
char *var;
***************
*** 1915,1920 **** process_commands(char *buf)
--- 2120,2137 ----
fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
my_commands->argv[0], my_commands->argv[j]);
}
+ else if (pg_strcasecmp(my_commands->argv[0], "setgaussian") == 0)
+ {
+ if (my_commands->argc < 5)
+ {
+ fprintf(stderr, "%s: missing argument\n", my_commands->argv[0]);
+ exit(1);
+ }
+
+ for (j = 5; j < my_commands->argc; j++)
+ fprintf(stderr, "%s: extra argument \"%s\" ignored\n",
+ my_commands->argv[0], my_commands->argv[j]);
+ }
else if (pg_strcasecmp(my_commands->argv[0], "set") == 0)
{
if (my_commands->argc < 3)
***************
*** 2188,2203 **** printResults(int ttype, int normal_xacts, int nclients,
(INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
if (ttype == 0)
! s = "TPC-B (sort of)";
else if (ttype == 2)
! s = "Update only pgbench_accounts";
else if (ttype == 1)
! s = "SELECT only";
else
s = "Custom query";
printf("transaction type: %s\n", s);
printf("scaling factor: %d\n", scale);
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
--- 2405,2447 ----
(INSTR_TIME_GET_DOUBLE(conn_total_time) / nthreads));
if (ttype == 0)
! {
! 	/* default built-in test: label it by the distribution in use */
! 	if (gaussian_option)
! 		s = "Gaussian distributed TPC-B (sort of)";
! 	else
! 		s = "TPC-B (sort of)";
! }
else if (ttype == 2)
! {
! if(gaussian_option)
! s = "Gaussian distributed update only pgbench_accounts";
! else
! s = "Update only pgbench_accounts";
! }
else if (ttype == 1)
! {
! if(gaussian_option)
! s = "Gaussian distributed SELECT only";
! else
! s = "SELECT only";
! }
else
s = "Custom query";
printf("transaction type: %s\n", s);
printf("scaling factor: %d\n", scale);
+
+ /* output in only gaussian distributed benchmark */
+ if(gaussian_option)
+ {
+ printf("standard deviation threshold: %.5f\n", stdev_threshold);
+ printf("access probability of top 20%%, 10%% and 5%% records: %.5f %.5f %.5f\n",
+ (double) ((erf (stdev_threshold * 0.2 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ (double) ((erf (stdev_threshold * 0.1 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0)))),
+ (double) ((erf (stdev_threshold * 0.05 / sqrt(2.0))) / (erf (stdev_threshold / sqrt(2.0))))
+ );
+ }
+
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
***************
*** 2327,2332 **** main(int argc, char **argv)
--- 2571,2577 ----
{"unlogged-tables", no_argument, &unlogged_tables, 1},
{"sampling-rate", required_argument, NULL, 4},
{"aggregate-interval", required_argument, NULL, 5},
+ {"gaussian", required_argument, NULL, 6},
{"rate", required_argument, NULL, 'R'},
{NULL, 0, NULL, 0}
};
***************
*** 2606,2611 **** main(int argc, char **argv)
--- 2851,2865 ----
}
#endif
break;
+ case 6:
+ 	/* --gaussian=NUM: NUM is the standard deviation threshold */
+ 	gaussian_option = true;
+ 	stdev_threshold = atof(optarg);
+ 	/* written as a negated >= so that NaN is rejected as well */
+ 	if (!(stdev_threshold >= 2))
+ 	{
+ 		fprintf(stderr, "--gaussian=NUM must be at least 2: %f\n", stdev_threshold);
+ 		exit(1);
+ 	}
+ 	break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
***************
*** 2803,2808 **** main(int argc, char **argv)
--- 3057,3073 ----
}
}
+ /* set :stdev_threshold variable */
+ if(getVariable(&state[0], "stdev_threshold") == NULL)
+ {
+ snprintf(val, sizeof(val), "%lf", stdev_threshold);
+ for (i = 0; i < nclients; i++)
+ {
+ if (!putVariable(&state[i], "startup", "stdev_threshold", val))
+ exit(1);
+ }
+ }
+
if (!is_no_vacuum)
{
fprintf(stderr, "starting vacuum...");
***************
*** 2828,2844 **** main(int argc, char **argv)
switch (ttype)
{
case 0:
! sql_files[0] = process_builtin(tpc_b);
num_files = 1;
break;
case 1:
! sql_files[0] = process_builtin(select_only);
num_files = 1;
break;
case 2:
! sql_files[0] = process_builtin(simple_update);
num_files = 1;
break;
--- 3093,3118 ----
switch (ttype)
{
case 0:
! if(gaussian_option)
! sql_files[0] = process_builtin(gaussian_tpc_b);
! else
! sql_files[0] = process_builtin(tpc_b);
num_files = 1;
break;
case 1:
! if(gaussian_option)
! sql_files[0] = process_builtin(gaussian_select_only);
! else
! sql_files[0] = process_builtin(select_only);
num_files = 1;
break;
case 2:
! if (gaussian_option)
! 	sql_files[0] = process_builtin(gaussian_simple_update);
! else
! 	sql_files[0] = process_builtin(simple_update);
num_files = 1;
break;
*** a/doc/src/sgml/pgbench.sgml
--- b/doc/src/sgml/pgbench.sgml
***************
*** 320,325 **** pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
--- 320,342 ----
</varlistentry>
<varlistentry>
+ <term><option>--gaussian=</option><replaceable>standard deviation threshold</></term>
+ <listitem>
+ <para>
+ Use a Gaussian (normal) distribution, instead of a uniform one, to choose
+ the aid values accessed in the pgbench_accounts table. The argument is the
+ standard deviation threshold, which controls how skewed the access pattern
+ is. A larger threshold concentrates the accesses on a narrower set of
+ records; a smaller threshold spreads them more evenly. The standard
+ deviation threshold must be at least 2. This limit keeps the cost of
+ generating each random value reasonable. If you add the '-N' or '-S'
+ option, the corresponding built-in test is run with the Gaussian
+ distribution.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><option>-j</option> <replaceable>threads</></term>
<term><option>--jobs=</option><replaceable>threads</></term>
<listitem>
***************
*** 770,775 **** pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
--- 787,818 ----
<varlistentry>
<term>
+ <literal>\setgaussian <replaceable>varname</> <replaceable>min</> <replaceable>max</>
+ <replaceable>standard deviation threshold</></literal>
+ </term>
+
+ <listitem>
+ <para>
+ Sets variable <replaceable>varname</> to a Gaussian-distributed random
+ integer value between the limits <replaceable>min</> and
+ <replaceable>max</> inclusive. Each limit can be either an integer
+ constant or a <literal>:</><replaceable>variablename</> reference to a
+ variable having an integer value. The standard deviation threshold
+ controls how the values are distributed: the larger the threshold, the
+ more the values concentrate in a narrow range around the middle of the
+ interval. The minimum standard deviation threshold is 2. This limit
+ keeps the cost of generating each value reasonable.
+ </para>
+
+ <para>
+ Example:
+ <programlisting>
+ \setgaussian aid 1 :naccounts 5
+ </programlisting></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>
<literal>\sleep <replaceable>number</> [ us | ms | s ]</literal>
</term>
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers