Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
To clarify what state this is all in: Fabien's latest pgbench-throttle-v15.patch is the ready-for-a-committer version. The last two revisions are just tweaking the comments at this point, and his version is more correct than my last one.

Got it. I will take care of this.

Please find attached an updated version which solves conflicts introduced by the progress patch.

-- Fabien.

diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 23ee53c..4111e8c 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -137,6 +137,12 @@ int unlogged_tables = 0;
 double sample_rate = 0.0;

 /*
+ * When threads are throttled to a given rate limit, this is the target delay
+ * to reach that rate in usec. 0 is the default and means no throttling.
+ */
+int64 throttle_delay = 0;
+
+/*
  * tablespace selection
  */
 char *tablespace = NULL;
@@ -202,11 +208,13 @@ typedef struct
 	int		listen;			/* 0 indicates that an async query has been
							 * sent */
 	int		sleeping;		/* 1 indicates that the client is napping */
+	bool	throttling;		/* whether nap is for throttling */
 	int64	until;			/* napping until (usec) */
 	Variable   *variables;	/* array of variable definitions */
 	int		nvariables;
 	instr_time	txn_begin;	/* used for measuring transaction latencies */
 	instr_time	stmt_begin;	/* used for measuring statement latencies */
+	bool	is_throttled;	/* whether transaction throttling is done */
 	int		use_file;		/* index in sql_files for this client */
 	bool	prepared[MAX_FILES];
 } CState;
@@ -224,6 +232,9 @@ typedef struct
 	instr_time *exec_elapsed;	/* time spent executing cmds (per Command) */
 	int		   *exec_count;		/* number of cmd executions (per Command) */
 	unsigned short random_state[3];		/* separate randomness for each thread */
+	int64	throttle_trigger;	/* previous/next throttling (us) */
+	int64	throttle_lag;		/* total transaction lag behind throttling */
+	int64	throttle_lag_max;	/* max transaction lag */
 } TState;

 #define INVALID_THREAD	((pthread_t) 0)
@@ -232,6 +243,8 @@ typedef struct
 {
 	instr_time	conn_time;
 	int			xacts;
+	int64		throttle_lag;
+	int64		throttle_lag_max;
 } TResult;

 /*
@@ -356,6 +369,7 @@ usage(void)
 	   -N, --skip-some-updates  skip updates of pgbench_tellers and pgbench_branches\n
 	   -P, --progress NUM       show thread progress report every NUM seconds\n
 	   -r, --report-latencies   report average latency per command\n
+	   -R, --rate SPEC          target rate in transactions per second\n
 	   -s, --scale=NUM          report this scale factor in output\n
 	   -S, --select-only        perform SELECT-only transactions\n
 	   -t, --transactions       number of transactions each client runs
@@ -898,17 +912,62 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVals *agg)
 {
 	PGresult   *res;
 	Command   **commands;
+	bool		trans_needs_throttle = false;

 top:
 	commands = sql_files[st->use_file];

+	/*
+	 * Handle throttling once per transaction by sleeping. It is simpler
+	 * to do this here rather than at the end, because so much complicated
+	 * logic happens below when statements finish.
+	 */
+	if (throttle_delay && !st->is_throttled)
+	{
+		/*
+		 * Use inverse transform sampling to randomly generate a delay, such
+		 * that the series of delays will approximate a Poisson distribution
+		 * centered on the throttle_delay time.
+		 *
+		 * 1000 implies a 6.9 (-log(1/1000)) to 0.0 (log 1.0) delay multiplier.
+		 *
+		 * If transactions are too slow or a given wait is shorter than
+		 * a transaction, the next transaction will start right away.
+		 */
+		int64		wait = (int64)
+			throttle_delay * -log(getrand(thread, 1, 1000)/1000.0);
+
+		thread->throttle_trigger += wait;
+
+		st->until = thread->throttle_trigger;
+		st->sleeping = 1;
+		st->throttling = true;
+		st->is_throttled = true;
+		if (debug)
+			fprintf(stderr, "client %d throttling " INT64_FORMAT " us\n",
+					st->id, wait);
+	}
+
 	if (st->sleeping)
 	{							/* are we sleeping? */
 		instr_time	now;
+		int64		now_us;

 		INSTR_TIME_SET_CURRENT(now);
-		if (st->until <= INSTR_TIME_GET_MICROSEC(now))
+		now_us = INSTR_TIME_GET_MICROSEC(now);
+		if (st->until <= now_us)
+		{
 			st->sleeping = 0;	/* Done sleeping, go ahead with next command */
+			if (st->throttling)
+			{
+				/* Measure lag of throttled transaction relative to target */
+				int64		lag = now_us - st->until;
+				thread->throttle_lag += lag;
+				if (lag > thread->throttle_lag_max)
+					thread->throttle_lag_max = lag;
+				st->throttling = false;
+			}
+		}
 		else
 			return true;		/* Still sleeping, nothing to do here */
 	}
@@ -1095,6 +1154,15 @@ top:
 		st->state = 0;
 		st->use_file = (int) getrand(thread, 0, num_files - 1);
 		commands = sql_files[st->use_file];
+		st->is_throttled = false;
+		/*
+		 * No transaction is underway anymore, which means there is nothing to
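For readers unfamiliar with the inverse transform sampling mentioned in the comment above, the following is a standalone illustrative sketch (not the patch's code): exponentially distributed waits whose mean is the target delay produce a Poisson-distributed schedule of transaction starts. The 1..1000 uniform draw mirrors the patch's getrand() call and caps the multiplier at -log(1/1000), about 6.9.

/*
 * Standalone illustration only, not pgbench code. Draw exponentially
 * distributed waits with mean throttle_delay; their sum defines a
 * Poisson-like schedule of start times.
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int64_t
next_wait_us(int64_t throttle_delay_us)
{
	/* uniform integer in [1, 1000], standing in for getrand(thread, 1, 1000) */
	int		r = 1 + rand() % 1000;

	return (int64_t) (throttle_delay_us * -log(r / 1000.0));
}

int
main(void)
{
	int64_t		delay = 200;	/* 200 us target, i.e. 5000 tps for one client */
	int64_t		total = 0;
	int			n = 100000;

	for (int i = 0; i < n; i++)
		total += next_wait_us(delay);

	/* the average wait converges on the target delay */
	printf("average wait: %.1f us (target %lld us)\n",
		   (double) total / n, (long long) delay);
	return 0;
}

Averaged over many draws the wait converges on the target delay, so the long-run rate matches -R even though individual waits vary widely.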
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
To clarify what state this is all in: Fabien's latest pgbench-throttle-v15.patch is the ready-for-a-committer version. The last two revisions are just tweaking the comments at this point, and his version is more correct than my last one. Got it. I will take care of this. Please find attached an updated version which solves conflicts introduced by the progress patch.

Thanks, but I already solved the conflict and fixed some minor indentation issues. Now I have a question regarding the function.

./pgbench -p 5433 -S -T 10 -R 10000 test
starting vacuum...end.
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 1
number of threads: 1
duration: 10 s
number of transactions actually processed: 71339
average rate limit lag: 862.534 ms (max 2960.913 ms)
tps = 7133.745911 (including connections establishing)
tps = 7135.130810 (excluding connections establishing)

What does "average rate limit lag" mean? From the manual:

-R rate
--rate rate
Execute transactions targeting the specified rate instead of running as fast as possible (the default). The rate is given in transactions per second. If the targeted rate is above the maximum possible rate these transactions can execute at, the rate limit won't have any impact on results. The rate is targeted by starting transactions along a Poisson-distributed event time line. When a rate limit is active, the average and maximum transaction lag time (the delay between the scheduled and actual transaction start times) are reported in ms. High values indicate that the database could not handle the scheduled load at some time.

So in my understanding the number shows the delay time before *each* transaction starts. If my understanding is correct, why can 71339 (total transactions) * 862.534 ms = 61532 sec exceed 10 seconds, which is the total run time?

Also I noticed a small bug.

./pgbench -R 0 test
invalid rate limit: 0

Shouldn't this be treated as if -R is not specified? Actually in the program:

/*
 * When threads are throttled to a given rate limit, this is the target delay
 * to reach that rate in usec. 0 is the default and means no throttling.
 */
int64 throttle_delay = 0;

So it seems treating -R 0 as "no throttling" makes more sense to me.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
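As background for the -R 0 question, the user-visible rate and the internal delay are simply inverses of each other. A minimal sketch of the mapping (variable names are illustrative, not the patch's actual option parsing):

/*
 * Illustration only: how a rate in tps maps to a per-transaction target
 * delay in microseconds, and why a rate of 0 has no finite delay equivalent.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	double		rate_tps = 5000.0;		/* what -R would request */

	if (rate_tps <= 0.0)
	{
		fprintf(stderr, "invalid rate limit: %g\n", rate_tps);
		return 1;
	}

	/* throttle_delay == 0 keeps meaning "no throttling"; any positive rate gives a finite delay */
	int64_t		throttle_delay = (int64_t) (1000000.0 / rate_tps);

	printf("target delay: %lld us per transaction\n", (long long) throttle_delay);
	return 0;
}

Under this mapping a rate of 0 cannot be converted into a finite delay, which is why the current code rejects it instead of silently disabling throttling.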
Re: [HACKERS] Adding optionally commit number in PG_VERSION_STR
On Wed, July 17, 2013 05:27, Tom Lane wrote:
Michael Paquier michael.paqu...@gmail.com writes:
It happens that I work occasionally on multiple builds based on

FWIW, I've been doing this for a while, where $project is mostly the patch name and $commit_hash is extracted like this:

commit_hash=$( cd /home/aardvark/pg_stuff/git/master; git log | head -n 1 | cut --delimiter=' ' -f 2 );
version_string=${project}-${db_timestamp}-${commit_hash};
perl -i.original -ne "s,(PACKAGE_.*[[:digit:]]+\.[[:digit:]]+(?:\.[[:digit:]]+)?(?:devel)?(?:(?:alpha|beta|rc)[[:digit:]]+)?),\\1-${version_string},; print;" configure

Which will then give:

PostgreSQL 9.4devel-HEAD-20130717_0828-ffcf654547ef38555203e6d716f47b7065a0a87d on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.8.1, 64-bit

Btw, in an even more ugly hack I also stick some of that same config data in information_schema.sql_packages, with a feature_id >= 100:

select * from information_schema.sql_packages
where cast(substring(feature_id from E'^PKG([[:digit:]]+)') as integer) >= 100;

 feature_id |    feature_name    | is_supported | is_verified_by |           comments
------------+--------------------+--------------+----------------+------------------------------
 PKG100     | project name       | YES          | ej             | HEAD
 PKG101     | patched            | NO           | ej             | NO
 PKG103     | build time         | YES          | ej             | 2013-07-17 08:32:03.400521+02
 PKG104     | server_version     | YES          | ej             | 9.4devel-HEAD-20130717_0828-ffcf654547ef38555203e6d716f47b7065a0a87d
 PKG105     | server_version_num | YES          | ej             | 90400
 PKG106     | port               | YES          | ej             | 6544
 PKG110     | commit hash        | YES          | ej             | ffcf654547ef38555203e6d716f47b7065a0a87d
 PKG111     | catversion         | YES          | ej             | 201307161
 PKG112     | control version    | YES          | ej             | 937
(9 rows)

Needless to say, YMMV ...

Thanks,

Erik Rijkers

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
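The same idea can be shown more directly in C. This is a hypothetical sketch only: PostgreSQL would substitute the hash into PG_VERSION_STR at configure time, whereas this illustrates the runtime equivalent of picking up the current commit with git.

/*
 * Hypothetical illustration: append the git commit hash to a base version
 * string. BASE_VERSION_STR and the output format are made up for the example.
 */
#include <stdio.h>
#include <string.h>

#define BASE_VERSION_STR "PostgreSQL 9.4devel"

int
main(void)
{
	char		hash[64] = "unknown";
	char		version_str[128];
	FILE	   *p = popen("git rev-parse --short HEAD 2>/dev/null", "r");

	if (p != NULL)
	{
		if (fgets(hash, sizeof(hash), p) != NULL)
			hash[strcspn(hash, "\n")] = '\0';	/* strip trailing newline */
		pclose(p);
	}

	snprintf(version_str, sizeof(version_str), "%s (%s)", BASE_VERSION_STR, hash);
	puts(version_str);
	return 0;
}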
Re: [HACKERS] Cube extension point support // GSoC'13
On Fri, Jul 12, 2013 at 3:57 PM, Stas Kelvich stas.kelv...@gmail.com wrote:

Hello. Here is a patch adding support for a compressed representation of point cubes to the cube extension. If a cube is a point, i.e. has coincident lower left and upper right corners, then only one corner is stored. The first bit of the cube header indicates whether the cube is a point or not.

A few points:

* The patch preserves binary compatibility with old indices

The new representation of points will work in both index and heap. So, we should speak about just compatibility with old cubes.

* All functions that create cubes from user input check whether it is a point or not
* All internal functions that can return cubes take care of all cases where a cube might become a point
* Added tests for checking correct point behavior

Also this patch includes an adapted version of Alexander Korotkov's patch with a kNN-based ordering operator, which he wrote for postgresql-9.0beta1 with the knngist patch. More info here: http://www.postgresql.org/message-id/aanlktimhfaq6hcibrnk0tlcqmiyhywhwaq2zd87wb...@mail.gmail.com

I think the ordering operator should be extracted into a separate patch together with the other ordering operators of your project.

The patch contains some formatting issues. For example, this comment

/* Point can arise in two cases:
   1) When argument is point and r == 0
   2) When all coordinates was set to their averages */

should contain a star sign at the beginning of each line. Also it will be reflowed by pgindent. Correct formatting for this comment should look like this:

/*--
 * Point can arise in two cases:
 * 1) When argument is point and r == 0
 * 2) When all coordinates was set to their averages
 */

See the coding conventions for details: http://www.postgresql.org/docs/current/static/source-format.html

--
With best regards, Alexander Korotkov.
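To make the compressed point representation concrete, here is a minimal sketch of the header-bit idea: when both corners coincide, store only one of them and mark the fact in a flag bit. Field names, the flag value, and the layout are illustrative; the real contrib/cube on-disk format may differ.

/*
 * Illustrative sketch of a point-compressed cube: only dim coordinates are
 * stored when the point bit is set, and the upper-right accessor falls back
 * to the lower-left corner.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define CUBE_POINT_BIT 0x80000000u		/* hypothetical flag bit */

typedef struct
{
	unsigned int header;	/* dimension count plus flag bits */
	double		coords[];	/* lower-left corner; upper-right follows if not a point */
} IllustrativeCube;

static bool
cube_is_point(const IllustrativeCube *c)
{
	return (c->header & CUBE_POINT_BIT) != 0;
}

/* Upper-right corner: for a point cube, reuse the lower-left coordinates. */
static const double *
cube_ur_coords(const IllustrativeCube *c, int dim)
{
	return cube_is_point(c) ? c->coords : c->coords + dim;
}

int
main(void)
{
	int			dim = 2;
	/* a point needs only dim coordinates instead of 2 * dim */
	IllustrativeCube *c = malloc(offsetof(IllustrativeCube, coords) + dim * sizeof(double));

	c->header = (unsigned int) dim | CUBE_POINT_BIT;
	c->coords[0] = 1.5;
	c->coords[1] = 2.5;

	printf("point? %d  upper-right x = %g\n", cube_is_point(c), cube_ur_coords(c, dim)[0]);
	free(c);
	return 0;
}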
Re: [HACKERS] Adding optionally commit number in PG_VERSION_STR
On Wed, Jul 17, 2013 at 2:55 AM, Michael Paquier michael.paqu...@gmail.com wrote: Hi all, It happens that I work occasionally on multiple builds based on different stable branches at the same time to check fixes that need to be backpatched, and I tend to easily lose track on which version the build I created is based on (Duh!). There is of course the version number up to the 3rd digit available (for example 9.2.4, 9.3beta2, etc.), but as a developer I think that it would be helpful to include the commit ID in PG_VERSION_STR to get a better reference on exactly what the development build is based on. This could be controlled by an additional flag in ./configure.in called something like --enable-version-commit, of course disabled by default. If enabled, PG_VERSION_STR would be generated with the new information. configure would also return an error when this flag is enabled if git is either not found, or if the repository where configure is not a native git repository. FYI, we include the output from git describe --always in the pgAdmin version meta info, which is displayed on the About box along with the regular version info. That has proven to be extremely useful in the past, particularly during QA where people may be testing snapshot builds. -- Dave Page Blog: http://pgsnake.blogspot.com Twitter: @pgsnake EnterpriseDB UK: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Hello Tatsuo,

Now I have a question regarding the function. ./pgbench -p 5433 -S -T 10 -R 10000 test tps = 7133.745911 (including connections establishing) What does "average rate limit lag" mean? From the manual: [...] So in my understanding the number shows the delay time before *each* transaction starts.

... with respect to the schedule time assigned by the rate-limiting stochastic process. This is to detect that rate limiting does not work properly.

If my understanding is correct, why can 71339 (total transactions) * 862.534 ms = 61532 sec exceed 10 seconds, which is the total run time?

It is possible, because each transaction is far behind schedule, and you cumulate the lateness. Say you have transactions scheduled every 0.1 second, but they take 2 seconds to complete:

1. scheduled at 0.0, start at 0.0
2. scheduled at 0.1, start at 2.0, 1.9 second lag
3. scheduled at 0.2, start at 4.0, 3.8 second lag, cumulative lag 5.7 s
4. scheduled at 0.3, start at 6.0, 5.7 second lag, cumulative lag 11.4 s
5. scheduled at 0.4, start at 8.0, 7.6 second lag, cumulative lag 19.0 s
6. scheduled at 0.5, never starts

If we stop at 10.0 seconds, 5 transactions have been processed, the average lag is about 3.8 seconds, and the cumulative lag is 19.0 seconds. The lag of a given transaction can cover lag from previous ones.

Basically, if the lag is anything but small, it means that the database cannot handle the load and that something is amiss. In your example you required 10000 tps, but the database can only handle 7000 tps.

Note that the database could catch up at some point: say it usually can handle more than 10000 tps, but while a database dump is running it falls far behind schedule, and then once the dump is done it goes back to nominal and late transactions are finally processed. The max lag would show that something was amiss during the bench, even if the average lag is quite low.

Also I noticed a small bug. ./pgbench -R 0 test invalid rate limit: 0 Shouldn't this be treated as if -R is not specified? Actually in the program: /* When threads are throttled to a given rate limit, this is the target delay to reach that rate in usec. 0 is the default and means no throttling. */ int64 throttle_delay = 0; So it seems treating -R 0 as "no throttling" makes more sense to me.

Note that the rate is expressed in tps, which makes sense to users, but the internal variable is in usec, which is more useful for scheduling, and is the inverse of the other. So -R 0 would mean zero tps, that is an infinite delay, but a 0 delay would require an infinite tps. As requiring 0 tps does not make sense, I decided to disable that. If you really feel that -R 0 should mean "disable the feature", I'm fine with that, but this is not exactly logical wrt tps.

-- Fabien.

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
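The schedule above can be replayed mechanically. A small self-contained sketch (illustrative code, not pgbench's) that accumulates lag the way the patch's per-thread counters do, simplified to one client:

/*
 * Replays the example: transactions scheduled every 0.1 s, each taking
 * 2.0 s, one client, 10 s run. Accumulates lag like the throttle_lag /
 * throttle_lag_max counters, in a simplified form.
 */
#include <stdio.h>

int
main(void)
{
	const double spacing = 0.1;		/* scheduled interval between transactions */
	const double duration = 2.0;	/* how long each transaction really takes */
	const double run_time = 10.0;

	double		now = 0.0;
	double		total_lag = 0.0, max_lag = 0.0;
	int			done = 0;

	for (int i = 0;; i++)
	{
		double		scheduled = i * spacing;
		double		start = (now > scheduled) ? now : scheduled;

		if (start >= run_time)
			break;

		double		lag = start - scheduled;

		total_lag += lag;
		if (lag > max_lag)
			max_lag = lag;
		now = start + duration;
		done++;
	}

	printf("%d transactions, cumulative lag %.1f s, average %.1f s, max %.1f s\n",
		   done, total_lag, total_lag / done, max_lag);
	return 0;
}

It reports 5 transactions with 19.0 s of cumulative lag, an average of 3.8 s and a maximum of 7.6 s, matching the hand-worked table above.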
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Hello Tatsuo, Now I have question regarding the function. ./pgbench -p 5433 -S -T 10 -R 1 test tps = 7133.745911 (including connections establishing) What does average rate limit lag mean? From the manual: [...] So in my understanding the number shows the delay time before *each* transaction starts. ... with respect to the schedule time assigned by the rate-limiting stochastic process. This is to detect that rate limiting does not work properly. If my understanding is correct, why 71339 (total transactions) * 862.534 ms = 61532 sec could exceed 10 seconds, which is the total run time? It is possible, because each transaction is far behind schedule, and you cumulate the lateness. Say you have transactions schedules every 0.1 second, but they take 2 second to complete: 1. scheduled at 0.0, start at 0.0 2. scheduled at 0.1, start at 2.0, 1.9 second lag 3. scheduled at 0.2, start at 4.0, 3.8 second lag, cumulative lag 5.7 s 4. scheduled at 0.3, start at 6.0, 5.7 second lag, cumulative lag 11.4 s 5. scheduled at 0.4, start at 8.0, 7.6 second lag, cumulative lag 19.0 s 6. scheduled at 0.5, never starts If we stop at 10.0 seconds, 5 transaction have been processed, the average lag is about 3.8 seconds, the cumulative lag is 19.0 seconds. The lag of a given transaction can cover lag from previous ones. Basically, if the lag is anything but small, it means that the database cannot handle the load and that something is amiss. In your example you required 1 tps, but the database can only handle 7000 tps. Note that the database could catchup at some point, say it usually can handle more that 1 tps, but while the database dump is running it falls far behing schedule, and then one the dump is done it goes back to nominal and late transactions are finally processed. The max lag would show that something was amiss during the bench, even if the average lag is quite low. Thanks for detailed explainations. I now understand the function. Also I noticed small bug. ./pgbench -R 0 test invalid rate limit: 0 Shouldn't this be treated as if -R is not specified? Actually in the program: /* * When threads are throttled to a given rate limit, this is the target * delay * to reach that rate in usec. 0 is the default and means no throttling. */ int64throttle_delay = 0; So it seems treating -R 0 means no throttling makes more sense to me. Note that the rate is expressed in tps which make sense to users, but the internal variable is in usec which is more useful for scheduling, and is the inverse of the other. So -R 0 would mean zero tps, that is an infinite delay, but a 0 delay would require an infinite tps. As requiring 0 tps does not make sense, I decided to disable that. If you really fill that -R 0 should mean disable the feature, I'm fine with that, but this is not exactly logical wrt tps. Ok, your statement seems to be fair. Unless someone complains the point, I leave it as it is. I'm going to test your patches on Mac OS X and Windows. -- Tatsuo Ishii SRA OSS, Inc. Japan English: http://www.sraoss.co.jp/index_en.php Japanese: http://www.sraoss.co.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Patch to add regression tests for SCHEMA
I've looked at this version. The only reservation I have is that when changing the owner of a schema, the new owner is not always checked. I would suggest querying the new owner to check that it matches (5, 11, 12), just as you do in 3. Also, reowning is tested several times (5, 11, 12); I would suggest removing 12, which does not add much once both 5 and 11 have passed. Otherwise the patch applies (with a minor warning about spaces on line 33), passes for me, and brings valuable new test coverage.

-- Fabien.

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
On 7/17/13 2:31 AM, Tatsuo Ishii wrote:

./pgbench -p 5433 -S -T 10 -R 10000 test
average rate limit lag: 862.534 ms (max 2960.913 ms)
tps = 7133.745911 (including connections establishing)
tps = 7135.130810 (excluding connections establishing)
What does "average rate limit lag" mean?

The whole concept of lag with the rate limit is complicated. At one point I thought this should be a debugging detail, rather than exposing the user to it. The problem is that if you do that, you might not notice that your limit failed to work as expected. Maybe it's good enough in a case like this that the user will see they tried to limit at 10000, but they only got 7135, so something must not have worked as expected.

Tatsuo: most of my tests were on Mac OS and Linux; I actually tested the Mac version a lot more than any other here. I didn't do any testing on Windows.

-- Greg Smith 2ndQuadrant US g...@2ndquadrant.com Baltimore, MD PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Thanks for detailed explainations. I now understand the function. Good. I've looked into the documentation. I'm not sure how I could improve it significantly without adding a lot of text which would also add a lot of confusion to the casual reader. I'm going to test your patches on Mac OS X and Windows. Great! I cannot do that. -- Fabien. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
The whole concept of lag with the rate limit is complicated.

I must agree on that point: its interpretation is subtle.

At one point I thought this should be a debugging detail, rather than exposing the user to it. The problem is that if you do that, you might not notice that your limit failed to work as expected. Maybe it's good enough in a case like this that the user will see they tried to limit at 10000, but they only got 7135, so something must not have worked as expected.

Yep. As I suggested in answering Tatsuo, the process can catch up later, so you could have 10000 in the end even with something amiss.

-- Fabien.

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
The whole concept of lag with the rate limit is complicated.

I must agree on that point: its interpretation is subtle.

At one point I thought this should be a debugging detail, rather than exposing the user to it. The problem is that if you do that, you might not notice that your limit failed to work as expected. Maybe it's good enough in a case like this that the user will see they tried to limit at 10000, but they only got 7135, so something must not have worked as expected.

Yep. As I suggested in answering Tatsuo, the process can catch up later, so you could have 10000 in the end even with something amiss.

Fabien, I did another case. First, I ran pgbench without -R.

$ ./pgbench -p 5433 -S -n -c 10 -T 300 test
./pgbench -p 5433 -S -n -c 10 -T 300 test
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
duration: 300 s
number of transactions actually processed: 2945652
tps = 9818.741060 (including connections establishing)
tps = 9819.389689 (excluding connections establishing)

So I thought I could squeeze 10000 TPS from my box. Then I tried with -R 5000 tps.

$ ./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test
./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
duration: 300 s
number of transactions actually processed: 1510640
average rate limit lag: 0.304 ms (max 19.101 ms)
tps = 5035.409397 (including connections establishing)
tps = 5035.731093 (excluding connections establishing)

As you can see, I got about 5000 tps as expected. But I'm confused by the lag: 0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds (specified by -T). Am I missing something?
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
The whole concept of lag with the rate limit is complicated. I must agree on that point, their interpretation is subtle. At one point I thought this should be a debugging detail, rather than exposing the user to it. The problem is that if you do that, you might not notice that your limit failed to work as expected. Maybe it's good enough in a case like this that the user will see they tried to limit at 1, but they only got 7135, so something must not have worked as expected. Yep. As I suggested in answering to Tatsuo, the process can catch up later, so you could have 1 in the end even with something amiss. Fabian, I did another case. First, I run pgbench without -R. $ ./pgbench -p 5433 -S -n -c 10 -T 300 test ./pgbench -p 5433 -S -n -c 10 -T 300 test transaction type: SELECT only scaling factor: 1 query mode: simple number of clients: 10 number of threads: 1 duration: 300 s number of transactions actually processed: 2945652 tps = 9818.741060 (including connections establishing) tps = 9819.389689 (excluding connections establishing) So I thought I could squeeze 1 TPS from my box. Then I tried with -R 5000 tps. $ ./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test ./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test transaction type: SELECT only scaling factor: 1 query mode: simple number of clients: 10 number of threads: 1 duration: 300 s number of transactions actually processed: 1510640 average rate limit lag: 0.304 ms (max 19.101 ms) tps = 5035.409397 (including connections establishing) tps = 5035.731093 (excluding connections establishing) As you can see, I got about 5000 tps as expected. But I'm confused by the lag: 0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds (specified by -T). Am I missing something? BTW, the system was Linux (kernel 3.0.77). Now I tried on Mac OS X. $ pgbench -S -n -c 10 -T 10 test transaction type: SELECT only scaling factor: 1 query mode: simple number of clients: 10 number of threads: 1 duration: 10 s number of transactions actually processed: 67333 tps = 6730.940132 (including connections establishing) tps = 6751.078966 (excluding connections establishing) $ pgbench -S -n -c 10 -T 10 -R 3000 test transaction type: SELECT only scaling factor: 1 query mode: simple number of clients: 10 number of threads: 1 duration: 10 s number of transactions actually processed: 29840 average rate limit lag: 0.089 ms (max 27.301 ms) tps = 2983.707895 (including connections establishing) tps = 2991.919611 (excluding connections establishing) 0.089 ms * 29840 = 2.66 seconds. Not too bad compared with 10 seconds. On Linux maybe the overhead to calculate the lag is bigger than Mac OS X? Just my wild guess though... -- Tatsuo Ishii SRA OSS, Inc. Japan English: http://www.sraoss.co.jp/index_en.php Japanese: http://www.sraoss.co.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] XLogInsert scaling, revisited
On Monday, July 08, 2013 2:47 PM Heikki Linnakangas wrote: Ok, I've committed this patch now. Finally, phew! Few doubts while reading the code: 1. Why in function WALInsertSlotAcquireOne(int slotno), it does START_CRIT_SECTION() to Lock out cancel/die interrupts, whereas other places call HOLD_INTERRUPTS() 2. In function GetXLogBuffer(), why the logic to wakeup waiters is different when expectedEndPtr != endptr; When the wakeupwaiters is done in case expectedEndPtr == endptr? 3. static bool ReserveXLogSwitch(..) In above function header, why EndPos_p/StartPos_p is used when function arguments are EndPos/StartPos? With Regards, Amit Kapila. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses
On 7/16/13 11:36 PM, Ants Aasma wrote: As you know running a full suite of write benchmarks takes a very long time, with results often being inconclusive (noise is greater than effect we are trying to measure). I didn't say that. What I said is that over a full suite of write benchmarks, the effect of changes like this has always averaged out to zero. You should try it sometime. Then we can have a useful discussion of non-trivial results instead of you continuing to tell me I don't understand things. -- Greg Smith 2ndQuadrant USg...@2ndquadrant.com Baltimore, MD PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses
On Tuesday, July 16, 2013 10:16 PM Ants Aasma wrote: On Jul 14, 2013 9:46 PM, Greg Smith g...@2ndquadrant.com wrote: I updated and re-reviewed that in 2011: http://www.postgresql.org/message-id/4d31ae64.3000...@2ndquadrant.com and commented on why I think the improvement was difficult to reproduce back then. The improvement didn't follow for me either. It would take a really amazing bit of data to get me to believe write sorting code is worthwhile after that. On large systems capable of dirtying enough blocks to cause a problem, the operating system and RAID controllers are already sorting block. And *that* sorting is also considering concurrent read requests, which are a lot more important to an efficient schedule than anything the checkpoint process knows about. The database doesn't have nearly enough information yet to compete against OS level sorting. That reasoning makes no sense. OS level sorting can only see the writes in the time window between PostgreSQL write, and being forced to disk. Spread checkpoints sprinkles the writes out over a long period and the general tuning advice is to heavily bound the amount of memory the OS willing to keep dirty. This makes probability of scheduling adjacent writes together quite low, the merging window being limited either by dirty_bytes or dirty_expire_centisecs. The checkpointer has the best long term overview of the situation here, OS scheduling only has the short term view of outstanding read and write requests. By sorting checkpoint writes it is much more likely that adjacent blocks are visible to OS writeback at the same time and will be issued together. I think Oracle also use similar concept for making writes efficient, and they have patent also for this technology which you can find at below link: http://www.google.com/patents/US7194589?dq=645987hl=ensa=Xei=kn7mUZ-PIsWq rAe99oDgBwsqi=2pjf=1ved=0CEcQ6AEwAw Although Oracle has different concept for performing checkpoint writes, but I thought of sharing the above link with you, so that unknowingly we should not go into wrong path. AFAIK instead of depending on OS buffers, they use direct I/O and infact in the patent above they are using temporary buffer (Claim 3) to sort the writes which is not the same idea as far as I can understand by reading above thread. With Regards, Amit Kapila. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
tps = 9818.741060 (including connections establishing)
So I thought I could squeeze 10000 TPS from my box. Then I tried with -R 5000 tps.
number of transactions actually processed: 1510640
average rate limit lag: 0.304 ms (max 19.101 ms)
tps = 5035.409397 (including connections establishing)
As you can see, I got about 5000 tps as expected.

Yep, it works:-)

But I'm confused by the lag: 0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds (specified by -T). Am I missing something?

The lag is reasonable, although not too good. One transaction is about 1.2 ms, the lag is much smaller than that, and you are at about 50% of the maximum load. I've got similar figures on my box for such settings. It improves if you reduce the number of clients. If you reduce the number of clients, or add more threads, the lag is reduced.

BTW, the system was Linux (kernel 3.0.77).
tps = 6730.940132 (including connections establishing)
$ pgbench -S -n -c 10 -T 10 -R 3000 test
average rate limit lag: 0.089 ms (max 27.301 ms)
tps = 2983.707895 (including connections establishing)
0.089 ms * 29840 = 2.66 seconds. Not too bad compared with 10 seconds.

Indeed, that is better. Transactions are about 1.5 ms and you run at about 45% of the maximum load here.

On Linux maybe the overhead to calculate the lag is bigger than on Mac OS X? Just my wild guess though...

I would be surprised if the issue were computing the measure, compared to network connections and the like. With -S the bench is cpu bound. Possibly better scheduler/thread management on OSX? Or more available cores? Well, I do not know! At high load with clients running on the same box as the server, and with more clients and server processes than available cores, there is a lot of competition between processes, and between clients that share a unique thread, and a lot of context switching, which will result in a measured lag.

-- Fabien.

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Adding optionally commit number in PG_VERSION_STR
On Wed, Jul 17, 2013 at 5:20 PM, Dave Page dp...@pgadmin.org wrote: On Wed, Jul 17, 2013 at 2:55 AM, Michael Paquier michael.paqu...@gmail.com wrote: Hi all, It happens that I work occasionally on multiple builds based on different stable branches at the same time to check fixes that need to be backpatched, and I tend to easily lose track on which version the build I created is based on (Duh!). There is of course the version number up to the 3rd digit available (for example 9.2.4, 9.3beta2, etc.), but as a developer I think that it would be helpful to include the commit ID in PG_VERSION_STR to get a better reference on exactly what the development build is based on. This could be controlled by an additional flag in ./configure.in called something like --enable-version-commit, of course disabled by default. If enabled, PG_VERSION_STR would be generated with the new information. configure would also return an error when this flag is enabled if git is either not found, or if the repository where configure is not a native git repository. FYI, we include the output from git describe --always in the pgAdmin version meta info, which is displayed on the About box along with the regular version info. That has proven to be extremely useful in the past, particularly during QA where people may be testing snapshot builds. Yes, that's also something tracked for the QA/QE tests at VMware. Having such an option in core would be a good thing for many people IMHO instead of using some manual scripting. -- Michael -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] review: Non-recursive processing of AND/OR lists
On Tue, Jul 16, 2013 at 4:04 PM, Pavel Stehule pavel.steh...@gmail.comwrote: I did a some performance tests of v5 and v6 version and there v5 is little bit faster than v6, and v6 has significantly higher stddev Thanks Pavel. The difference in average seems negligible, but stddev is interesting because v6 does less work than v5 in common cases and in the test that I had shared. The current commitfest (2013-06) is marked as 'In Progress', so is it okay to just mark the patch as 'Ready for Committer' or should I move it to the next commitfest (2013-09). What's the procedure of moving a patch to the next commitfest? Do I make a fresh submission there with a link to current submission, or is the move doable somehow in the application itself. Best regards, -- Gurjeet Singh http://gurjeet.singh.im/ EnterpriseDB Inc.
Re: [HACKERS] review: Non-recursive processing of AND/OR lists
On Wed, Jul 17, 2013 at 8:21 AM, Gurjeet Singh gurj...@singh.im wrote: What's the procedure of moving a patch to the next commitfest? Never mind, I see an email from Josh B. regarding this on my corporate account. Best regards, -- Gurjeet Singh http://gurjeet.singh.im/ EnterpriseDB Inc.
Re: [HACKERS] XLogInsert scaling, revisited
On 2013-07-17 15:46:00 +0530, Amit Kapila wrote: On Monday, July 08, 2013 2:47 PM Heikki Linnakangas wrote: Ok, I've committed this patch now. Finally, phew! Few doubts while reading the code: 1. Why in function WALInsertSlotAcquireOne(int slotno), it does START_CRIT_SECTION() to Lock out cancel/die interrupts, whereas other places call HOLD_INTERRUPTS() A crit section does more than just stopping interrupts. They also ensure that errors that occur while inside one get converted to a PANIC. That seems apt for SlotAcquire/Release. Although the comments could possibly improved a bit. 2. In function GetXLogBuffer(), why the logic to wakeup waiters is different when expectedEndPtr != endptr; When the wakeupwaiters is done in case expectedEndPtr == endptr? I am not sure what you're asking here. We wakeup waiters if expectedEndPtr != endptr because that means the wal buffer page the 'ptr' fits on currently has different content. Which in turn means we've finished with the last page and progressed to a new one. So we wake up everyone waiting for us. WakeupWaiters() doesn't get passed expectedEndPtr but expectedEndPtr - XLOG_BLCKSZ (up to there we are guaranteed to have inserted successfully). And we're comparing with the xlogInsertingAt value which basically measures up to where we've successfully inserted. 3. static bool ReserveXLogSwitch(..) In above function header, why EndPos_p/StartPos_p is used when function arguments are EndPos/StartPos? I guess that's bitrot... Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
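As a rough illustration of the distinction Andres describes, the following are simplified stand-ins for the two mechanisms, not the real miscadmin.h or elog.c definitions: both defer cancel/die processing, but a critical section additionally promotes any ERROR raised inside it to PANIC.

/*
 * Simplified stand-ins only. The counter names mirror PostgreSQL's, but the
 * macros and the error-level logic are reduced to the shape of the behavior.
 */
#include <stdio.h>

typedef enum {ELEVEL_ERROR, ELEVEL_PANIC} ErrLevel;

static volatile int InterruptHoldoffCount = 0;
static volatile int CritSectionCount = 0;

#define HOLD_INTERRUPTS()		(InterruptHoldoffCount++)
#define RESUME_INTERRUPTS()		(InterruptHoldoffCount--)
#define START_CRIT_SECTION()	(CritSectionCount++)
#define END_CRIT_SECTION()		(CritSectionCount--)

static ErrLevel
effective_error_level(ErrLevel requested)
{
	/* an ERROR inside a critical section cannot be cleaned up safely */
	if (CritSectionCount > 0 && requested == ELEVEL_ERROR)
		return ELEVEL_PANIC;
	return requested;
}

int
main(void)
{
	HOLD_INTERRUPTS();			/* interrupts deferred, errors still recoverable */
	printf("holdoff: ERROR stays %s\n",
		   effective_error_level(ELEVEL_ERROR) == ELEVEL_ERROR ? "ERROR" : "PANIC");
	RESUME_INTERRUPTS();

	START_CRIT_SECTION();		/* interrupts deferred AND errors escalate */
	printf("crit section: ERROR becomes %s\n",
		   effective_error_level(ELEVEL_ERROR) == ELEVEL_ERROR ? "ERROR" : "PANIC");
	END_CRIT_SECTION();
	return 0;
}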
Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses
On Wed, Jul 17, 2013 at 1:54 PM, Greg Smith g...@2ndquadrant.com wrote: On 7/16/13 11:36 PM, Ants Aasma wrote: As you know running a full suite of write benchmarks takes a very long time, with results often being inconclusive (noise is greater than effect we are trying to measure). I didn't say that. What I said is that over a full suite of write benchmarks, the effect of changes like this has always averaged out to zero. You should try it sometime. Then we can have a useful discussion of non-trivial results instead of you continuing to tell me I don't understand things. The fact that other changes have been tradeoffs doesn't change the point that there is no tradeoff here. I see no way in which writing blocks to the OS in a logical order is worse than writing them out in arbitrary order. This is why I considered blindly running write benchmarks a waste of time at this point - if the worst case is zero and there are cases where it helps then it can't average out to zero. It would be better to identify the worst case and design a test for that. However I started the full gamut of scale factors and client count tests just do quiet any fears of unexpected regressions. 4 scales, 6 client loads, 3 tests, 20min per test, 2 versions, the results will be done in 48h. Regards, Ants Aasma -- Cybertec Schönig Schönig GmbH Gröhrmühlgasse 26 A-2700 Wiener Neustadt Web: http://www.postgresql-support.de -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
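To make "writing blocks to the OS in a logical order" concrete, here is a minimal sketch of sorting a checkpoint's dirty buffers by relation and block number before issuing the writes. The types and the write stand-in are hypothetical, not PostgreSQL's bufmgr API or the patch under discussion.

/*
 * Illustrative sketch: sort the to-be-written buffers so adjacent blocks
 * reach OS writeback together, then write them in that order.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct
{
	uint32_t	rel_id;		/* which relation the dirty page belongs to */
	uint32_t	block_num;	/* block number within that relation */
	int			buf_id;		/* shared-buffer slot holding the page */
} DirtyBufEntry;

static int
dirty_buf_cmp(const void *a, const void *b)
{
	const DirtyBufEntry *x = a;
	const DirtyBufEntry *y = b;

	if (x->rel_id != y->rel_id)
		return x->rel_id < y->rel_id ? -1 : 1;
	if (x->block_num != y->block_num)
		return x->block_num < y->block_num ? -1 : 1;
	return 0;
}

/* stand-in for "write this buffer and queue its fsync" */
static void
write_one_buffer(const DirtyBufEntry *e)
{
	printf("write rel %u block %u (buffer %d)\n",
		   (unsigned) e->rel_id, (unsigned) e->block_num, e->buf_id);
}

int
main(void)
{
	DirtyBufEntry dirty[] = {
		{16384, 7, 3}, {16390, 0, 9}, {16384, 2, 1}, {16384, 3, 12}, {16390, 1, 4},
	};
	size_t		n = sizeof(dirty) / sizeof(dirty[0]);

	/* sort once per checkpoint, then issue writes in file/block order */
	qsort(dirty, n, sizeof(DirtyBufEntry), dirty_buf_cmp);
	for (size_t i = 0; i < n; i++)
		write_one_buffer(&dirty[i]);
	return 0;
}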
Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses
On Wed, Jul 17, 2013 at 2:54 PM, Amit Kapila amit.kap...@huawei.com wrote: I think Oracle also use similar concept for making writes efficient, and they have patent also for this technology which you can find at below link: http://www.google.com/patents/US7194589?dq=645987hl=ensa=Xei=kn7mUZ-PIsWq rAe99oDgBwsqi=2pjf=1ved=0CEcQ6AEwAw Although Oracle has different concept for performing checkpoint writes, but I thought of sharing the above link with you, so that unknowingly we should not go into wrong path. AFAIK instead of depending on OS buffers, they use direct I/O and infact in the patent above they are using temporary buffer (Claim 3) to sort the writes which is not the same idea as far as I can understand by reading above thread. They are not even sorting anything, the patent is for opportunistically looking for adjacent dirty blocks when writing out a dirty buffer to disk. While a useful technique, this has nothing to do with sorting checkpoints. It's also a good example why the patent system is stupid. It's an obvious idea that probably has loads of prior art. I'm no patent lawyer, but the patent also looks like it would be easy to bypass by doing the equivalent thing in a slightly different way. Regards, Ants Aasma -- Cybertec Schönig Schönig GmbH Gröhrmühlgasse 26 A-2700 Wiener Neustadt Web: http://www.postgresql-support.de -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
tps = 9818.741060 (including connections establishing) So I thought I could squeeze 1 TPS from my box. Then I tried with -R 5000 tps. number of transactions actually processed: 1510640 average rate limit lag: 0.304 ms (max 19.101 ms) tps = 5035.409397 (including connections establishing) As you can see, I got about 5000 tps as expected. Yep, it works:-) But I'm confused by the lag: 0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds (specified by -T). Am I missing something? The lag is reasonnable, althought no too good. One transaction is about 1.2 ms, the lag is much smaller than that, and you are at about 50% of the maximum load. I've got similar figures on my box for such settings. It improves if your reduce the number of clients. No, 5000 TPS = 1/5000 = 0.2 ms per transaction, no? However pgbench says average lag is 0.304 ms. So the lag is longer than transaction itself. If you reduce the number of clients, or add more threads, the lag is reduced. BTW, the system was Linux (kernel 3.0.77). tps = 6730.940132 (including connections establishing) $ pgbench -S -n -c 10 -T 10 -R 3000 test average rate limit lag: 0.089 ms (max 27.301 ms) tps = 2983.707895 (including connections establishing) 0.089 ms * 29840 = 2.66 seconds. Not too bad compared with 10 seconds. Indeed, that is better. Transactions are about 1.5 ms and you run at about 45% of the maximum load here. On Linux maybe the overhead to calculate the lag is bigger than Mac OS X? Just my wild guess though... I would be surprised that this would be the issue is to compute the measure, compared to network connections and the like. With -S the bench is cpu bound. Possibly a better scheduler/thread management on OSX? Or more available cores? The number of cores is same. I don't understand why number of cores is relatedx, though. Anyway, as you can see in Mac OS X's case, TPS itself is no better than the Linux box. Well, I do not know! At high load with clients running on the same box as the server, and with more clients server than available cores, there is a lot of competition between processes, and between clients that share a unique thread, and a log context switching whoch will result in a measured lag. Hmm... I would like to have cleaner explanation/evidence before committing the patch. -- Tatsuo Ishii SRA OSS, Inc. Japan English: http://www.sraoss.co.jp/index_en.php Japanese: http://www.sraoss.co.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.
Kevin Grittner kgri...@postgresql.org writes: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY. The buildfarm members that use -DCLOBBER_CACHE_ALWAYS say this patch is broken. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] review: Non-recursive processing of AND/OR lists
On 07/17/2013 05:21 AM, Gurjeet Singh wrote: On Tue, Jul 16, 2013 at 4:04 PM, Pavel Stehule pavel.steh...@gmail.comwrote: I did a some performance tests of v5 and v6 version and there v5 is little bit faster than v6, and v6 has significantly higher stddev Thanks Pavel. The difference in average seems negligible, but stddev is interesting because v6 does less work than v5 in common cases and in the test that I had shared. The current commitfest (2013-06) is marked as 'In Progress', so is it okay to just mark the patch as 'Ready for Committer' or should I move it to the next commitfest (2013-09). If this is actually ready for committer, I'll mark it as such. -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_filedump 9.3: checksums (and a few other fixes)
Tom Lane escribió: Alvaro Herrera alvhe...@2ndquadrant.com writes: Well, Tom opined in http://www.postgresql.org/message-id/23249.1370878...@sss.pgh.pa.us that the current patch is okay. I have a mild opinion that it should instead print only SHR_LOCK when both bits are set, and one of the others when only one of them is set. But I don't have a strong opinion about this, and since Tom disagrees with me, feel free to exercise your own (Jeff's) judgement. FWIW, I think that's exactly what I did in the preliminary 9.3 patch that I committed to pg_filedump a few weeks ago. Could you take a look at what's there now and see if that's what you meant? Here's sample output (-i) from the new code, i.e. this commit: revision 1.7 date: 2013/06/06 18:33:17; author: tgl; state: Exp; lines: +14 -10 Preliminary updates for Postgres 9.3. Data -- Item 1 -- Length: 28 Offset: 8160 (0x1fe0) Flags: NORMAL XMIN: 692 XMAX: 693 CID|XVAC: 0 Block Id: 0 linp Index: 1 Attributes: 1 Size: 24 infomask: 0x0190 (XMAX_KEYSHR_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED) Item 2 -- Length: 28 Offset: 8128 (0x1fc0) Flags: NORMAL XMIN: 692 XMAX: 694 CID|XVAC: 0 Block Id: 0 linp Index: 2 Attributes: 1 Size: 24 infomask: 0x01d0 (XMAX_KEYSHR_LOCK|XMAX_EXCL_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED) Item 3 -- Length: 28 Offset: 8096 (0x1fa0) Flags: NORMAL XMIN: 692 XMAX: 695 CID|XVAC: 0 Block Id: 0 linp Index: 3 Attributes: 1 Size: 24 infomask: 0x01c0 (XMAX_EXCL_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED) Item 4 -- Length: 28 Offset: 8064 (0x1f80) Flags: NORMAL XMIN: 696 XMAX: 697 CID|XVAC: 0 Block Id: 0 linp Index: 4 Attributes: 1 Size: 24 infomask: 0x01c0 (XMAX_EXCL_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED|KEYS_UPDATED) Item 1 has SELECT FOR KEY SHARE Item 2 has SELECT FOR SHARE Item 3 has SELECT FOR NO KEY UPDATE Item 4 has SELECT FOR UPDATE The one I was talking about is the second case, which prints KEYSHR_LOCK|EXCL_LOCK to mean that there's a FOR SHARE lock. I have no problem reading it this way, but I fear that someone unfamiliar with these bits might be confused. On the other hand, trying to be nice and interpret these bits (i.e. translate presence of both into something like SHR_LOCK) might also be confusing, because that bit doesn't really exist. And one already needs to be careful while interpreting what do KEYS_UPDATED and XMAX_LOCK_ONLY, or lack thereof, mean. Perhaps it would be sensible to provide one more output line per tuple, with interpretation of the flags, so it would tell you whether the tuple has been locked or updated, and what kind of each it is. I'd propose something like status: locked (FOR {KEY SHARE,SHARE,NO KEY UPDATE,UPDATE}) [MultiXact: nnn] status: [HOT] updated (KEYS UPDATED/KEYS NOT UPDATED) [MultiXact: nnn] To: blk/off status: deleted [MultiXact: nnn] -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_filedump 9.3: checksums (and a few other fixes)
Alvaro Herrera alvhe...@2ndquadrant.com writes: The one I was talking about is the second case, which prints KEYSHR_LOCK|EXCL_LOCK to mean that there's a FOR SHARE lock. I have no problem reading it this way, but I fear that someone unfamiliar with these bits might be confused. On the other hand, trying to be nice and interpret these bits (i.e. translate presence of both into something like SHR_LOCK) might also be confusing, because that bit doesn't really exist. And one already needs to be careful while interpreting what do KEYS_UPDATED and XMAX_LOCK_ONLY, or lack thereof, mean. Perhaps it would be sensible to provide one more output line per tuple, with interpretation of the flags, so it would tell you whether the tuple has been locked or updated, and what kind of each it is. I'd propose something like status: locked (FOR {KEY SHARE,SHARE,NO KEY UPDATE,UPDATE}) [MultiXact: nnn] status: [HOT] updated (KEYS UPDATED/KEYS NOT UPDATED) [MultiXact: nnn] To: blk/off status: deleted [MultiXact: nnn] Hm. I'm loath to add another output line per tuple, just for space reasons. My feeling about this code is that the reason we print the infomask in hex is so you can see exactly which bits are set if you care, and that the rest of the line ought to be designed to interpret the bits in as reader-friendly a way as possible. So I don't buy the notion that we should just print out a name for each bit that's set. I'd rather replace individual bit names with items like LOCKED_FOR_KEY_SHARE, LOCKED_FOR_SHARE, etc in cases where you have to combine multiple bits to understand the meaning. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
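A sketch of what that decoding could look like, using the 9.3 lock-bit values that appear in the sample dump earlier in the thread. The function and its name are illustrative, not pg_filedump code, and a real decoder would also consider HEAP_XMAX_LOCK_ONLY and the multixact bits.

/*
 * Map combinations of xmax lock bits to one reader-friendly name, as Tom
 * suggests, instead of listing each bit separately.
 */
#include <stdint.h>
#include <stdio.h>

#define HEAP_XMAX_KEYSHR_LOCK	0x0010
#define HEAP_XMAX_EXCL_LOCK		0x0040
#define HEAP_XMAX_LOCK_ONLY		0x0080
#define HEAP_XMAX_KEYS_UPDATED	0x2000

static const char *
infomask_lock_name(uint16_t infomask)
{
	int			keyshr = (infomask & HEAP_XMAX_KEYSHR_LOCK) != 0;
	int			excl = (infomask & HEAP_XMAX_EXCL_LOCK) != 0;

	if (keyshr && excl)
		return "LOCKED_FOR_SHARE";
	if (keyshr)
		return "LOCKED_FOR_KEY_SHARE";
	if (excl && (infomask & HEAP_XMAX_KEYS_UPDATED))
		return "LOCKED_FOR_UPDATE";
	if (excl)
		return "LOCKED_FOR_NO_KEY_UPDATE";
	return "NOT_LOCKED";
}

int
main(void)
{
	uint16_t	samples[] = {0x0190, 0x01d0, 0x01c0};	/* items 1-3 from the dump */

	for (int i = 0; i < 3; i++)
		printf("0x%04x -> %s\n", samples[i], infomask_lock_name(samples[i]));
	return 0;
}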
Re: [HACKERS] review: Non-recursive processing of AND/OR lists
On Mon, Jul 15, 2013 at 12:45 AM, Gurjeet Singh gurj...@singh.im wrote: Agreed that there's overhead in allocating list items, but is it more overhead than pushing functions on the call stack? Not sure, so I leave it to others who understand such things better than I do. If you think that a palloc can ever be cheaper that pushing a frame on the callstack, you're wrong. palloc is not some kind of an atomic primitive. It's implemented by the AllocSetAlloc function, and you're going to have to push that function on the call stack, too, in order to run it. My main point here is that if the user writes a = 1 and b = 1 and c = 1 and d = 1, they're not going to end up with a bushy tree. They're going to end up with a tree that's only deep in one direction (left, I guess) and that's the case we might want to consider optimizing. To obtain a bushy tree, they're going to have to write a = 1 and (b = 1 and c = 1) and d = 1, or something like that, and I don't see why we should stress out about that case. It will be rare in practice. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Hello Tatsuo, The lag is reasonnable, althought no too good. One transaction is about 1.2 ms, the lag is much smaller than that, and you are at about 50% of the maximum load. I've got similar figures on my box for such settings. It improves if your reduce the number of clients. No, 5000 TPS = 1/5000 = 0.2 ms per transaction, no? Hmmm... Yes, and no:-) Transaction are handled in parallel because there are 10 clients. I look at actual transaction times (latency) from a client perspective, not the apparent time because of parallelism, and compare it to the measured lag, which is also measured per client. The transaction time I reported is derived from your maximum tps per client : 10 clients / 8300 tps = 1.2 ms / trans. However, there are 10 transaction in progress in parallel. When you're running at 50% load, the clients basically spend 1.2 ms doing a transaction (sending queries, getting results), and 1.2 ms sleeping because of rate limiting. The reported 0.3 ms lag is that when sleeping 1.2 ms it tends to start a little bit later, after 1.5 ms, but this latency does not show up on the throuput figures because the next sleep will just be a smaller to catch-up. As you have 10 clients in one pgbench thread, the scheduling say to start a new transaction for a client at a certain time, but the pgbench process is late to actually handle this client query because it is doing other things, like attending one of the other clients, or being switched off to run a server process. However pgbench says average lag is 0.304 ms. So the lag is longer than transaction itself. See above. I would be surprised that this would be the issue is to compute the measure, compared to network connections and the like. With -S the bench is cpu bound. Possibly a better scheduler/thread management on OSX? Or more available cores? The number of cores is same. I don't understand why number of cores is relatedx, though. In my mind, because pgbench -S is cpu bound, and with -c 10 you have to run pgbench and 10 postgres backends, that is 11 processes competing for cpu time. If you have 11 cores that is mostly fine, if you have less then there will be some delay depending on how the process scheduler does thing in the OS to allocate cpu time. With a load of 50%, about 6 cores should be okay to run the load transparently (client server). Well, I do not know! At high load with clients running on the same box as the server, and with more clients server than available cores, there is a lot of competition between processes, and between clients that share a unique thread, and a log context switching whoch will result in a measured lag. Hmm... I would like to have cleaner explanation/evidence before committing the patch. The lag measures you report seems pretty consistent to me given the load your requiring, for a cpu bound bench, with more processes to run than available cores. At least, I'm buying my own explanation, and I hope to be convincing. If you want to isolate yourself from such effects, pgbench must run on a different host than the server, with has many threads as there are cores available, and not too many clients per thread. This is also true without throttling, but it shows more under throttling because of the lag (latency) measures. -- Fabien. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Fix pgstattuple/pgstatindex to use regclass-type as the argument
On Tue, Jul 16, 2013 at 3:00 PM, Satoshi Nagayasu sn...@uptime.jp wrote: (2013/07/04 3:58), Fujii Masao wrote: On Wed, Jun 26, 2013 at 12:39 AM, Robert Haas robertmh...@gmail.com wrote: On Thu, Jun 20, 2013 at 2:32 PM, Fujii Masao masao.fu...@gmail.com wrote: Since pg_relpages(oid) doesn't exist, pg_relpages() is in the same situation as pgstatindex(), i.e., we cannot just replace pg_relpages(text) with pg_relpages(regclass) for the backward-compatibility. How do you think we should solve the pg_relpages() problem? Rename? Just add pg_relpages(regclass)? Adding a function with a new name seems likely to be smoother, since that way you don't have to worry about problems with function calls being thought ambiguous. Could you let me know the example that this problem happens? For the test, I just implemented the regclass-version of pg_relpages() (patch attached) and tested some cases. But I could not get that problem. SELECT pg_relpages('hoge');-- OK SELECT pg_relpages(oid) FROM pg_class WHERE relname = 'hoge';-- OK SELECT pg_relpages(relname) FROM pg_class WHERE relname = 'hoge';-- OK In the attached patch, I cleaned up three functions to have two types of arguments for each, text and regclass. pgstattuple(text) pgstattuple(regclass) pgstatindex(text) pgstatindex(regclass) pg_relpages(text) pg_relpages(regclass) I still think a regclass argument is more appropriate for passing relation/index name to a function than text-type, but having both arguments in each function seems to be a good choice at this moment, in terms of backward-compatibility. Docs needs to be updated if this change going to be applied. Yes, please. Any comments? 'make installcheck' failed in my machine. Do we need to remove pgstattuple--1.1.sql and create pgstattuple--1.1--1.2.sql? +/* contrib/pgstattuple/pgstattuple--1.1.sql */ Typo: s/1.1/1.2 You seem to have forgotten to update pgstattuple.c. Regards, -- Fujii Masao -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pgsql: Optimize pglz compressor for small inputs.
On 14.07.2013 20:12, Stephen Frost wrote: * Heikki Linnakangas (heikki.linnakan...@iki.fi) wrote: This patch alleviates that in two ways. First, instead of storing pointers in the hash table, store 16-bit indexes into the hist_entries array. That slashes the size of the hash table to 1/2 or 1/4 of the original, depending on the pointer width. Secondly, adjust the size of the hash table based on input size. For very small inputs, you don't need a large hash table to avoid collisions.

The coverity scanner has a bit of an issue with this patch which, at least on first blush, looks like a valid concern. While the change in pg_lzcompress.c:pglz_find_match() to loop on:

    while (hent != INVALID_ENTRY_PTR)
    {
        const char *ip = input;
        const char *hp = hent->pos;

looks good, and INVALID_ENTRY_PTR is the address of the first entry in the array (and can't be NULL), towards the end of the loop we do:

    hent = hent->next;
    if (hent)
        ...

Should we really be checking for 'hent != INVALID_ENTRY_PTR' here? If not, and hent really can end up as NULL, then we're going to segfault on the next loop due to the unchecked 'hent->pos' early in the loop. If hent can never be NULL, then we probably don't need this check at all.

hent can never be NULL; the code should indeed check for 'hent != INVALID_ENTRY_PTR'. The check is not required from a correctness point of view, the idea is just to avoid calculating the 'good_match' variable if you're going to fall out of the loop anyway. I'm actually a bit surprised the compiler doesn't optimize it that way anyway, without the explicit if-block, but at least gcc -O2 (version 4.7.3) doesn't seem to do that. So, I guess we should keep the check.

Committed, changing '(hent)' to '(hent != INVALID_ENTRY_PTR)'. Thanks for the report!

- Heikki

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
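For readers who have not opened pg_lzcompress.c, the following self-contained C sketch shows the shape of the loop under discussion. The struct layout, the table size and the good_match handling are simplified stand-ins (assumptions, not the actual code); the point is merely that entry 0 of the history array acts as the chain terminator, so a next pointer is never NULL, and that the final check only skips some bookkeeping on the last iteration.

    /* Sketch of a hash-chain walk terminated by a sentinel entry instead of NULL. */
    typedef struct HistEntry
    {
        struct HistEntry *next;   /* next entry in the hash chain */
        const char       *pos;    /* position of this entry in the input history */
    } HistEntry;

    static HistEntry hist_entries[4096];
    #define INVALID_ENTRY_PTR (&hist_entries[0])    /* chains end here, never at NULL */

    static int
    find_match(HistEntry *hent, const char *input, int good_match)
    {
        int     best = 0;

        while (hent != INVALID_ENTRY_PTR)
        {
            const char *ip = input;
            const char *hp = hent->pos;     /* safe: hent is never NULL */
            int         len = 0;

            while (*ip && *ip == *hp)       /* measure the match length */
            {
                len++;
                ip++;
                hp++;
            }
            if (len > best)
                best = len;

            hent = hent->next;
            if (hent != INVALID_ENTRY_PTR)  /* skip the bookkeeping when falling out */
                good_match -= good_match / 32;  /* placeholder for the real adjustment */
        }
        return best;
    }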
Re: [HACKERS] pg_filedump 9.3: checksums (and a few other fixes)
Tom Lane escribió: My feeling about this code is that the reason we print the infomask in hex is so you can see exactly which bits are set if you care, and that the rest of the line ought to be designed to interpret the bits in as reader-friendly a way as possible. So I don't buy the notion that we should just print out a name for each bit that's set. I'd rather replace individual bit names with items like LOCKED_FOR_KEY_SHARE, LOCKED_FOR_SHARE, etc in cases where you have to combine multiple bits to understand the meaning. Okay, that's what I've been saying all along so I cannot but agree. I haven't reviewed Jeff's patch lately; Jeff, does Tom's suggestion need some more new code, and if so are you open to doing this work, or shall I? -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] review: Non-recursive processing of AND/OR lists
On Wed, Jul 17, 2013 at 1:25 PM, Robert Haas robertmh...@gmail.com wrote: On Mon, Jul 15, 2013 at 12:45 AM, Gurjeet Singh gurj...@singh.im wrote: Agreed that there's overhead in allocating list items, but is it more overhead than pushing functions on the call stack? Not sure, so I leave it to others who understand such things better than I do.

If you think that a palloc can ever be cheaper than pushing a frame on the call stack, you're wrong. palloc is not some kind of an atomic primitive. It's implemented by the AllocSetAlloc function, and you're going to have to push that function on the call stack, too, in order to run it.

Agreed. I take my objection back. Even if AllocSetAlloc() reuses memory that was pfree'd earlier, it'll still be at least as expensive as recursing.

My main point here is that if the user writes a = 1 and b = 1 and c = 1 and d = 1, they're not going to end up with a bushy tree. They're going to end up with a tree that's only deep in one direction (left, I guess), and that's the case we might want to consider optimizing. To obtain a bushy tree, they're going to have to write a = 1 and (b = 1 and c = 1) and d = 1, or something like that, and I don't see why we should stress out about that case. It will be rare in practice.

In v6 of the patch, I have deferred the 'pending' list initialization until we actually hit a candidate right-branch. So in the common case the pending list will never be populated, and if we find a bushy or right-deep tree (for some reason an ORM/tool may choose to build AND/OR lists that end up being right-deep when handed to Postgres), then the pending list will be used to process them iteratively. Does that alleviate your concern about 'pending' list management causing an overhead?

Agreed that bushy/right-deep trees are a remote corner case, but we are addressing a remote corner case in the first place (insanely long AND lists), so why not handle another remote corner case right now if it doesn't cause an overhead for the common case.

Best regards, -- Gurjeet Singh http://gurjeet.singh.im/ EnterpriseDB Inc.
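Purely to illustrate the control flow described above, here is a self-contained C sketch using a simple binary AND node; the real patch works on PostgreSQL parse trees and List cells, so every name here is a stand-in. The pending list is only touched when a right-hand argument is itself an AND node, so a plain left-deep chain (the common a = 1 and b = 1 and ... case) never pays for it.

    #include <stdlib.h>

    typedef struct Node
    {
        int          is_and;    /* nonzero for an AND node */
        struct Node *larg;      /* left argument (AND nodes only) */
        struct Node *rarg;      /* right argument (AND nodes only) */
    } Node;

    typedef struct NodeList
    {
        Node            *item;
        struct NodeList *next;
    } NodeList;

    static NodeList *
    push(NodeList *list, Node *item)
    {
        NodeList *cell = malloc(sizeof(NodeList));

        cell->item = item;
        cell->next = list;
        return cell;
    }

    /* Collect every non-AND leaf of an AND tree, without recursion. */
    static NodeList *
    flatten_and(Node *root)
    {
        NodeList *out = NULL;
        NodeList *pending = NULL;       /* used only for bushy/right-deep trees */
        Node     *cur = root;

        for (;;)
        {
            if (cur->is_and)
            {
                if (cur->rarg->is_and)
                    pending = push(pending, cur->rarg); /* defer the right subtree */
                else
                    out = push(out, cur->rarg);         /* right leaf: emit directly */
                cur = cur->larg;        /* keep walking the left-deep spine */
            }
            else
            {
                out = push(out, cur);
                if (pending == NULL)
                    break;              /* common case: nothing was ever deferred */
                cur = pending->item;    /* resume with a deferred right subtree */
                pending = pending->next;
            }
        }
        return out;
    }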
Re: [HACKERS] Listen/notify across clusters
On 07/16/2013 07:16 PM, Andreas Karlsson wrote: I guess one problem is to implement writing to the WAL with the smallest possible performance hit. As far as I can see there are two possible approaches: either write to WAL when NOTIFY is run or write to WAL on commit. The former seems more in line with how commands in PostgreSQL usually work. Yes. There was some work being done by Heikki or Andreas Freund on log-only tables which seems like it would be a perfect solution to this. Anyone know what happened to that patch? There shouldn't be any major problems with implementing LISTEN on the slaves since LISTEN is done in memory. Actually, that's not the hard part. Listeners need to be registered on the standby, which requires a write to a system catalog, currently. So you'd need some alternate way to register listeners on the standby. Presumably all LISTEN events would need to be broadcast to all standbys, whether or not they had LISTENERs registered. Otherwise we'd have to push the listener registrations back to the master. -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] new row-level lock error messages
Peter Eisentraut wrote: In general, I find these new wordings to be a loss of clarity. There is no indication on the SELECT man page or in the documentation index what a row-level lock is at all. I would suggest that these changes be undone, except that the old SELECT FOR ... be replaced by a dynamic string that reverse-parses the LockingClause to provide the actual clause that was used. Hmm, that's an idea. If there are no objections, I'll get this fixed. -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Listen/notify across clusters
On 07/17/2013 02:08 PM, Josh Berkus wrote: There shouldn't be any major problems with implementing LISTEN on the slaves since LISTEN is done in memory. Actually, that's not the hard part. Listeners need to be registered on the standby, which requires a write to a system catalog, currently. Er, not since 9.0 I think - there is no pg_listener any more. cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Return of can't paste into psql issue
Hackers,

We've seen this before: http://www.postgresql.org/message-id/b42b73150906271019k6212ba71u6263bc43a8ec8...@mail.gmail.com

I've run across this again, and am posting it here just in case someone should be struck by some amazing inspiration on where the problem might be. Here's the issue: Pasting text blocks over 100 lines into psql running in a terminal causes lines to be arbitrarily truncated, mangling the query or function. The exact truncation appears random, but is 100% consistent for any given block of text. For example, in one test I ran, all lines 101-200 were truncated at character #32, but lines 1-100 were untouched. In another test, lines #82 and #163 were truncated after the 14th character, but other lines were fine. In the Ubuntu 12.10 tests below, the truncation occurred in the same places regardless of whether I was using kterm, gterm or xterm.

Here's where I've seen this issue occur:
- Ubuntu 12.10:
  - 9.2 installed from source with readline
  - 9.4 installed from source with readline, and with libedit
  - using Gnome Terminal, Kterm, or xterm
- Ubuntu 12.04:
  - 9.2 installed using packages from apt.postgresql.org
  - over SSH from a Gnome Terminal
- Mac OSX 10.6:
  - 9.2 installed from source with readline

Interestingly, here's where it does NOT occur:
- Scientific Linux 6.1:
  - 9.2 installed from yum.postgresql.org packages
  - over SSH from a Gnome Terminal on Ubuntu
- Ubuntu 12.10:
  - 9.4 from source, compiled --without-readline

Also, the same mangling does not happen with any of the following applications on Ubuntu 12.10:
- nano
- emacs
- joe
- gedit
- cat, including cat | psql

I could not find another readline-equipped terminal application to test against. Ubuntu 12.10 has libreadline 6.2.9, and SciLinux 6.1 has version 6.0. However, since this happens with libedit as well, I don't think it's purely a readline issue. It seems like we're having some kind of bizarre interaction of psql's readline/libedit hooks and specific terminals or library versions, but darned if I can even figure out where to look. If anybody has inspiration strike, please post!

-- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Proposal: template-ify (binary) extensions
Markus Wanner mar...@bluegap.ch writes: But okay, you're saying we *have* and *want* a guarantee that even a superuser cannot execute arbitrary native code via libpq (at least in default installs w/o extensions).

There are several problems confused into that sentence already. I think the next step of this discussion should be about talking about the problems we have and figuring out *if* we want to solve them, now that we managed to explicitly say what we want as a project.

- per-installation (not even per-cluster) DSO availability

  If you install PostGIS 1.5 on a system, then it's just impossible to bring up another cluster (of the same PostgreSQL major version), let alone database, with PostGIS 2.x, even for migration assessment purposes. The By Design™ part is really hard to explain even to security conscious users.

- hot standby and modules (aka DSO)

  As soon as you use some functions in 'language C' you need to carefully watch your external dependencies ahead of time. If you do CREATE EXTENSION hstore;, create an hstore column and a GiST index on it, then query the table on the standby… no luck. You would tell me that it's easy enough to do and that it's part of the job, so see next point.

- sysadmin vs dba, or PostgreSQL meets the Cloud

  The current model of operations is intended for places where you have separate roles: the sysadmin cares about the OS setup and will provide the system packages (compiled extensions and the like), and DBAs are never root on the OS. They can CREATE EXTENSION and maybe use the 'postgres' system account, but that's about it.

  Given the current rise of the Cloud environments and the devops teams, my understanding is that this model is no longer the only one. Depending on who you talk to, in some circles it's not even a relevant model anymore: user actions should not require the intervention of a sysadmin beforehand. While I appreciate that many companies still want the old behavior that used to be the only relevant model of operations, I think we should also provide for the new one as it's pervasive enough now for us to stop ignoring it with our "I know better" smiling face.

Now it should be possible to solve at least some of those items while still keeping the restriction above, or with an opt-in mechanism to enable the "works by default, but you have to solve the security implications yourself" behaviour. A new GUC should do it, boolean, defaults false:

  runs_in_the_cloud_with_no_security_concerns = false

I don't think the relaxed behaviour we're talking about is currently possible to develop as an extension, by the way.

Andres made two contrib-free suggestions: with COPY TO BINARY, you get a Well, what about using lo_import()? Things aren't quite so bad if we write the bits to a file first and then dynamically load the file. That way at least noexec or similar can provide protection. But it still seems like a pretty dangerous direction. I agree now. Thanks for elaborating.

Yes it's dangerous. It's also solving real world problems that I see no other way to solve apart from bypassing the need to ever load a DSO file, that is embedding a retargetable C compiler in the backend.

Regards, -- Dimitri Fontaine http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
Amit kapila escribió: +# This includes the default configuration directory included to support +# ALTER SYSTEM statement. +# +# WARNING: User should not remove below include_dir or directory config, +# as both are required to make ALTER SYSTEM command work. +# Any configuration parameter values specified after this line +# will override the values set by ALTER SYSTEM statement. +#include_dir = 'config' Why do we need to expose this setting to a user? a) This can be a knob to turn this feature off. This has been asked by few people, one of the mail link is mentioned below (refer towards end of mail in the link): http://www.postgresql.org/message-id/515b04f9.30...@gmx.net b) In case user wants to change priority of parameters set by Alter System, he can move the include_dir up or down in postgresql.conf. Both of these seem like they would make troubleshooters' lives more difficult. I think we should just parse the auto file automatically after parsing postgresql.conf, without requiring the include directive to be there. -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On 07/17/2013 12:01 PM, Alvaro Herrera wrote: Both of these seem like they would make troubleshooters' lives more difficult. I think we should just parse the auto file automatically after parsing postgresql.conf, without requiring the include directive to be there. Wait, I thought the auto file was going into the conf.d directory? -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Return of can't paste into psql issue
On 07/17/2013 02:50 PM, Josh Berkus wrote: Hackers, We've seen this before: http://www.postgresql.org/message-id/b42b73150906271019k6212ba71u6263bc43a8ec8...@mail.gmail.com I've run across this again, and am posting it here just in case someone should be struck by some amazing inspiration on where the problem might be. Here's the issue: Pasting text blocks over 100 lines into psql running in a terminal causes lines to be arbitrarily truncated, mangling the query or function. [snip] It seems like we're having some kind of bizarre interaction of psql's readline/libedit hooks and specific terminals or library versions, but darned if I can even figure out where to look. If anybody has inspiration strike, please post! What happens when you do this with psql -n ? My suspicion is it's the tab completion code, which I occasionally find it useful to disable this way - sadly one can't do that on the fly AFAIK. cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On Fri, Jul 12, 2013 at 9:15 AM, Amit kapila amit.kap...@huawei.com wrote: The sleep is used to ensure the effects of pg_reload_conf() can be visible. That strikes me as almost certain to result in random regression test failures. My experience, as a man who helps to maintain a very large suite of regression tests, is that all time-based assumptions about how long it will take some operation to complete are false, and that failures are a lot more frequent than you think they will be. This feature doesn't strike me as a good candidate for regression testing anyway. Keep in mind that people are intended to be able to run make installcheck against a deployed system without messing it up. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Return of can't paste into psql issue
On Wed, Jul 17, 2013 at 2:20 PM, Andrew Dunstan and...@dunslane.net wrote: On 07/17/2013 02:50 PM, Josh Berkus wrote: Hackers, We've seen this before: http://www.postgresql.org/message-id/b42b73150906271019k6212ba71u6263bc43a8ec8...@mail.gmail.com I've run across this again, and am posting it here just in case someone should be struck by some amazing inspiration on where the problem might be. Here's the issue: Pasting text blocks over 100 lines into psql running in a terminal causes lines to be arbitrarily truncated, mangling the query or function. [snip] It seems like we're having some kind of bizarre interaction of psql's readline/libedit hooks and specific terminals or library versions, but darned if I can even figure out where to look. If anybody has inspiration strike, please post! What happens when you do this with psql -n ? My suspicion is it's the tab completion code, which I occasionally find it useful to disable this way - sadly one can't do that on the fly AFAIK. huh -- you're right. merlin -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] XLogInsert scaling, revisited
On 17.07.2013 02:18, Michael Paquier wrote: On Tue, Jul 16, 2013 at 2:24 AM, Fujii Masaomasao.fu...@gmail.com wrote: On Mon, Jul 8, 2013 at 6:16 PM, Heikki Linnakangas hlinnakan...@vmware.com wrote: Ok, I've committed this patch now. Finally, phew! I found that this patch causes the assertion failure. When I set up simple replication environment and promoted the standby before executing any transaction on the master, I got the following assertion failure. 2013-07-16 02:22:06 JST sby1 LOG: received promote request 2013-07-16 02:22:06 JST sby1 FATAL: terminating walreceiver process due to administrator command 2013-07-16 02:22:06 JST sby1 LOG: redo done at 0/2F0 2013-07-16 02:22:06 JST sby1 LOG: selected new timeline ID: 2 hrk:head-pgsql postgres$ 2013-07-16 02:22:06 JST sby1 LOG: archive recovery complete TRAP: FailedAssertion(!(readOff == (XLogCtl-xlblocks[firstIdx] - 8192) % ((uint32) (16 * 1024 * 1024))), File: xlog.c, Line: 7048) 2013-07-16 02:22:12 JST sby1 LOG: startup process (PID 37115) was terminated by signal 6: Abort trap 2013-07-16 02:22:12 JST sby1 LOG: terminating any other active server processes Note that this is also reproducible even when trying to recover only from archives without strrep. Fixed, thanks for the report. While at it, I slightly refactored the way the buffer bookkeeping works. Instead of keeping track of the index of the last initialized buffer, keep track how far the buffer cache has been initialized in an XLogRecPtr variable (called XLogCtl-InitializedUpTo). That made the code slightly more readable IMO. - Heikki -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] XLogInsert scaling, revisited
On 17.07.2013 15:24, Andres Freund wrote: On 2013-07-17 15:46:00 +0530, Amit Kapila wrote: Few doubts while reading the code: 1. Why in function WALInsertSlotAcquireOne(int slotno), it does START_CRIT_SECTION() to Lock out cancel/die interrupts, whereas other places call HOLD_INTERRUPTS() A crit section does more than just stopping interrupts. They also ensure that errors that occur while inside one get converted to a PANIC. That seems apt for SlotAcquire/Release. Although the comments could possibly improved a bit. Agreed. The comment was copied from LWLockAcquire(), which only does HOLD_INTERRUPTS(). The crucial difference between LWLockAcquire() and WALInsertSlotAcquire() is that there is no automatic cleanup mechanism on abort for the WAL insertion slots like there is for lwlocks. Added a sentence to the comment to mention that. 3. static bool ReserveXLogSwitch(..) In above function header, why EndPos_p/StartPos_p is used when function arguments are EndPos/StartPos? I guess that's bitrot... Yep, fixed. Thanks for the review! - Heikki -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Proposal: template-ify (binary) extensions
On 07/17/2013 08:52 PM, Dimitri Fontaine wrote: the next step of this discussion should be about talking about the problems we have and figuring out *if* we want to solve them, now that we managed to explicitly say what we want as a project.

- per-installation (not even per-cluster) DSO availability

If you install PostGIS 1.5 on a system, then it's just impossible to bring up another cluster (of the same PostgreSQL major version), let alone database, with PostGIS 2.x, even for migration assessment purposes. The By Design™ part is really hard to explain even to security conscious users.

On Debian, that should be well possible. Certainly installing Postgres 9.1 w/ postgis-1.5 in parallel to Postgres 9.2 w/ postgis-2.0 is. I designed it to be. On distributions that do not allow parallel installation of multiple Postgres major versions, it's certainly not the extension's fault. I think I'm misunderstanding the problem statement here.

- hot standby and modules (aka DSO)

As soon as you use some functions in 'language C' you need to carefully watch your external dependencies ahead of time. If you do CREATE EXTENSION hstore;, create an hstore column and a GiST index on it, then query the table on the standby… no luck. You would tell me that it's easy enough to do and that it's part of the job, so see next point.

Agreed, that's an area where Postgres could do better. I'd argue this should be possible without relaxing the security guarantees provided, though, because there likely are people wanting both. Can CREATE EXTENSION check if the standbys have the extension installed? And refuse creation if they don't?

- sysadmin vs dba, or PostgreSQL meets the Cloud

The current model of operations is intended for places where you have separate roles: the sysadmin cares about the OS setup and will provide the system packages (compiled extensions and the like), and DBAs are never root on the OS. They can CREATE EXTENSION and maybe use the 'postgres' system account, but that's about it.

I'm sure you are aware that even without this clear separation of roles, the guarantee means we provide an additional level of security against attackers.

Given the current rise of the Cloud environments and the devops teams, my understanding is that this model is no longer the only one. Depending on who you talk to, in some circles it's not even a relevant model anymore: user actions should not require the intervention of a sysadmin beforehand. While I appreciate that many companies still want the old behavior that used to be the only relevant model of operations, I think we should also provide for the new one as it's pervasive enough now for us to stop ignoring it with our "I know better" smiling face.

I'd even think it's a minority who actually uses the guarantee we're talking about, mostly because of the many and widespread untrusted PLs (which undermine the guarantee), and thus even before the rise of the cloud. Nonetheless, the safe-by-default behavior has served us well, I think.

Now it should be possible to solve at least some of those items while still keeping the restriction above, or with an opt-in mechanism to enable the "works by default, but you have to solve the security implications yourself" behaviour. A new GUC should do it, boolean, defaults false:

  runs_in_the_cloud_with_no_security_concerns = false

[ I usually associate cloud with (increased) security concerns, but that's an entirely different story. ]

I don't think the relaxed behaviour we're talking about is currently possible to develop as an extension, by the way.

It's extensions that undermine the guarantee at the moment. But yeah, it depends a lot on what kind of relaxed behavior you have in mind.

Andres made two contrib-free suggestions: with COPY TO BINARY, you get a Well, what about using lo_import()?

That only reads from the file-system. You probably meant lo_export(), which is writing, although not on the server's but only on the (libpq) client's file-system. No threat to the server.

Yes it's dangerous. It's also solving real world problems that I see no other way to solve apart from bypassing the need to ever load a DSO file, that is embedding a retargetable C compiler in the backend.

If the sysadmin wants to disallow arbitrary execution of native code to postgres (the process), any kind of embedded compiler likely is equally unwelcome.

Regards

Markus Wanner

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_memory_barrier() doesn't compile, let alone work, for me
Robert Haas robertmh...@gmail.com writes: On Sun, Jul 14, 2013 at 8:41 PM, Tom Lane t...@sss.pgh.pa.us wrote: However, fixing that doesn't yield much joy; initdb stalls and then crashes with PANIC: stuck spinlock (40054a88) detected at xlog.c:2182 The reason for that is that the code does not bother to initialize dummy_spinlock anywhere. It might accidentally fail to fail on machines where the unlocked state of a spinlock is all-zeroes (given a compiler that's not picky about the incorrect macro usage) ... but HPPA is not such a machine. This would not be hard to fix, I think.

Really? Given that the memory barrier primitives are supposed to be, well, primitive, I don't think this is exactly a trivial problem. There's no good place to initialize such a variable, and there's even less of a place to make sure that fork or exec leaves it in an appropriate state in the child process.

Rather than trying to think of a fix for that, I'm of the opinion that we should rip this out. The fallback implementation of pg_memory_barrier ought to be pg_compiler_barrier(), on the theory that non-mainstream architectures don't have weak memory ordering anyway, or if they do you need to do some work to get PG to work on them. Or maybe we ought to stop pretending that the code is likely to work on arbitrary machines, and just #error if there's not a supplied machine-specific macro.

Well, pg_memory_barrier() isn't even equivalent to pg_compiler_barrier() on x86, which has among the strongest memory orderings out there, so I think your first idea is a non-starter.

Among the strongest memory orderings compared to what? Since what we're discussing here is non-mainstream architectures, I think this claim is unfounded. Most of the ones I can think of offhand are old enough to not even have multiprocessor support, so that the issue is vacuous.

I'm pretty sure we've got latent memory-ordering risks in our existing code which we just haven't detected and fixed yet. Consider, for example, this exciting code from GetNewTransactionId:

    myproc->subxids.xids[nxids] = xid;
    mypgxact->nxids = nxids + 1;

I don't believe that's technically safe even on an architecture like x86, because the compiler could decide to reorder those assignments.

Wrong, because both pointers are marked volatile. If the compiler does reorder the stores, it's broken. Admittedly, this doesn't say anything about hardware reordering :-(

My preference would be to fix this in a narrow way, by initializing dummy_spinlock somewhere. But if not, then I think #error is the only safe way to go.

I'm inclined to agree that #error is the only realistic answer in general, though we could probably go ahead with equating pg_memory_barrier to pg_compiler_barrier on specific architectures we know are single-processor-only. Unfortunately, that means we just raised the bar for porting efforts significantly. And in particular, it means somebody had better go through s_lock.h and make sure we have a credible barrier implementation for every single arch+compiler supported therein.

regards, tom lane

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
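To make the ordering hazard in that snippet concrete, here is an illustrative, self-contained sketch (not PostgreSQL source: the struct, the macro and the use of a GCC builtin are assumptions) showing where a write barrier has to sit so that a concurrent reader can never observe the element count advanced before the array slot itself is filled:

    #include <stdint.h>

    typedef uint32_t TransactionId;

    typedef struct
    {
        volatile int           nxids;
        volatile TransactionId xids[64];
    } FakePGXACT;

    /* volatile stops the *compiler* from reordering; on weakly ordered hardware
     * (POWER, ARM) an actual fence is still needed between the two stores. */
    #if defined(__GNUC__)
    #define write_barrier()  __sync_synchronize()   /* full fence; conservative */
    #else
    #define write_barrier()  ((void) 0)             /* assume strong ordering */
    #endif

    static void
    publish_subxid(FakePGXACT *pgxact, TransactionId xid)
    {
        int     nxids = pgxact->nxids;

        pgxact->xids[nxids] = xid;      /* 1: fill the slot ...             */
        write_barrier();                /* 2: ... make it visible first ... */
        pgxact->nxids = nxids + 1;      /* 3: ... then advertise it         */
    }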
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
Robert Haas escribió: This feature doesn't strike me as a good candidate for regression testing anyway. Keep in mind that people are intended to be able to run make installcheck against a deployed system without messing it up. This is my opinion as well. -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_memory_barrier() doesn't compile, let alone work, for me
I wrote: I'm inclined to agree that #error is the only realistic answer in general, though we could probably go ahead with equating pg_memory_barrier to pg_compiler_barrier on specific architectures we know are single-processor-only. Unfortunately, that means we just raised the bar for porting efforts significantly. And in particular, it means somebody had better go through s_lock.h and make sure we have a credible barrier implementation for every single arch+compiler supported therein. After going through s_lock.h another time, I can't help noticing that a large majority of the non-mainstream architectures make use of the default version of S_UNLOCK(), which is just #define S_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0) I assert that if this is a correct implementation, then the platform does not reorder writes, since correctness requires that any writes to shared memory variables protected by the lock occur before the lock is released. Generally speaking, I'm not seeing any memory-barrier-ish instructions on the locking side either, meaning there's also no risk of read reordering. It's possible that some of these arches do read reordering except for not hoisting reads before instructions that can be used to take locks ... but I'll bet that most of them simply don't have weak memory ordering. So I'm back to the position that pg_compiler_barrier() is a perfectly credible default implementation. More so than an incorrect usage of spinlocks, anyway. In particular, I'm going to go fix HPPA that way so I can get my build working again. BTW, the only arches for which we seem to have any barrier instructions in S_UNLOCK are ARM, PPC, Alpha, and MIPS. Alpha, at least, is probably dead, and I'm not seeing any MIPS machines in the buildfarm either; I wouldn't feel bad about desupporting both of those arches. Also, a comparison to s_lock.h says that the PPC code in barrier.h is a few bricks shy of a load: it's not honoring USE_PPC_LWSYNC. And while I'm bitching, the #ifdef structure in barrier.h is impossible to follow, not least because none of the #endifs are labeled, contrary to project style. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
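As a rough illustration of the two fallback policies being weighed in this thread, the sketch below uses GCC-style inline assembly; the fence choices (mfence, sync) are ordinary examples, KNOWN_SINGLE_PROCESSOR_PORT is a made-up symbol, and none of this is the actual content of barrier.h or s_lock.h.

    /* A compiler barrier: forbids the compiler from moving loads/stores across
     * it, but emits no instruction, so the CPU itself is unconstrained. */
    #define pg_compiler_barrier()   __asm__ __volatile__("" ::: "memory")

    #if defined(__x86_64__)
    /* Known multiprocessor architecture: use a real fence instruction. */
    #define pg_memory_barrier()     __asm__ __volatile__("mfence" ::: "memory")
    #elif defined(__powerpc__) || defined(__ppc__)
    #define pg_memory_barrier()     __asm__ __volatile__("sync" ::: "memory")
    #elif defined(KNOWN_SINGLE_PROCESSOR_PORT)
    /* The argument above: where the stock S_UNLOCK() is a plain volatile store,
     * writes are evidently not reordered, so a compiler barrier is credible. */
    #define pg_memory_barrier()     pg_compiler_barrier()
    #else
    /* The stricter policy: refuse to build rather than silently guess. */
    #error "no memory barrier implementation known for this platform"
    #endif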
Re: [HACKERS] Return of can't paste into psql issue
On 07/17/2013 12:20 PM, Andrew Dunstan wrote: What happens when you do this with psql -n ? My suspicion is it's the tab completion code, which I occasionally find it useful to disable this way - sadly one can't do that on the fly AFAIK. Well, that works. But it disables readline, not just tab completion. So it's nice that I don't have to *compile* without readline, but we still have the issue that you can't paste large files and also have readline. So, an even more practical workaround (I've been using cat | psql), but still a mysterious issue. -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On 2013-07-17 17:05:40 -0400, Alvaro Herrera wrote: Robert Haas escribió: This feature doesn't strike me as a good candidate for regression testing anyway. Keep in mind that people are intended to be able to run make installcheck against a deployed system without messing it up. This is my opinion as well. Setting and resetting a GUC like pg_regress.test_var or so shouldn't be too invasive. Some minimal testing seems like a good idea to me. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On 07/17/2013 03:17 PM, Andres Freund wrote: On 2013-07-17 17:05:40 -0400, Alvaro Herrera wrote: Robert Haas escribió: This feature doesn't strike me as a good candidate for regression testing anyway. Keep in mind that people are intended to be able to run make installcheck against a deployed system without messing it up. This is my opinion as well. Setting and resetting a GUC like pg_regress.test_var or so shouldn't be too invasive. Some minimal testing seems like a good idea to me. Also, we can make check even if we don't make installcheck. -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_memory_barrier() doesn't compile, let alone work, for me
On Wed, Jul 17, 2013 at 4:57 PM, Tom Lane t...@sss.pgh.pa.us wrote: This would not be hard to fix, I think. Really? Given that the memory barrier primitives are supposed to be, well, primitive, I don't think this is exactly a trivial problem. There's no good place to initialize such a variable, and there's even less of a place to make sure that fork or exec leaves it in an appropriate state in the child process.

Well, I admit that I don't really know how spinlocks work on every obscure platform out there, but I would have thought we could initialize this in, say, main() and call it good. For that to be not OK, we'd have to be running on a non-EXEC_BACKEND platform where a previously initialized spinlock is no longer in a good state after fork(). Unless you know of a case where that happens, I'd be inclined to assume it's a non-problem. If we find a counterexample later, then I'd insert an architecture-specific hack for that platform only, with a comment along the lines of /* YBGTBFKM */.

Well, pg_memory_barrier() isn't even equivalent to pg_compiler_barrier() on x86, which has among the strongest memory orderings out there, so I think your first idea is a non-starter.

Among the strongest memory orderings compared to what? Since what we're discussing here is non-mainstream architectures, I think this claim is unfounded. Most of the ones I can think of offhand are old enough to not even have multiprocessor support, so that the issue is vacuous.

Compared to other multi-processor architectures. I agree that the barriers are all reducible to compiler barriers on single-processor architectures, but I think new ports of PostgreSQL are much more likely to be to multi-processor systems rather than uniprocessor systems. There are very, very few multi-processor systems where pg_memory_barrier() is reducible to pg_compiler_barrier().

I'm pretty sure we've got latent memory-ordering risks in our existing code which we just haven't detected and fixed yet. Consider, for example, this exciting code from GetNewTransactionId:

    myproc->subxids.xids[nxids] = xid;
    mypgxact->nxids = nxids + 1;

I don't believe that's technically safe even on an architecture like x86, because the compiler could decide to reorder those assignments.

Wrong, because both pointers are marked volatile. If the compiler does reorder the stores, it's broken. Admittedly, this doesn't say anything about hardware reordering :-(

OK, natch. So it's safe on x86, but not on POWER.

My preference would be to fix this in a narrow way, by initializing dummy_spinlock somewhere. But if not, then I think #error is the only safe way to go.

I'm inclined to agree that #error is the only realistic answer in general, though we could probably go ahead with equating pg_memory_barrier to pg_compiler_barrier on specific architectures we know are single-processor-only.

I'd be fine with that.

Unfortunately, that means we just raised the bar for porting efforts significantly. And in particular, it means somebody had better go through s_lock.h and make sure we have a credible barrier implementation for every single arch+compiler supported therein.

I tried, but the evidence shows that I have not entirely succeeded. :-(

-- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Return of can't paste into psql issue
Josh Berkus wrote: So, an even more practical workaround (I've been using cat | psql), but still a mysterious issue. How often do your files contain tabs? I have seen cases where tab completion messes things up by asking you after a tab whether you really want to complete due to the number of possible completions, and such. -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Return of can't paste into psql issue
On 07/17/2013 03:37 PM, Alvaro Herrera wrote: Josh Berkus wrote: So, an even more practical workaround (I've been using cat | psql), but still a mysterious issue. How often do your files contain tabs? I have seen cases where tab completion messes things up by asking you after a tab whether you really want to complete due to the number of possible completions, and such. First thing I thought of, so I made sure my text editor was replacing tabs with spaces. Also, my test cases had no tabs at all (nor spaces neither). -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Return of can't paste into psql issue
Josh Berkus j...@agliodbs.com writes: So, an even more practical workaround (I've been using cat | psql), but still a mysterious issue. As a workaround you might try \e with EDITOR=emacs or some of the other solutions you've been pasting, maybe even cat, so that you can switch that readline-completion-bug-free environment for just that paste? Regards, -- Dimitri Fontaine http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Return of can't paste into psql issue
On 07/17/2013 04:30 PM, Dimitri Fontaine wrote: Josh Berkus j...@agliodbs.com writes: So, an even more practical workaround (I've been using cat | psql), but still a mysterious issue. As a workaround you might try \e with EDITOR=emacs or some of the other solutions you've been pasting, maybe even cat, so that you can switch that readline-completion-bug-free environment for just that paste? Oh, there's lots of good workarounds. I just wanna know why the bug is happening in the first place. (FWIW, I hit this because I'm using a graphical editor for PL/Python which pipes stuff to the terminal to execute it, and its piping hits the same issues as paste-from-clipboard) -- Josh Berkus PostgreSQL Experts Inc. http://pgexperts.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
If we are going to add ALTER SYSTEM, then I'd like to consider recasting ALTER ROLE ALL SET in terms of ALTER SYSTEM as well, because the ALTER ROLE ALL syntax was a hack. At least it should be possible to consider various levels of system. Who knows, maybe a future version will allow propagating settings to standby servers? I don't know what a good terminology would be, but let's keep some options open. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On Tue, 2013-07-16 at 04:23 +, Amit kapila wrote: Why do we need to expose this setting to a user? a) This can be a knob to turn this feature off. This has been asked by few people, one of the mail link is mentioned below (refer towards end of mail in the link): http://www.postgresql.org/message-id/515b04f9.30...@gmx.net b) In case user wants to change priority of parameters set by Alter System, he can move the include_dir up or down in postgresql.conf.

In that message I argue that the auto file should always be read implicitly. The comment about being able to turn this feature off was because, as presented, it was too complicated and error-prone. Removing the include would incidentally not be a way to turn the feature off. Turning the feature off would require disabling the ALTER SYSTEM command. I'm not really asking for that functionality, but I'm really against having to manually include the auto file, and also against the system nagging me about including the file.

There is the slightly interesting question whether the auto file should notionally be processed before or after the main postgresql.conf, but that's a question we should answer now, not leave for the users to work out.

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Fabien, Hello Tatsuo, The lag is reasonnable, althought no too good. One transaction is about 1.2 ms, the lag is much smaller than that, and you are at about 50% of the maximum load. I've got similar figures on my box for such settings. It improves if your reduce the number of clients. No, 5000 TPS = 1/5000 = 0.2 ms per transaction, no? Hmmm... Yes, and no:-) Transaction are handled in parallel because there are 10 clients. I look at actual transaction times (latency) from a client perspective, not the apparent time because of parallelism, and compare it to the measured lag, which is also measured per client. The transaction time I reported is derived from your maximum tps per client : 10 clients / 8300 tps = 1.2 ms / trans. However, there are 10 transaction in progress in parallel. When you're running at 50% load, the clients basically spend 1.2 ms doing a transaction (sending queries, getting results), and 1.2 ms sleeping because of rate limiting. The reported 0.3 ms lag is that when sleeping 1.2 ms it tends to start a little bit later, after 1.5 ms, but this latency does not show up on the throuput figures because the next sleep will just be a smaller to catch-up. As you have 10 clients in one pgbench thread, the scheduling say to start a new transaction for a client at a certain time, but the pgbench process is late to actually handle this client query because it is doing other things, like attending one of the other clients, or being switched off to run a server process. However pgbench says average lag is 0.304 ms. So the lag is longer than transaction itself. See above. I would be surprised that this would be the issue is to compute the measure, compared to network connections and the like. With -S the bench is cpu bound. Possibly a better scheduler/thread management on OSX? Or more available cores? The number of cores is same. I don't understand why number of cores is relatedx, though. In my mind, because pgbench -S is cpu bound, and with -c 10 you have to run pgbench and 10 postgres backends, that is 11 processes competing for cpu time. If you have 11 cores that is mostly fine, if you have less then there will be some delay depending on how the process scheduler does thing in the OS to allocate cpu time. With a load of 50%, about 6 cores should be okay to run the load transparently (client server). Well, I do not know! At high load with clients running on the same box as the server, and with more clients server than available cores, there is a lot of competition between processes, and between clients that share a unique thread, and a log context switching whoch will result in a measured lag. Hmm... I would like to have cleaner explanation/evidence before committing the patch. The lag measures you report seems pretty consistent to me given the load your requiring, for a cpu bound bench, with more processes to run than available cores. At least, I'm buying my own explanation, and I hope to be convincing. If you want to isolate yourself from such effects, pgbench must run on a different host than the server, with has many threads as there are cores available, and not too many clients per thread. This is also true without throttling, but it shows more under throttling because of the lag (latency) measures. I think I'm starting to understand what's going on. Suppose there are n transactions be issued by pgbench and it decides each schedule d(0), d(1)... d(n). 
Actually the schedule d(i) (which is stored in st->until) is decided by the following code:

    int64 wait = (int64) throttle_delay * -log(getrand(thread, 1, 1000)/1000.0);
    thread->throttle_trigger += wait;
    st->until = thread->throttle_trigger;

st->until represents the time by which the transaction is expected to be finished. Now transaction i finishes at t(i). So the lag l(i) = t(i) - d(i) if the transaction is behind. Then the next transaction i+1 begins. The lag l(i+1) = t(i+1) - d(i+1), and so on. At the end of pgbench, it shows the average lag as sum(l(0)...l(n))/n.

Now suppose we have 3 transactions and each has the following values:

    d(0) = 10    d(1) = 20    d(2) = 30
    t(0) = 100   t(1) = 110   t(2) = 120

That says pgbench expects a duration of 10 for each transaction. Actually, the first transaction runs slowly for some reason and the lag = 100 - 10 = 90. However, tx(1) and tx(2) finish on schedule because they spend only 10 each (110-100 = 10, 120-110 = 10). So the expected average lag would be 90/3 = 30. However, pgbench actually calculates it like this:

    average lag = (t(0)-d(0) + t(1)-d(1) + t(2)-d(2))/3
                = (100-10 + 110-20 + 120-30)/3
                = (90 + 90 + 90)/3
                = 90

Looks like too much lag is calculated. The difference between the lag which pgbench calculates and the expected one will keep growing if a lag happens earlier. I guess why my Linux box shows bigger lag than
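To spell out the difference between the two accounting methods, here is a tiny self-contained C99 program (illustrative only, not pgbench code) that plugs in the d(i)/t(i) values from the example and prints the per-transaction average of 90 next to the average of 30 that only counts the delay each transaction newly adds:

    #include <stdio.h>

    int
    main(void)
    {
        const double d[3] = {10, 20, 30};    /* scheduled times */
        const double t[3] = {100, 110, 120}; /* actual times */
        double  per_txn_sum = 0;             /* sum of t(i) - d(i) */
        double  incremental_sum = 0;         /* only the newly added delay */
        double  prev_lag = 0;

        for (int i = 0; i < 3; i++)
        {
            double lag = t[i] - d[i];

            per_txn_sum += lag;
            if (lag > prev_lag)
                incremental_sum += lag - prev_lag;
            prev_lag = lag;
        }
        printf("average of per-transaction lag:  %.0f\n", per_txn_sum / 3);      /* 90 */
        printf("average counting only new delay: %.0f\n", incremental_sum / 3);  /* 30 */
        return 0;
    }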
Re: [HACKERS] pgbench patches
Hello Tatsuo, For me, the error message is not quite right, because the progress == 0 case is considered an error as well in your patch. I suggest you change the error message to something like: thread progress delay (-P) must be positive number (%s)\n

Please find attached a new version with an updated message.

Thanks. I've been testing on Linux now. Starting from the coming Tuesday (Monday is a national holiday in Japan) I will test on Mac OS X and Windows.

I have done the test on Mac OS X. Windows testing was done by my colleague, Yugo Nagata. The results were very positive and I committed the --progress patches.

-- Tatsuo Ishii SRA OSS, Inc. Japan English: http://www.sraoss.co.jp/index_en.php Japanese: http://www.sraoss.co.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
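For context, the kind of -P validation being discussed looks roughly like the sketch below; the helper name, variable names and exact message wording are illustrative assumptions, not necessarily what was committed to pgbench.

    #include <stdio.h>
    #include <stdlib.h>

    /* Reject zero as well as negative values for the progress interval. */
    static int
    parse_progress_option(const char *arg)
    {
        int     progress = atoi(arg);

        if (progress <= 0)      /* atoi() also returns 0 for garbage input */
        {
            fprintf(stderr,
                    "thread progress delay (-P) must be positive (%s)\n", arg);
            exit(1);
        }
        return progress;
    }

    int
    main(int argc, char **argv)
    {
        int     progress = parse_progress_option(argc > 1 ? argv[1] : "0");

        printf("reporting progress every %d seconds\n", progress);
        return 0;
    }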
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
On 7/17/13 9:16 PM, Tatsuo Ishii wrote: Now suppose we have 3 transactions and each has following values: d(0) = 10 d(1) = 20 d(2) = 30 t(0) = 100 t(1) = 110 t(2) = 120 That says pgbench expects the duration 10 for each transaction. Actually, the first transaction runs slowly for some reason and the lag = 100 - 10 = 90. However, tx(1) and tx(2) are finished on schedule because they spend only 10 (110-10 = 10, 120-110 = 10). So the expected average lag would be 90/3 = 30. The clients are not serialized here in any significant way, even when they shared a single process/thread. I did many rounds of tracing through this code with timestamps on each line, and the sequence of events here will look like this: client 0: send SELECT... to server. yield to next client. client 1: send SELECT... to server. yield to next client. client 2: send SELECT... to server. yield to next client. select(): wait for the first response from any client. client 0: receive response. complete transaction, compute lag. client 1: receive response. complete transaction, compute lag. client 2: receive response. complete transaction, compute lag. There is nothing here that is queuing the clients one after the other. If (0) takes 100ms before its reply comes back, (1) and (2) can receive their reply back and continue forward at any time. They are not waiting for (0); it has yielded control while waiting for a response. All three times are independent once you reach the select() point where all are active. In this situation, if the server gets stuck doing something such that it takes 100ms before any client receives a response, it is correct to penalize every client for that latency. All three clients could have received the information earlier if the server had any to send them. If they did not, they all were suffering from some sort of lag. I'm not even sure why you spaced the start times out at intervals of 10. If I were constructing an example like this, I'd have them start at times of 0, 1, 2--as fast as the CPU can fire off statements basically--and then start waiting from that point. If client 1 takes 10 units of time to send its query out before client 2 runs, and the rate goal requires 10 units of time, the rate you're asking for is impossible. For sorting out what's going on with your two systems, I would recommend turning on debugging output with -d and looking at the new per-transaction latency numbers that the feature reports. If your theory that the lag is going up as the test proceeds is true, that should show up in the individual latency numbers too. Based on what I saw during weeks of testing here, I would be more suspicious that there's a system level difference between your two servers than to blame the latency calculation. I saw a *lot* of weird system issues myself when I started looking that carefully at sustained throughput. The latency reports from the perspective of Fabien's code were always reasonable though. When something delays every client, it counts that against every active client's lag, and that's the right thing to do. -- Greg Smith 2ndQuadrant USg...@2ndquadrant.com Baltimore, MD PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Proposal/design feedback needed: WITHIN GROUP (sql standard ordered set aggregate functions)
The spec defines two types of aggregate function classed as ordered set function, as follows: 1. An inverse distribution function taking one argument (which must be a grouped column or otherwise constant within groups) plus a sorted group with exactly one column: =# SELECT (func(p) WITHIN GROUP (ORDER BY q)) from ... The motivating example for this (and the only ones in the spec) are percentile_cont and percentile_disc, to return a percentile result from a continuous or discrete distribution. (Thus percentile_cont(0.5) within group (order by x) is the spec's version of a median(x) function.) 2. A hypothetical set function taking N arguments of arbitrary types (a la VARIADIC any, rather than a fixed list) plus a sorted group with N columns of matching types: =# SELECT (func(p1,p2,...) WITHIN GROUP (ORDER BY q1,q2,...)) from ... (where typeof(p1)==typeof(q1) and so on, at least up to trivial conversions) The motivating example here is to be able to do rank(p1,p2,...) to return the rank that the specified values would have had if they were added to the group. As usual, we do not want to constrain ourselves to supporting only the specific cases in the spec, but would prefer a general solution. We (meaning myself and Atri) have an implementation that basically works, though it is not yet complete, but before taking it any further we need to resolve the design question of how to represent these two types of function in the system catalogs. The fact that there are in effect two parts to the parameter list, which are either independent (for inverse distribution funcs) or closely related (for hypothetical set functions), doesn't seem to point to an obvious way to represent this in pg_proc/pg_aggregate. I'm not yet satisfied with the method used in our implementation, so we're throwing this open for suggestions. We will post the work-in-progress patch along with a description of its current implementation shortly. One of the major complications is that we ideally want to be able to do polymorphism based on the type of the sorted group, specifically in order to be able to do percentile_disc(float8) within group (order by anyelement) returning anyelement. (i.e. we should be able to get a discrete percentile from any type that is orderable.) The question here is how to resolve the return type both of the aggregate itself and of the finalfn. We've also had an expression of interest in extending this to allow percentile_disc(float8[]) and percentile_cont(float8[]) returning arrays; e.g. percentile_cont(array[0, 0.25, 0.5, 0.75, 1]) to return an array containing the bounds, median and quartiles in one go. This is an extension to the spec but it seems sufficiently obviously useful to be worth supporting. Comments? -- Andrew (irc:RhodiumToad) -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
On 7/17/13 9:16 PM, Tatsuo Ishii wrote:

Now suppose we have 3 transactions and each has following values: d(0) = 10 d(1) = 20 d(2) = 30 t(0) = 100 t(1) = 110 t(2) = 120 That says pgbench expects the duration 10 for each transaction. Actually, the first transaction runs slowly for some reason and the lag = 100 - 10 = 90. However, tx(1) and tx(2) are finished on schedule because they spend only 10 (110-100 = 10, 120-110 = 10). So the expected average lag would be 90/3 = 30.

The clients are not serialized here in any significant way, even when they share a single process/thread. I did many rounds of tracing through this code with timestamps on each line, and the sequence of events here will look like this:

My example is for the 1 client case, so concurrent clients are not the issue here.

-- Tatsuo Ishii SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On Thursday, July 18, 2013 2:36 AM Alvaro Herrera wrote:

Robert Haas escribió: This feature doesn't strike me as a good candidate for regression testing anyway. Keep in mind that people are intended to be able to run make installcheck against a deployed system without messing it up.

This is my opinion as well.

To summarize, the tests in this patch are:
1. Set a group of SIGHUP parameters in the config file, reload and sleep, then check the values of the parameters to validate that they were set.
2. Reset all parameters so that the system returns to its previous state, then reload and sleep.
3. Negative scenario tests.

As Robert suggested, timing-based tests are not good candidates for regression tests, which essentially means that all tests under points 1 and 2 are not good candidates. If the tests corresponding to points 1 and 2 are to be removed, then keeping only some negative tests doesn't seem to make much sense either. To cover the ALTER SYSTEM functionality and code, we could run ALTER SYSTEM commands without the reload and sleep, but I think that would not be of much use until we verify the result using SHOW. So I will remove the tests from this patch in the next updated version unless there is some other useful way to achieve this.

On a side note, I had checked that a few tests in the regression suite already use pg_sleep(), and one of them, in stats.sql, does something similar: it waits using pg_sleep() to let the prior tests' actions complete. I understand that having such a test in one place already doesn't justify using it in other places, but I still think that if some tests using a similar mechanism are running successfully, we can add a new one, provided it ensures the timing doesn't cause problems.

With Regards, Amit Kapila.

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Fix pgstattuple/pgstatindex to use regclass-type as the argument
(2013/07/18 2:31), Fujii Masao wrote: On Tue, Jul 16, 2013 at 3:00 PM, Satoshi Nagayasu sn...@uptime.jp wrote: (2013/07/04 3:58), Fujii Masao wrote: For the test, I just implemented the regclass-version of pg_relpages() (patch attached) and tested some cases. But I could not get that problem. SELECT pg_relpages('hoge');-- OK SELECT pg_relpages(oid) FROM pg_class WHERE relname = 'hoge';-- OK SELECT pg_relpages(relname) FROM pg_class WHERE relname = 'hoge';-- OK In the attached patch, I cleaned up three functions to have two types of arguments for each, text and regclass. pgstattuple(text) pgstattuple(regclass) pgstatindex(text) pgstatindex(regclass) pg_relpages(text) pg_relpages(regclass) I still think a regclass argument is more appropriate for passing relation/index name to a function than text-type, but having both arguments in each function seems to be a good choice at this moment, in terms of backward-compatibility. Docs needs to be updated if this change going to be applied. Yes, please. Updated docs and code comments, etc. PFA. Any comments? 'make installcheck' failed in my machine. Hmm, it works on my box... Do we need to remove pgstattuple--1.1.sql and create pgstattuple--1.1--1.2.sql? +/* contrib/pgstattuple/pgstattuple--1.1.sql */ Typo: s/1.1/1.2 Done. You seem to have forgotten to update pgstattuple.c. Should I change something in pgstattuple.c? I just changed CREATE FUNCTION statement for pgstattuple to replace oid input arg with the regclass. Regards, -- Satoshi Nagayasu sn...@uptime.jp Uptime Technologies, LLC. http://www.uptime.jp diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile index fc893d8..957742a 100644 --- a/contrib/pgstattuple/Makefile +++ b/contrib/pgstattuple/Makefile @@ -4,7 +4,7 @@ MODULE_big = pgstattuple OBJS = pgstattuple.o pgstatindex.o EXTENSION = pgstattuple -DATA = pgstattuple--1.1.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql +DATA = pgstattuple--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql REGRESS = pgstattuple diff --git a/contrib/pgstattuple/expected/pgstattuple.out b/contrib/pgstattuple/expected/pgstattuple.out index ab28f50..eaba306 100644 --- a/contrib/pgstattuple/expected/pgstattuple.out +++ b/contrib/pgstattuple/expected/pgstattuple.out @@ -11,12 +11,24 @@ select * from pgstattuple('test'::text); 0 | 0 | 0 | 0 |0 | 0 | 0 | 0 |0 (1 row) +select * from pgstattuple('test'::name); + table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent +---+-+---+---+--++++-- + 0 | 0 | 0 | 0 |0 | 0 | 0 | 0 |0 +(1 row) + select * from pgstattuple('test'::regclass); table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent ---+-+---+---+--++++-- 0 | 0 | 0 | 0 |0 | 0 | 0 | 0 |0 (1 row) +select * from pgstattuple('test'::regclass::oid); + table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent +---+-+---+---+--++++-- + 0 | 0 | 0 | 0 |0 | 0 | 0 | 0 |0 +(1 row) + select * from pgstatindex('test_pkey'); version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation -+++---+++-+---+--+ diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index 97f897e..41e90e3 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -39,12 +39,24 
@@
 #include "utils/rel.h"
 
+/*
+ * Because of the backward-compatibility issue, we have decided to have
+ * two types of interfaces, with regclass-type input arg and text-type
+ * input arg, for each function.
+ *
+ * Those functions which have text-type input arg will be deprecated
+ * in a future release.
+ */
 extern Datum pgstatindex(PG_FUNCTION_ARGS);
+extern Datum
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
On 7/17/13 11:34 PM, Tatsuo Ishii wrote:

My example is for 1 client case. So concurrent clients are not the issue here.

Sorry about that, with your clarification I see what you were trying to explain now. The code initializes the target time like this:

thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);

And then each time a transaction fires, it advances the reference time forward based on the expected rate:

thread->throttle_trigger += wait;

It does *not* reset thread->throttle_trigger based on when the previous transaction ended / when the next transaction started. If the goal is 10us transaction times, it beats a steady drum saying the transactions should come at 10us, 20us, 30us (on average--there's some randomness in the goals). It does not pay any attention to when the previous transactions finished.

That means that if an early transaction takes an extra 1000us, every transaction after that will also show as 1000us late--even if all of them take 10us. You expect that those later transactions will show 0 lag, since they took the right duration. For that to happen, thread->throttle_trigger would need to be re-initialized with the current time at the end of each completed transaction.

The lag computation was not the interesting part of this feature to me. As I said before, I considered it more of a debugging-level thing than a number people would analyze as much as you did. I understand why you don't like it though. If the reference time was moved forward to match the transaction end each time, I think that would give the lag definition you're looking for. That's fine with me too, if Fabien doesn't have a good reason to reject the idea. We would need to make sure that doesn't break some part of the design too.

-- Greg Smith 2ndQuadrant US    g...@2ndquadrant.com    Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support    www.2ndQuadrant.com

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
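To spell out the difference being discussed, here is a minimal, self-contained single-client simulation of the two lag definitions. It is not the pgbench code: the fixed 10 us interval (no Poisson randomness), the hard-coded durations, and all variable names are simplifications invented for illustration only.

/* Hedged sketch, not pgbench itself: one client, fixed interval, no randomness. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int64_t		dur[3] = {100, 10, 10};	/* the first transaction is slow */
	int64_t		interval = 10;	/* target spacing between transaction starts (us) */
	int64_t		trigger,
				finish,
				lag_total;

	/* Schedule-anchored (current behaviour): the trigger only ever advances
	 * by the target interval, so the early delay shows up in every later lag. */
	trigger = 0;
	finish = 0;
	lag_total = 0;
	for (int i = 0; i < 3; i++)
	{
		int64_t		start = (finish > trigger) ? finish : trigger;	/* one client: must wait for the previous txn */

		lag_total += start - trigger;	/* lag measured against the fixed schedule */
		finish = start + dur[i];
		trigger += interval;
	}
	printf("schedule-anchored total lag: %lld us\n", (long long) lag_total);	/* 90 + 90 = 180 */

	/* Re-anchored alternative: restart the schedule from the end of each
	 * completed transaction, so the early delay no longer inflates later lags. */
	trigger = 0;
	finish = 0;
	lag_total = 0;
	for (int i = 0; i < 3; i++)
	{
		int64_t		start = (finish > trigger) ? finish : trigger;

		lag_total += start - trigger;
		finish = start + dur[i];
		trigger = finish + interval;	/* the only change from the loop above */
	}
	printf("re-anchored total lag: %lld us\n", (long long) lag_total);	/* 0 */
	return 0;
}

Under the first definition the single 90 us delay is charged again to every later transaction; under the second it is not charged to them at all, which is the behaviour Tatsuo expected from his example.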
[HACKERS] pgindent behavior we could do without
It's always annoyed me that pgindent insists on adjusting vertical whitespace around #else and related commands. This has, for example, rendered src/include/storage/barrier.h nigh illegible: you get things like

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier()	__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier()	__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier()	__asm__ __volatile__ ("lwsync" : : : "memory")
#elif defined(__alpha) || defined(__alpha__)	/* Alpha */

which makes it look like this block of code has something to do with Alpha. By chance, I noticed today that this misbehavior comes from a discretely identifiable spot, to wit lines 289-290 in src/tools/pgindent/pgindent:

# Remove blank line(s) before #else, #elif, and #endif
$source =~ s!\n\n+(\#else|\#elif|\#endif)!\n$1!g;

This seems pretty broken to me: why exactly is whitespace there such a bad idea? Not only that, but the next action is concerned with undoing some of the damage this rule causes:

# Add blank line before #endif if it is the last line in the file
$source =~ s!\n(#endif.*)\n\z!\n\n$1\n!;

I assert that we should simply remove both of these bits of code, as just about every committer on the project is smarter about when to use vertical whitespace than this program is.

Thoughts?

regards, tom lane

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Re: Proposal/design feedback needed: WITHIN GROUP (sql standard ordered set aggregate functions)
On Thu, Jul 18, 2013 at 03:15:14AM +, Andrew Gierth wrote: The spec defines two types of aggregate function classed as ordered set function, as follows: 1. An inverse distribution function taking one argument (which must be a grouped column or otherwise constant within groups) plus a sorted group with exactly one column: =# SELECT (func(p) WITHIN GROUP (ORDER BY q)) from ... The motivating example for this (and the only ones in the spec) are percentile_cont and percentile_disc, to return a percentile result from a continuous or discrete distribution. (Thus percentile_cont(0.5) within group (order by x) is the spec's version of a median(x) function.) 2. A hypothetical set function taking N arguments of arbitrary types (a la VARIADIC any, rather than a fixed list) plus a sorted group with N columns of matching types: =# SELECT (func(p1,p2,...) WITHIN GROUP (ORDER BY q1,q2,...)) from ... (where typeof(p1)==typeof(q1) and so on, at least up to trivial conversions) The motivating example here is to be able to do rank(p1,p2,...) to return the rank that the specified values would have had if they were added to the group. As usual, we do not want to constrain ourselves to supporting only the specific cases in the spec, but would prefer a general solution. We (meaning myself and Atri) have an implementation that basically works, though it is not yet complete, but before taking it any further we need to resolve the design question of how to represent these two types of function in the system catalogs. The fact that there are in effect two parts to the parameter list, which are either independent (for inverse distribution funcs) or closely related (for hypothetical set functions), doesn't seem to point to an obvious way to represent this in pg_proc/pg_aggregate. I'm not yet satisfied with the method used in our implementation, What is that method? so we're throwing this open for suggestions. We will post the work-in-progress patch along with a description of its current implementation shortly. One of the major complications is that we ideally want to be able to do polymorphism based on the type of the sorted group, specifically in order to be able to do percentile_disc(float8) within group (order by anyelement) returning anyelement. (i.e. we should be able to get a discrete percentile from any type that is orderable.) The question here is how to resolve the return type both of the aggregate itself and of the finalfn. We've also had an expression of interest in extending this to allow percentile_disc(float8[]) and percentile_cont(float8[]) returning arrays; e.g. percentile_cont(array[0, 0.25, 0.5, 0.75, 1]) to return an array containing the bounds, median and quartiles in one go. This is an extension to the spec but it seems sufficiently obviously useful to be worth supporting. Comments? I'm really happy to see PostgreSQL come into its own when it comes to the analytics side of the house :) Cheers, David. -- David Fetter da...@fetter.org http://fetter.org/ Phone: +1 415 235 3778 AIM: dfetter666 Yahoo!: dfetter Skype: davidfetter XMPP: david.fet...@gmail.com iCal: webcal://www.tripit.com/feed/ical/people/david74/tripit.ics Remember to vote! Consider donating to Postgres: http://www.postgresql.org/about/donate -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.
On Wed, Jul 17, 2013 at 7:11 AM, Tom Lane t...@sss.pgh.pa.us wrote:

Kevin Grittner kgri...@postgresql.org writes: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.

The buildfarm members that use -DCLOBBER_CACHE_ALWAYS say this patch is broken.

Looks like rd_indpred is not correct if the index relation is fresh. Something like this works for me.

diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index edd34ff..46149ee 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -634,7 +634,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid)
 		/* Skip partial indexes. */
 		indexRel = index_open(index->indexrelid, RowExclusiveLock);
-		if (indexRel->rd_indpred != NIL)
+		if (RelationGetIndexPredicate(indexRel) != NIL)
 		{
 			index_close(indexRel, NoLock);
 			ReleaseSysCache(indexTuple);

-- Hitoshi Harada

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Sorry about that, with your clarification I see what you were trying to explain now. The code initializes the target time like this:

thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);

And then each time a transaction fires, it advances the reference time forward based on the expected rate:

thread->throttle_trigger += wait;

It does *not* reset thread->throttle_trigger based on when the previous transaction ended / when the next transaction started. If the goal is 10us transaction times, it beats a steady drum saying the transactions should come at 10us, 20us, 30us (on average--there's some randomness in the goals). It does not pay any attention to when the previous transactions finished. That means that if an early transaction takes an extra 1000us, every transaction after that will also show as 1000us late--even if all of them take 10us. You expect that those later transactions will show 0 lag, since they took the right duration. For that to happen, thread->throttle_trigger would need to be re-initialized with the current time at the end of each completed transaction.

Yes, that's exactly what I understand from the code.

The lag computation was not the interesting part of this feature to me. As I said before, I considered it more of a debugging-level thing than a number people would analyze as much as you did. I understand why you don't like it though. If the reference time was moved forward to match the transaction end each time, I think that would give the lag definition you're looking for. That's fine with me too, if Fabien doesn't have a good reason to reject the idea. We would need to make sure that doesn't break some part of the design too.

I would like to hear from Fabien about the issue too.

-- Tatsuo Ishii SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Fix pgstattuple/pgstatindex to use regclass-type as the argument
On Thu, Jul 18, 2013 at 9:40 AM, Satoshi Nagayasu sn...@uptime.jp wrote: (2013/07/18 2:31), Fujii Masao wrote: On Tue, Jul 16, 2013 at 3:00 PM, Satoshi Nagayasu sn...@uptime.jp wrote: (2013/07/04 3:58), Fujii Masao wrote: For the test, I just implemented the regclass-version of pg_relpages() (patch attached) and tested some cases. But I could not get that problem. SELECT pg_relpages('hoge');-- OK SELECT pg_relpages(oid) FROM pg_class WHERE relname = 'hoge'; -- OK SELECT pg_relpages(relname) FROM pg_class WHERE relname = 'hoge'; -- OK In the attached patch, I cleaned up three functions to have two types of arguments for each, text and regclass. pgstattuple(text) pgstattuple(regclass) pgstatindex(text) pgstatindex(regclass) pg_relpages(text) pg_relpages(regclass) I still think a regclass argument is more appropriate for passing relation/index name to a function than text-type, but having both arguments in each function seems to be a good choice at this moment, in terms of backward-compatibility. Docs needs to be updated if this change going to be applied. Yes, please. Updated docs and code comments, etc. PFA. Looks good. Any comments? 'make installcheck' failed in my machine. Hmm, it works on my box... Works for me too. Overall looks good to me. Do we need to remove pgstattuple--1.1.sql and create pgstattuple--1.1--1.2.sql? +/* contrib/pgstattuple/**pgstattuple--1.1.sql */ Typo: s/1.1/1.2 Done. You seem to have forgotten to update pgstattuple.c. Should I change something in pgstattuple.c? I just changed CREATE FUNCTION statement for pgstattuple to replace oid input arg with the regclass. Regards, -- Satoshi Nagayasu sn...@uptime.jp Uptime Technologies, LLC. http://www.uptime.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers Thanks, Rushabh Lathia
Re: [HACKERS] [COMMITTERS] pgsql: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.
Hitoshi Harada umi.tan...@gmail.com writes:

Looks like rd_indpred is not correct if the index relation is fresh. Something like this works for me.

-		if (indexRel->rd_indpred != NIL)
+		if (RelationGetIndexPredicate(indexRel) != NIL)

Hm, yeah, the direct access to rd_indpred is certainly wrong. Will apply, thanks!

regards, tom lane

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])
On Thursday, July 18, 2013 12:31 AM Alvaro Herrera wrote: Amit kapila escribió: +# This includes the default configuration directory included to support +# ALTER SYSTEM statement. +# +# WARNING: User should not remove below include_dir or directory config, +# as both are required to make ALTER SYSTEM command work. +# Any configuration parameter values specified after this line +# will override the values set by ALTER SYSTEM statement. +#include_dir = 'config' Why do we need to expose this setting to a user? a) This can be a knob to turn this feature off. This has been asked by few people, one of the mail link is mentioned below (refer towards end of mail in the link): http://www.postgresql.org/message-id/515b04f9.30...@gmx.net b) In case user wants to change priority of parameters set by Alter System, he can move the include_dir up or down in postgresql.conf. Both of these seem like they would make troubleshooters' lives more difficult. I think we should just parse the auto file automatically after parsing postgresql.conf, without requiring the include directive to be there. Okay, I shall modify the patch that way. However still the file will be in config directory. Now the 2 things regarding this needs to be taken care: a. As raised by Peter Eisentraut, shall we do parsing of autofile before or after postgresql.conf. By default I will keep it after until there is some reason to do it before. b. If the directory/file is missing while parsing issue warning message. Kindly let me know your opinion about the same. With Regards, Amit Kapila. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Re: Proposal/design feedback needed: WITHIN GROUP (sql standard ordered set aggregate functions)
On Thu, Jul 18, 2013 at 10:02 AM, David Fetter da...@fetter.org wrote: On Thu, Jul 18, 2013 at 03:15:14AM +, Andrew Gierth wrote: The spec defines two types of aggregate function classed as ordered set function, as follows: 1. An inverse distribution function taking one argument (which must be a grouped column or otherwise constant within groups) plus a sorted group with exactly one column: =# SELECT (func(p) WITHIN GROUP (ORDER BY q)) from ... The motivating example for this (and the only ones in the spec) are percentile_cont and percentile_disc, to return a percentile result from a continuous or discrete distribution. (Thus percentile_cont(0.5) within group (order by x) is the spec's version of a median(x) function.) 2. A hypothetical set function taking N arguments of arbitrary types (a la VARIADIC any, rather than a fixed list) plus a sorted group with N columns of matching types: =# SELECT (func(p1,p2,...) WITHIN GROUP (ORDER BY q1,q2,...)) from ... (where typeof(p1)==typeof(q1) and so on, at least up to trivial conversions) The motivating example here is to be able to do rank(p1,p2,...) to return the rank that the specified values would have had if they were added to the group. As usual, we do not want to constrain ourselves to supporting only the specific cases in the spec, but would prefer a general solution. We (meaning myself and Atri) have an implementation that basically works, though it is not yet complete, but before taking it any further we need to resolve the design question of how to represent these two types of function in the system catalogs. The fact that there are in effect two parts to the parameter list, which are either independent (for inverse distribution funcs) or closely related (for hypothetical set functions), doesn't seem to point to an obvious way to represent this in pg_proc/pg_aggregate. I'm not yet satisfied with the method used in our implementation, What is that method? We currently represent ordered set functions with a new bool flag in pg_aggregate. The flag is set to true for ordered set functions(obviously) and false for all others. The currently implemented functions i.e. percentile_disc, percentile_cont and percentile_cont for intervals have their finalfns present in pg_aggregate. The aggregate functions take in two arguments, one for the percentile value and other for the input row set. So, percentile_cont's entry in pg_proc has float8 and float8 as its parameters and another entry of percentile_cont (with the interval version as the finalfn) has float8 and interval as its parameter types. As you can see, there isn't a way right now to resolve the return type of the aggregate for polymorphic cases. This is something we wish to resolve. Regards, Atri -- Regards, Atri l'apprenant -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)
Sorry about that, with your clarification I see what you were trying to explain now. The code initializes the target time like this: thread-throttle_trigger = INSTR_TIME_GET_MICROSEC(start); And then each time a transaction fires, it advances the reference time forward based on the expected rate: thread-throttle_trigger += wait; It does *not* reset thread-throttle_trigger based on when the previous transaction ended / when the next transaction started. If the goal is 10us transaction times, it beats a steady drum saying the transactions should come at 10us, 20us, 30us (on average--there's some randomness in the goals). It does not pay any attention to when the previous transactions finished. That means that if an early transaction takes an extra 1000us, every transaction after that will also show as 1000us late--even if all of them take 10us. You expect that those later transactions will show 0 lag, since they took the right duration. For that to happen, thread-throttle_trigger would need to be re-initialized with the current time at the end of each completed transaction. Yes, that's exactly what I understand from the code. The lag computation was not the interesting part of this feature to me. As I said before, I considered it more of a debugging level thing than a number people would analyze as much as you did. I understand why you don't like it though. If the reference time was moved forward to match the transaction end each time, I think that would give the lag definition you're looking for. That's fine to me too, if Fabien doesn't have a good reason to reject the idea. We would need to make sure that doesn't break some part of the design too. I would like to hear from Fabien about the issue too. For your information, included is the patch against git master head to implement the lag in a way what I proposed. With the patch, I get more consistent number on Linux (and Mac OS X). -- Tatsuo Ishii SRA OSS, Inc. Japan English: http://www.sraoss.co.jp/index_en.php Japanese: http://www.sraoss.co.jp diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c index 2ad8f0b..57e62dc 100644 --- a/contrib/pgbench/pgbench.c +++ b/contrib/pgbench/pgbench.c @@ -137,6 +137,12 @@ int unlogged_tables = 0; double sample_rate = 0.0; /* + * When threads are throttled to a given rate limit, this is the target delay + * to reach that rate in usec. 0 is the default and means no throttling. 
+ */ +int64 throttle_delay = 0; + +/* * tablespace selection */ char *tablespace = NULL; @@ -202,11 +208,15 @@ typedef struct int listen; /* 0 indicates that an async query has been * sent */ int sleeping; /* 1 indicates that the client is napping */ + boolthrottling; /* whether nap is for throttling */ int64 until; /* napping until (usec) */ + int64 wait; /* randomly generated delay (usec) */ Variable *variables; /* array of variable definitions */ int nvariables; instr_time txn_begin; /* used for measuring transaction latencies */ + instr_time txn_begin_throttle; /* tx start time used when transaction throttling enabled */ instr_time stmt_begin; /* used for measuring statement latencies */ + bool is_throttled; /* whether transaction throttling is done */ int use_file; /* index in sql_files for this client */ bool prepared[MAX_FILES]; } CState; @@ -224,6 +234,9 @@ typedef struct instr_time *exec_elapsed; /* time spent executing cmds (per Command) */ int *exec_count; /* number of cmd executions (per Command) */ unsigned short random_state[3]; /* separate randomness for each thread */ + int64 throttle_trigger; /* previous/next throttling (us) */ + int64 throttle_lag; /* total transaction lag behind throttling */ + int64 throttle_lag_max; /* max transaction lag */ } TState; #define INVALID_THREAD ((pthread_t) 0) @@ -232,6 +245,8 @@ typedef struct { instr_time conn_time; int xacts; + int64 throttle_lag; + int64 throttle_lag_max; } TResult; /* @@ -356,6 +371,7 @@ usage(void) -N, --skip-some-updates skip updates of pgbench_tellers and pgbench_branches\n -P, --progress=NUM show thread progress report every NUM seconds\n -r, --report-latencies report average latency per command\n + -R, --rate=SPEC target rate in transactions per second\n -s, --scale=NUM report this scale factor in output\n -S, --select-onlyperform SELECT-only transactions\n -t, --transactions number of transactions each client runs @@ -898,19 +914,80 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVa { PGresult *res; Command **commands; + booltrans_needs_throttle = false; top: commands = sql_files[st-use_file]; + /* + * Handle throttling once per transaction by sleeping. It is simpler + * to do this here rather than at