Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Fabien COELHO



To clarify what state this is all in: Fabien's latest
pgbench-throttle-v15.patch is the "ready for a committer" version.  The
last two revisions are just tweaking the comments at this point, and
his version is more correct than my last one.


Got it. I will take care of this.


Please find attached an updated version which solves conflicts introduced 
by the progress patch.


--
Fabien.

diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 23ee53c..4111e8c 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -137,6 +137,12 @@ int			unlogged_tables = 0;
 double		sample_rate = 0.0;
 
 /*
+ * When threads are throttled to a given rate limit, this is the target delay
+ * to reach that rate in usec.  0 is the default and means no throttling.
+ */
+int64		throttle_delay = 0;
+
+/*
  * tablespace selection
  */
 char	   *tablespace = NULL;
@@ -202,11 +208,13 @@ typedef struct
 	int			listen;			/* 0 indicates that an async query has been
  * sent */
 	int			sleeping;		/* 1 indicates that the client is napping */
+	bool		throttling;		/* whether nap is for throttling */
 	int64		until;			/* napping until (usec) */
 	Variable   *variables;		/* array of variable definitions */
 	int			nvariables;
 	instr_time	txn_begin;		/* used for measuring transaction latencies */
 	instr_time	stmt_begin;		/* used for measuring statement latencies */
+	bool		is_throttled;	/* whether transaction throttling is done */
 	int			use_file;		/* index in sql_files for this client */
 	bool		prepared[MAX_FILES];
 } CState;
@@ -224,6 +232,9 @@ typedef struct
 	instr_time *exec_elapsed;	/* time spent executing cmds (per Command) */
 	int		   *exec_count;		/* number of cmd executions (per Command) */
 	unsigned short random_state[3];		/* separate randomness for each thread */
+	int64   throttle_trigger; 	/* previous/next throttling (us) */
+	int64   throttle_lag; 		/* total transaction lag behind throttling */
+	int64   throttle_lag_max; 	/* max transaction lag */
 } TState;
 
 #define INVALID_THREAD		((pthread_t) 0)
@@ -232,6 +243,8 @@ typedef struct
 {
 	instr_time	conn_time;
 	int			xacts;
+	int64   throttle_lag;
+	int64   throttle_lag_max;
 } TResult;
 
 /*
@@ -356,6 +369,7 @@ usage(void)
		 "  -N, --skip-some-updates  skip updates of pgbench_tellers and pgbench_branches\n"
		 "  -P, --progress NUM   show thread progress report every NUM seconds\n"
		 "  -r, --report-latencies   report average latency per command\n"
+		 "  -R, --rate SPEC  target rate in transactions per second\n"
		 "  -s, --scale=NUM  report this scale factor in output\n"
		 "  -S, --select-only    perform SELECT-only transactions\n"
		 "  -t, --transactions   number of transactions each client runs "
@@ -898,17 +912,62 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVa
 {
 	PGresult   *res;
 	Command   **commands;
+	bool		trans_needs_throttle = false;
 
 top:
	commands = sql_files[st->use_file];
 
+	/*
+	 * Handle throttling once per transaction by sleeping.  It is simpler
+	 * to do this here rather than at the end, because so much complicated
+	 * logic happens below when statements finish.
+	 */
+	if (throttle_delay && !st->is_throttled)
+	{
+		/*
+		 * Use inverse transform sampling to randomly generate a delay, such
+		 * that the series of delays will approximate a Poisson distribution
+		 * centered on the throttle_delay time.
+	 *
+	 * 1000 implies a 6.9 (-log(1/1000)) to 0.0 (log 1.0) delay multiplier.
+		 *
+		 * If transactions are too slow or a given wait is shorter than
+		 * a transaction, the next transaction will start right away.
+		 */
+		int64 wait = (int64)
+			throttle_delay * -log(getrand(thread, 1, 1000)/1000.0);
+
+		thread->throttle_trigger += wait;
+
+		st->until = thread->throttle_trigger;
+		st->sleeping = 1;
+		st->throttling = true;
+		st->is_throttled = true;
+		if (debug)
+			fprintf(stderr, "client %d throttling "INT64_FORMAT" us\n",
+					st->id, wait);
+	}
+
	if (st->sleeping)
 	{			/* are we sleeping? */
 		instr_time	now;
+		int64 now_us;
 
 		INSTR_TIME_SET_CURRENT(now);
-		if (st->until <= INSTR_TIME_GET_MICROSEC(now))
+		now_us = INSTR_TIME_GET_MICROSEC(now);
+		if (st->until <= now_us)
+		{
 			st-sleeping = 0;	/* Done sleeping, go ahead with next command */
+			if (st->throttling)
+			{
+				/* Measure lag of throttled transaction relative to target */
+				int64		lag = now_us - st->until;
+				thread->throttle_lag += lag;
+				if (lag > thread->throttle_lag_max)
+					thread->throttle_lag_max = lag;
+				st->throttling = false;
+			}
+		}
 		else
 			return true;		/* Still sleeping, nothing to do here */
 	}
@@ -1095,6 +1154,15 @@ top:
 			st->state = 0;
 			st->use_file = (int) getrand(thread, 0, num_files - 1);
 			commands = sql_files[st->use_file];
+			st->is_throttled = false;
+			/*
+			 * No transaction is underway anymore, which means there is nothing
+			 * to 

Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 To clarify what state this is all in: Fabien's latest
 pgbench-throttle-v15.patch is the "ready for a committer" version.  The
 last two revisions are just tweaking the comments at this point, and
 his version is more correct than my last one.

 Got it. I will take care of this.
 
 Please find attached an updated version which solves conflicts
 introduced by the progress patch.

Thanks, but I already solved the conflict and fixed some minor
indentation issues. Now I have question regarding the function.

./pgbench -p 5433 -S -T 10 -R 10000 test
starting vacuum...end.
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 1
number of threads: 1
duration: 10 s
number of transactions actually processed: 71339
average rate limit lag: 862.534 ms (max 2960.913 ms)
tps = 7133.745911 (including connections establishing)
tps = 7135.130810 (excluding connections establishing)

What does average rate limit lag mean? From the manual:

-R rate
--rate rate

Execute transactions targeting the specified rate instead of
running as fast as possible (the default). The rate is given in
transactions per second. If the targeted rate is above the maximum
possible rate these transactions can execute at, the rate limit
won't have any impact on results. The rate is targeted by starting
transactions along a Poisson-distributed event time line. When a
rate limit is active, the average and maximum transaction lag time
(the delay between the scheduled and actual transaction start
times) are reported in ms. High values indicate that the database
could not handle the scheduled load at some time.
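
For reference, the Poisson-distributed schedule described above amounts to
drawing exponentially distributed inter-arrival delays by inverse transform
sampling. A minimal standalone sketch of that idea in C (next_wait() is an
illustrative helper, not actual pgbench code):

#include <math.h>
#include <stdlib.h>

/* One inter-arrival delay in usec for a given target rate (tps).
 * Inverse transform sampling: if u is uniform on (0,1], then
 * -log(u) * mean is exponentially distributed with the given mean,
 * and exponential inter-arrival times yield a Poisson event stream. */
static long
next_wait(double target_tps)
{
	double		mean_us = 1000000.0 / target_tps;
	double		u = (rand() % 1000 + 1) / 1000.0;	/* in (0.001, 1] */

	return (long) (mean_us * -log(u));
}

Summing successive next_wait() values gives the scheduled start times; a
client that finishes late starts its next transaction immediately, and the
difference between scheduled and actual start is the reported lag.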

So in my understanding the number shows the delay time before *each*
transaction starts. If my understanding is correct, why

71339 (total transactions) * 862.534 ms = 61532 sec

could exceed 10 seconds, which is the total run time?

Also I noticed small bug.

./pgbench -R 0 test
invalid rate limit: 0

Shouldn't this be treated as if -R is not specified? Actually in the program:

/*
 * When threads are throttled to a given rate limit, this is the target delay
 * to reach that rate in usec.  0 is the default and means no throttling.
 */
int64   throttle_delay = 0;

So it seems treating -R 0 means no throttling makes more sense to me.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Adding optionally commit number in PG_VERSION_STR

2013-07-17 Thread Erik Rijkers
On Wed, July 17, 2013 05:27, Tom Lane wrote:
 Michael Paquier michael.paqu...@gmail.com writes:
 It happens that I work occasionally on multiple builds based on


FWIW, I've been doing this for while:


where $project is mostly the patchname, $commit_hash is extracted like this:

commit_hash=$( cd /home/aardvark/pg_stuff/git/master; git log | head -n 1 | cut 
--delimiter=' ' -f 2 );

version_string=${project}-${db_timestamp}-${commit_hash};

perl -i.original -ne "
s,(PACKAGE_.*[[:digit:]]+\.[[:digit:]]+(?:\.[[:digit:]]+)?(?:devel)?(?:(?:alpha|beta|rc)[[:digit:]]+)?),\\1-${version_string},;
  print;" configure


Which will then give:

PostgreSQL 9.4devel-HEAD-20130717_0828-ffcf654547ef38555203e6d716f47b7065a0a87d 
on x86_64-unknown-linux-gnu, compiled by
gcc (GCC) 4.8.1, 64-bit


Btw, in an even more ugly hack I also stick some of that same config data in 
information_schema.sql_packages, with a
feature_id = 100:

select * from information_schema.sql_packages where cast(substring(feature_id 
from E'^PKG([[:digit:]]+)') as integer) >= 100
 feature_id |    feature_name    | is_supported | is_verified_by | comments
------------+--------------------+--------------+----------------+----------
 PKG100 | project name   | YES  | ej | HEAD
 PKG101 | patched| NO   | ej | NO
 PKG103 | build time | YES  | ej | 2013-07-17 
08:32:03.400521+02
 PKG104 | server_version | YES  | ej |
9.4devel-HEAD-20130717_0828-ffcf654547ef38555203e6d716f47b7065a0a87d
 PKG105 | server_version_num | YES  | ej | 90400
 PKG106 | port   | YES  | ej | 6544
 PKG110 | commit hash| YES  | ej | 
ffcf654547ef38555203e6d716f47b7065a0a87d
 PKG111 | catversion | YES  | ej | 201307161
 PKG112 | control version| YES  | ej | 937
(9 rows)



Needless to say, YMMV ...


Thanks,

Erik Rijkers





-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Cube extension point support // GSoC'13

2013-07-17 Thread Alexander Korotkov
On Fri, Jul 12, 2013 at 3:57 PM, Stas Kelvich stas.kelv...@gmail.com wrote:

 Hello.

 here is a patch adding to the cube extension support for a compressed
 representation of point cubes. If a cube is a point, i.e. has coincident
 lower left and upper right corners, then only one corner is stored. The
 first bit of the cube header indicates whether the cube is a point or
 not. A few notes:

 * Patch preserves binary compatibility with old indices


The new representation of points will work in both index and heap. So we
should speak simply about compatibility with old cubes.


 * All functions that create cubes from user input check whether it is a
 point or not
 * All internal functions that can return cubes take care of all cases
 where a cube might become a point
 * Added tests for checking correct point behavior

 Also this patch includes adapted Alexander Korotkov's patch with kNN-based
 ordering operator, which he wrote for postgresql-9.0beta1 with knngist
 patch. More info there
 http://www.postgresql.org/message-id/aanlktimhfaq6hcibrnk0tlcqmiyhywhwaq2zd87wb...@mail.gmail.com


I think the ordering operator should be extracted into a separate patch
together with the other ordering operators of your project.


Patch contains some formatting issues. For example, this comment

/* Point can arise in two cases:
   1) When argument is point and r == 0
   2) When all coordinates was set to their averages */

should have a star at the beginning of each line. Also, as written it will
be reflowed by pgindent. Correct formatting for this comment should look
like this:

/*--
 * Point can arise in two cases:
 * 1) When argument is point and r == 0
 * 2) When all coordinates was set to their averages
 */

See coding convention for details:
http://www.postgresql.org/docs/current/static/source-format.html

--
With best regards,
Alexander Korotkov.


Re: [HACKERS] Adding optionally commit number in PG_VERSION_STR

2013-07-17 Thread Dave Page
On Wed, Jul 17, 2013 at 2:55 AM, Michael Paquier
michael.paqu...@gmail.com wrote:
 Hi all,

 It happens that I work occasionally on multiple builds based on
 different stable branches at the same time to check fixes that need to
 be backpatched, and I tend to easily lose track of which version the
 build I created is based on (Duh!). There is of course the version
 number up to the 3rd digit available (for example 9.2.4, 9.3beta2,
 etc.), but as a developer I think that it would be helpful to include
 the commit ID in PG_VERSION_STR to get a better reference on exactly
 what the development build is based on. This could be controlled by an
 additional flag in ./configure.in called something like
 --enable-version-commit, of course disabled by default. If enabled,
 PG_VERSION_STR would be generated with the new information. configure
 would also return an error when this flag is enabled if git is either
 not found, or if the repository where configure is run is not a native
 git repository.

FYI, we include the output from git describe --always in the pgAdmin
version meta info, which is displayed on the About box along with the
regular version info. That has proven to be extremely useful in the
past, particularly during QA where people may be testing snapshot
builds.
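
A minimal illustration of what is being proposed, in pg_config.h terms (a
sketch only: PG_COMMIT_ID is a hypothetical macro that an
--enable-version-commit configure flag could inject, and the hash value is
just the one from Erik's example):

/* hypothetical, injected by configure when --enable-version-commit is set */
#define PG_COMMIT_ID "ffcf654547ef38555203e6d716f47b7065a0a87d"
#define PG_VERSION_STR \
	"PostgreSQL 9.4devel-" PG_COMMIT_ID " on x86_64-unknown-linux-gnu"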

--
Dave Page
Blog: http://pgsnake.blogspot.com
Twitter: @pgsnake

EnterpriseDB UK: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Fabien COELHO


Hello Tatsuo,


Now I have question regarding the function.

./pgbench -p 5433 -S -T 10 -R 10000 test
tps = 7133.745911 (including connections establishing)

What does average rate limit lag mean? From the manual:
[...]
So in my understanding the number shows the delay time before *each*
transaction starts.


... with respect to the schedule time assigned by the rate-limiting 
stochastic process. This is to detect that rate limiting does not work 
properly.


If my understanding is correct, why 71339 (total transactions) * 862.534 
ms = 61532 sec could exceed 10 seconds, which is the total run time?


It is possible, because each transaction is far behind schedule, and you 
accumulate the lateness.


Say you have transactions scheduled every 0.1 second, but they take 2 
seconds to complete:


 1. scheduled at 0.0, start at 0.0
 2. scheduled at 0.1, start at 2.0, 1.9 second lag
 3. scheduled at 0.2, start at 4.0, 3.8 second lag, cumulative lag 5.7 s
 4. scheduled at 0.3, start at 6.0, 5.7 second lag, cumulative lag 11.4 s
 5. scheduled at 0.4, start at 8.0, 7.6 second lag, cumulative lag 19.0 s
 6. scheduled at 0.5, never starts

If we stop at 10.0 seconds, 5 transactions have been processed; the average 
lag is about 3.8 seconds and the cumulative lag is 19.0 seconds. The lag of 
a given transaction can include lag inherited from previous ones.
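
The arithmetic of this example can be checked with a toy model in C (not
pgbench code; the 0.1 s interval and 2 s service time are the figures from
the list above):

#include <stdio.h>

int
main(void)
{
	double		interval = 0.1;	/* transactions scheduled every 0.1 s */
	double		service = 2.0;	/* each transaction takes 2 s */
	double		now = 0.0;
	double		cumulative = 0.0;
	int			i;

	for (i = 0; i < 5; i++)
	{
		double		scheduled = i * interval;
		double		start = (now > scheduled) ? now : scheduled;
		double		lag = start - scheduled;

		cumulative += lag;
		printf("tx %d: scheduled %.1f start %.1f lag %.1f cumulative %.1f\n",
			   i + 1, scheduled, start, lag, cumulative);
		now = start + service;	/* the client is busy until then */
	}
	return 0;
}

Five transactions finish within the 10 s run, the average lag is 19.0 / 5 =
3.8 s, and average lag times transaction count (19 s) legitimately exceeds
the 10 s wall clock, because each transaction's lag re-counts lateness
inherited from its predecessors.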


Basically, if the lag is anything but small, it means that the database 
cannot handle the load and that something is amiss. In your example you 
requested 10000 tps, but the database can only handle about 7000 tps.


Note that the database could catch up at some point: say it can usually 
handle more than 10000 tps, but while a database dump is running it 
falls far behind schedule, and then once the dump is done it goes back to 
nominal and late transactions are finally processed. The max lag would 
show that something was amiss during the bench, even if the average lag 
is quite low.


Also I noticed small bug.

./pgbench -R 0 test
invalid rate limit: 0

Shouldn't this be treated as if -R is not specified? Actually in the program:

/*
* When threads are throttled to a given rate limit, this is the target delay
* to reach that rate in usec.  0 is the default and means no throttling.
*/
int64   throttle_delay = 0;

So it seems treating -R 0 means no throttling makes more sense to me.


Note that the rate is expressed in tps, which makes sense to users, but the 
internal variable is in usec, which is more useful for scheduling; each is 
the inverse of the other.


So -R 0 would mean zero tps, that is an infinite delay, while a 0 delay 
would require an infinite tps. As requiring 0 tps does not make sense, I 
decided to disable that. If you really feel that -R 0 should mean 
"disable the feature", I'm fine with that, but this is not exactly logical 
wrt tps.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 Hello Tatsuo,
 
 Now I have question regarding the function.

 ./pgbench -p 5433 -S -T 10 -R 10000 test
 tps = 7133.745911 (including connections establishing)

 What does average rate limit lag mean? From the manual:
 [...]
 So in my understanding the number shows the delay time before *each*
 transaction starts.
 
 ... with respect to the schedule time assigned by the rate-limiting
 stochastic process. This is to detect that rate limiting does not work
 properly.
 
 If my understanding is correct, why 71339 (total transactions) *
 862.534 ms = 61532 sec could exceed 10 seconds, which is the total run
 time?
 
 It is possible, because each transaction is far behind schedule, and
 you accumulate the lateness.
 
 Say you have transactions scheduled every 0.1 second, but they take 2
 seconds to complete:
 
  1. scheduled at 0.0, start at 0.0
  2. scheduled at 0.1, start at 2.0, 1.9 second lag
  3. scheduled at 0.2, start at 4.0, 3.8 second lag, cumulative lag 5.7
  s
  4. scheduled at 0.3, start at 6.0, 5.7 second lag, cumulative lag 11.4
  s
  5. scheduled at 0.4, start at 8.0, 7.6 second lag, cumulative lag 19.0
  s
  6. scheduled at 0.5, never starts
 
 If we stop at 10.0 seconds, 5 transactions have been processed; the
 average lag is about 3.8 seconds and the cumulative lag is 19.0
 seconds. The lag of a given transaction can include lag inherited from
 previous ones.
 
 Basically, if the lag is anything but small, it means that the
 database cannot handle the load and that something is amiss. In your
 example you requested 10000 tps, but the database can only handle about
 7000 tps.
 
 Note that the database could catch up at some point: say it can usually
 handle more than 10000 tps, but while a database dump is running it
 falls far behind schedule, and then once the dump is done it goes back
 to nominal and late transactions are finally processed. The max lag
 would show that something was amiss during the bench, even if the
 average lag is quite low.

Thanks for the detailed explanations. I now understand the function.

 Also I noticed small bug.

 ./pgbench -R 0 test
 invalid rate limit: 0

 Shouldn't this be treated as if -R is not specified? Actually in the
 program:

 /*
 * When threads are throttled to a given rate limit, this is the target
 * delay
 * to reach that rate in usec.  0 is the default and means no throttling.
 */
 int64throttle_delay = 0;

 So it seems treating -R 0 means no throttling makes more sense to
 me.
 
 Note that the rate is expressed in tps, which makes sense to users, but
 the internal variable is in usec, which is more useful for scheduling;
 each is the inverse of the other.
 
 So -R 0 would mean zero tps, that is an infinite delay, while a 0 delay
 would require an infinite tps. As requiring 0 tps does not make sense,
 I decided to disable that. If you really feel that -R 0 should mean
 "disable the feature", I'm fine with that, but this is not exactly
 logical wrt tps.

Ok, your statement seems fair. Unless someone complains about the
point, I will leave it as it is.

I'm going to test your patches on Mac OS X and Windows.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch to add regression tests for SCHEMA

2013-07-17 Thread Fabien COELHO


I've looked this version.

The only reservation I have is that when changing the owner of a schema, 
the new owner is not always checked. I would suggest to query the new 
owner to check that it matches (5, 11, 12), just as you do in 3.


Also, reowning is tested several times (5, 11, 12). I would suggest to 
remove 12 which does not bring much new things after both 5 and 11 get 
passed ?


Otherwise the patch applies (with a minor warning about spaces on line 33) 
passes for me, and brings valuable new test coverage.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Greg Smith

On 7/17/13 2:31 AM, Tatsuo Ishii wrote:

./pgbench -p 5433 -S -T 10 -R 1 test
average rate limit lag: 862.534 ms (max 2960.913 ms)
tps = 7133.745911 (including connections establishing)
tps = 7135.130810 (excluding connections establishing)

What does average rate limit lag mean?


The whole concept of lag with the rate limit is complicated.  At one 
point I thought this should be a debugging detail, rather than exposing 
the user to it.


The problem is that if you do that, you might not notice that your limit 
failed to work as expected.  Maybe it's good enough in a case like this 
that the user will see they tried to limit at 10000, but they only got 
7135, so something must not have worked as expected.


Tatsuo:  most of my tests were on Mac OS and Linux, I actually tested 
the Mac version a lot more than any other here.  I didn't do any testing 
on Windows.


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Fabien COELHO



Thanks for the detailed explanations. I now understand the function.


Good. I've looked into the documentation. I'm not sure how I could improve 
it significantly without adding a lot of text which would also add a lot 
of confusion to the casual reader.



I'm going to test your patches on Mac OS X and Windows.


Great! I cannot do that.

--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Fabien COELHO



The whole concept of lag with the rate limit is complicated.


I must agree on that point, their interpretation is subtle.

At one point I thought this should be a debugging detail, rather than 
exposing the user to it. The problem is that if you do that, you might 
not notice that your limit failed to work as expected.  Maybe it's good 
enough in a case like this that the user will see they tried to limit at 
10000, but they only got 7135, so something must not have worked as 
expected.


Yep. As I suggested in my answer to Tatsuo, the process can catch up 
later, so you could have 10000 in the end even with something amiss.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 The whole concept of lag with the rate limit is complicated.
 
 I must agree on that point, their interpretation is subtle.
 
 At one point I thought this should be a debugging detail, rather than
 exposing the user to it. The problem is that if you do that, you might
 not notice that your limit failed to work as expected.  Maybe it's
 good enough in a case like this that the user will see they tried to
 limit at 10000, but they only got 7135, so something must not have
 worked as expected.
 
 Yep. As I suggested in my answer to Tatsuo, the process can catch up
 later, so you could have 10000 in the end even with something amiss.

Fabian,

I did another case. First, I run pgbench without -R.

$ ./pgbench -p 5433 -S -n -c 10 -T 300 test
./pgbench -p 5433 -S -n -c 10 -T 300 test
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
duration: 300 s
number of transactions actually processed: 2945652
tps = 9818.741060 (including connections establishing)
tps = 9819.389689 (excluding connections establishing)

So I thought I could squeeze 10000 TPS from my box.
Then I tried with -R 5000 tps.

$ ./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test
./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
duration: 300 s
number of transactions actually processed: 1510640
average rate limit lag: 0.304 ms (max 19.101 ms)
tps = 5035.409397 (including connections establishing)
tps = 5035.731093 (excluding connections establishing)

As you can see, I got about 5000 tps as expected. But I'm confused by
the lag:

0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds
(specified by -T). Am I missing something?
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 The whole concept of lag with the rate limit is complicated.
 
 I must agree on that point, their interpretation is subtle.
 
 At one point I thought this should be a debugging detail, rather than
 exposing the user to it. The problem is that if you do that, you might
 not notice that your limit failed to work as expected.  Maybe it's
 good enough in a case like this that the user will see they tried to
  limit at 10000, but they only got 7135, so something must not have
 worked as expected.
 
  Yep. As I suggested in my answer to Tatsuo, the process can catch up
  later, so you could have 10000 in the end even with something amiss.
 
 Fabian,
 
 I did another case. First, I run pgbench without -R.
 
 $ ./pgbench -p 5433 -S -n -c 10 -T 300 test
 ./pgbench -p 5433 -S -n -c 10 -T 300 test
 transaction type: SELECT only
 scaling factor: 1
 query mode: simple
 number of clients: 10
 number of threads: 1
 duration: 300 s
 number of transactions actually processed: 2945652
 tps = 9818.741060 (including connections establishing)
 tps = 9819.389689 (excluding connections establishing)
 
 So I thought I could squeeze 10000 TPS from my box.
 Then I tried with -R 5000 tps.
 
 $ ./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test
 ./pgbench -p 5433 -S -n -c 10 -T 300 -R 5000 test
 transaction type: SELECT only
 scaling factor: 1
 query mode: simple
 number of clients: 10
 number of threads: 1
 duration: 300 s
 number of transactions actually processed: 1510640
 average rate limit lag: 0.304 ms (max 19.101 ms)
 tps = 5035.409397 (including connections establishing)
 tps = 5035.731093 (excluding connections establishing)
 
 As you can see, I got about 5000 tps as expected. But I'm confused by
 the lag:
 
 0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds
 (specified by -T). Am I missing something?

BTW, the system was Linux (kernel 3.0.77).

Now I tried on Mac OS X.

$ pgbench -S -n -c 10 -T 10  test
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
duration: 10 s
number of transactions actually processed: 67333
tps = 6730.940132 (including connections establishing)
tps = 6751.078966 (excluding connections establishing)

$ pgbench -S -n -c 10 -T 10  -R 3000 test
transaction type: SELECT only
scaling factor: 1
query mode: simple
number of clients: 10
number of threads: 1
duration: 10 s
number of transactions actually processed: 29840
average rate limit lag: 0.089 ms (max 27.301 ms)
tps = 2983.707895 (including connections establishing)
tps = 2991.919611 (excluding connections establishing)

0.089 ms * 29840 = 2.66 seconds. Not too bad compared with 10
seconds. On Linux maybe the overhead to calculate the lag is bigger
than on Mac OS X? Just my wild guess though...
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] XLogInsert scaling, revisited

2013-07-17 Thread Amit Kapila
On Monday, July 08, 2013 2:47 PM Heikki Linnakangas wrote:  
 Ok, I've committed this patch now. Finally, phew!

Few doubts while reading the code:

1. Why in function WALInsertSlotAcquireOne(int slotno), it does
START_CRIT_SECTION() to
   Lock out cancel/die interrupts, whereas other places call
HOLD_INTERRUPTS()

2. In function GetXLogBuffer(), why is the logic to wake up waiters
different when expectedEndPtr != endptr?
   When is the wakeup of waiters done in the case expectedEndPtr == endptr?

3.
static bool
ReserveXLogSwitch(..)

In the above function's header comment, why are EndPos_p/StartPos_p used
when the function arguments are EndPos/StartPos?


With Regards,
Amit Kapila.



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses

2013-07-17 Thread Greg Smith

On 7/16/13 11:36 PM, Ants Aasma wrote:

As you know running a full suite of write benchmarks takes a very long
time, with results often being inconclusive (noise is greater than
effect we are trying to measure).


I didn't say that.  What I said is that over a full suite of write 
benchmarks, the effect of changes like this has always averaged out to 
zero.  You should try it sometime.  Then we can have a useful discussion 
of non-trivial results instead of you continuing to tell me I don't 
understand things.


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses

2013-07-17 Thread Amit Kapila
On Tuesday, July 16, 2013 10:16 PM Ants Aasma wrote:
 On Jul 14, 2013 9:46 PM, Greg Smith g...@2ndquadrant.com wrote:
  I updated and re-reviewed that in 2011:
 http://www.postgresql.org/message-id/4d31ae64.3000...@2ndquadrant.com
 and commented on why I think the improvement was difficult to reproduce
 back then.  The improvement didn't follow for me either.  It would take
 a really amazing bit of data to get me to believe write sorting code is
 worthwhile after that.  On large systems capable of dirtying enough
 blocks to cause a problem, the operating system and RAID controllers
 are already sorting blocks.  And *that* sorting is also considering
 concurrent read requests, which are a lot more important to an
 efficient schedule than anything the checkpoint process knows about.
 The database doesn't have nearly enough information yet to compete
 against OS level sorting.
 
 That reasoning makes no sense. OS level sorting can only see the
 writes in the time window between PostgreSQL write, and being forced
 to disk. Spread checkpoints sprinkles the writes out over a long
 period and the general tuning advice is to heavily bound the amount of
 memory the OS is willing to keep dirty. This makes the probability of
 scheduling adjacent writes together quite low, the merging window
 being limited either by dirty_bytes or dirty_expire_centisecs. The
 checkpointer has the best long term overview of the situation here, OS
 scheduling only has the short term view of outstanding read and write
 requests. By sorting checkpoint writes it is much more likely that
 adjacent blocks are visible to OS writeback at the same time and will
 be issued together.
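
For illustration, the sorting described here amounts to ordering the
to-be-written buffers by file and block before issuing the writes, so that
writeback sees sequential runs. A minimal sketch under those assumptions
(BufWrite is an illustrative record, not PostgreSQL's actual buffer tag):

#include <stdint.h>
#include <stdlib.h>

typedef struct
{
	uint32_t	relfilenode;	/* which relation file */
	uint32_t	blocknum;		/* block within that file */
} BufWrite;

/* Order writes by file, then block, so adjacent dirty blocks reach the
 * OS together and can be merged into sequential I/O. */
static int
bufwrite_cmp(const void *a, const void *b)
{
	const BufWrite *x = a;
	const BufWrite *y = b;

	if (x->relfilenode != y->relfilenode)
		return (x->relfilenode < y->relfilenode) ? -1 : 1;
	if (x->blocknum != y->blocknum)
		return (x->blocknum < y->blocknum) ? -1 : 1;
	return 0;
}

/* before the checkpoint write loop:
 *		qsort(writes, nwrites, sizeof(BufWrite), bufwrite_cmp);
 */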

I think Oracle also use similar concept for making writes efficient, and
they have patent also for this technology which you can find at below link:
http://www.google.com/patents/US7194589?dq=645987&hl=en&sa=X&ei=kn7mUZ-PIsWq
rAe99oDgBw&sqi=2&pjf=1&ved=0CEcQ6AEwAw

Although Oracle has a different concept for performing checkpoint writes, I
thought of sharing the above link with you, so that we do not unknowingly
go down the wrong path.

AFAIK, instead of depending on OS buffers, they use direct I/O, and in fact
in the patent above they are using a temporary buffer (Claim 3) to sort the
writes, which is not the same idea as far as I can understand from reading
the above thread.

With Regards,
Amit Kapila.



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Fabien COELHO



tps = 9818.741060 (including connections establishing)

So I thought I could squeeze 10000 TPS from my box.
Then I tried with -R 5000 tps.

number of transactions actually processed: 1510640
average rate limit lag: 0.304 ms (max 19.101 ms)
tps = 5035.409397 (including connections establishing)

As you can see, I got about 5000 tps as expected.


Yep, it works:-)


But I'm confused by the lag:

0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds
(specified by -T). Am I missing something?


The lag is reasonable, although not too good. One transaction is about 
1.2 ms, the lag is much smaller than that, and you are at about 50% of the 
maximum load. I've got similar figures on my box for such settings. It 
improves if you reduce the number of clients.


If you reduce the number of clients, or add more threads, the lag is 
reduced.



BTW, the system was Linux (kernel 3.0.77).



tps = 6730.940132 (including connections establishing)
$ pgbench -S -n -c 10 -T 10  -R 3000 test



average rate limit lag: 0.089 ms (max 27.301 ms)
tps = 2983.707895 (including connections establishing)

0.089 ms * 29840 = 2.66 seconds. Not too bad compared with 10
seconds.


Indeed, that is better. Transactions are about 1.5 ms and you run at about 
45% of the maximum load here.



On Linux maybe the overhead to calculate the lag is bigger
than Mac OS X? Just my wild guess though...


I would be surprised if the issue were the cost of computing the measure, 
compared to network connections and the like. With -S the bench is cpu 
bound. Possibly better scheduler/thread management on OSX? Or more 
available cores? Well, I do not know! At high load with clients running on 
the same box as the server, and with more clients & server processes than 
available cores, there is a lot of competition between processes, and 
between clients that share a unique thread, and a lot of context switching, 
which will result in a measured lag.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Adding optionally commit number in PG_VERSION_STR

2013-07-17 Thread Michael Paquier
On Wed, Jul 17, 2013 at 5:20 PM, Dave Page dp...@pgadmin.org wrote:
 On Wed, Jul 17, 2013 at 2:55 AM, Michael Paquier
 michael.paqu...@gmail.com wrote:
 Hi all,

 It happens that I work occasionally on multiple builds based on
 different stable branches at the same time to check fixes that need to
 be backpatched, and I tend to easily lose track on which version the
 build I created is based on (Duh!). There is of course the version
 number up to the 3rd digit available (for example 9.2.4, 9.3beta2,
 etc.), but as a developer I think that it would be helpful to include
 the commit ID in PG_VERSION_STR to get a better reference on exactly
 what the development build is based on. This could be controlled by an
 additional flag in ./configure.in called something like
 --enable-version-commit, of course disabled by default. If enabled,
 PG_VERSION_STR would be generated with the new information. configure
 would also return an error when this flag is enabled if git is either
 not found, or if the repository where configure is not a native git
 repository.

 FYI, we include the output from git describe --always in the pgAdmin
 version meta info, which is displayed on the About box along with the
 regular version info. That has proven to be extremely useful in the
 past, particularly during QA where people may be testing snapshot
 builds.
Yes, that's also something tracked for the QA/QE tests at VMware.
Having such an option in core would be a good thing for many people
IMHO instead of using some manual scripting.
--
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] review: Non-recursive processing of AND/OR lists

2013-07-17 Thread Gurjeet Singh
On Tue, Jul 16, 2013 at 4:04 PM, Pavel Stehule pavel.steh...@gmail.comwrote:

 I did a some performance tests of v5 and v6 version and there v5 is
 little bit faster than v6, and v6 has significantly higher stddev


Thanks Pavel.

The difference in average seems negligible, but stddev is interesting
because v6 does less work than v5 in common cases and in the test that I
had shared.

The current commitfest (2013-06) is marked as 'In Progress', so is it okay
to just mark the patch as 'Ready for Committer' or should I move it to the
next commitfest (2013-09).

What's the procedure of moving a patch to the next commitfest? Do I make a
fresh submission there with a link to current submission, or is the move
doable somehow in the application itself.

Best regards,
-- 
Gurjeet Singh

http://gurjeet.singh.im/

EnterpriseDB Inc.


Re: [HACKERS] review: Non-recursive processing of AND/OR lists

2013-07-17 Thread Gurjeet Singh
On Wed, Jul 17, 2013 at 8:21 AM, Gurjeet Singh gurj...@singh.im wrote:


 What's the procedure of moving a patch to the next commitfest?


Never mind, I see an email from Josh B. regarding this on my corporate
account.

Best regards,
-- 
Gurjeet Singh

http://gurjeet.singh.im/

EnterpriseDB Inc.


Re: [HACKERS] XLogInsert scaling, revisited

2013-07-17 Thread Andres Freund
On 2013-07-17 15:46:00 +0530, Amit Kapila wrote:
 On Monday, July 08, 2013 2:47 PM Heikki Linnakangas wrote:  
  Ok, I've committed this patch now. Finally, phew!
 
 Few doubts while reading the code:
 
 1. Why in function WALInsertSlotAcquireOne(int slotno), it does
 START_CRIT_SECTION() to
Lock out cancel/die interrupts, whereas other places call
 HOLD_INTERRUPTS()

A crit section does more than just stop interrupts. It also ensures
that errors that occur while inside one get converted to a PANIC. That
seems apt for SlotAcquire/Release, although the comments could possibly
be improved a bit.

 2. In function GetXLogBuffer(), why the logic to wakeup waiters is
 different when expectedEndPtr != endptr;
   When the wakeupwaiters is done in case expectedEndPtr == endptr?

I am not sure what you're asking here. We wake up waiters if
expectedEndPtr != endptr because that means the wal buffer page that the
'ptr' fits on currently has different content. Which in turn means we've
finished with the last page and progressed to a new one. So we wake up
everyone waiting for us.
WakeupWaiters() doesn't get passed expectedEndPtr but expectedEndPtr -
XLOG_BLCKSZ (up to there we are guaranteed to have inserted
successfully). And we're comparing with the xlogInsertingAt value which
basically measures up to where we've successfully inserted.

 3.
 static bool
 ReserveXLogSwitch(..)
 
 In above function header, why EndPos_p/StartPos_p is used when
 function arguments are EndPos/StartPos?

I guess that's bitrot...

Greetings,

Andres Freund

-- 
 Andres Freund http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training  Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses

2013-07-17 Thread Ants Aasma
On Wed, Jul 17, 2013 at 1:54 PM, Greg Smith g...@2ndquadrant.com wrote:
 On 7/16/13 11:36 PM, Ants Aasma wrote:

 As you know running a full suite of write benchmarks takes a very long
 time, with results often being inconclusive (noise is greater than
 effect we are trying to measure).


 I didn't say that.  What I said is that over a full suite of write
 benchmarks, the effect of changes like this has always averaged out to zero.
 You should try it sometime.  Then we can have a useful discussion of
 non-trivial results instead of you continuing to tell me I don't understand
 things.

The fact that other changes have been tradeoffs doesn't change the
point that there is no tradeoff here. I see no way in which writing
blocks to the OS in a logical order is worse than writing them out in
arbitrary order. This is why I considered blindly running write
benchmarks a waste of time at this point - if the worst case is zero
and there are cases where it helps then it can't average out to zero.
It would be better to identify the worst case and design a test for
that.

However I started the full gamut of scale factors and client count
tests just to quiet any fears of unexpected regressions. 4 scales, 6
client loads, 3 tests, 20min per test, 2 versions, the results will be
done in 48h.

Regards,
Ants Aasma
-- 
Cybertec Schönig  Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt
Web: http://www.postgresql-support.de


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Improvement of checkpoint IO scheduler for stable transaction responses

2013-07-17 Thread Ants Aasma
On Wed, Jul 17, 2013 at 2:54 PM, Amit Kapila amit.kap...@huawei.com wrote:
 I think Oracle also use similar concept for making writes efficient, and
 they have patent also for this technology which you can find at below link:
 http://www.google.com/patents/US7194589?dq=645987&hl=en&sa=X&ei=kn7mUZ-PIsWq
 rAe99oDgBw&sqi=2&pjf=1&ved=0CEcQ6AEwAw

 Although Oracle has different concept for performing checkpoint writes, but
 I thought of sharing the above link with you, so that unknowingly we should
 not go into wrong path.

 AFAIK instead of depending on OS buffers, they use direct I/O and infact in
 the patent above they are using temporary buffer (Claim 3) to sort the
 writes which is not the same idea as far as I can understand by reading
 above thread.

They are not even sorting anything, the patent is for
opportunistically looking for adjacent dirty blocks when writing out a
dirty buffer to disk. While a useful technique, this has nothing to do
with sorting checkpoints. It's also a good example why the patent
system is stupid. It's an obvious idea that probably has loads of
prior art. I'm no patent lawyer, but the patent also looks like it
would be easy to bypass by doing the equivalent thing in a slightly
different way.

Regards,
Ants Aasma
-- 
Cybertec Schönig  Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt
Web: http://www.postgresql-support.de


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 tps = 9818.741060 (including connections establishing)

 So I thought I could squeeze 10000 TPS from my box.
 Then I tried with -R 5000 tps.

 number of transactions actually processed: 1510640
 average rate limit lag: 0.304 ms (max 19.101 ms)
 tps = 5035.409397 (including connections establishing)

 As you can see, I got about 5000 tps as expected.
 
 Yep, it works:-)
 
 But I'm confused by the lag:

 0.304 ms * 1510640 = 459.2 seconds, which is longer than 300 seconds
 (specified by -T). Am I missing something?
 
 The lag is reasonable, although not too good. One transaction is
 about 1.2 ms, the lag is much smaller than that, and you are at about
 50% of the maximum load. I've got similar figures on my box for such
 settings. It improves if you reduce the number of clients.

No, 5000 TPS = 1/5000 = 0.2 ms per transaction, no? However pgbench
says the average lag is 0.304 ms. So the lag is longer than the
transaction itself.

 If you reduce the number of clients, or add more threads, the lag is
 reduced.
 
 BTW, the system was Linux (kernel 3.0.77).
 
 tps = 6730.940132 (including connections establishing)
 $ pgbench -S -n -c 10 -T 10  -R 3000 test
 
 average rate limit lag: 0.089 ms (max 27.301 ms)
 tps = 2983.707895 (including connections establishing)

 0.089 ms * 29840 = 2.66 seconds. Not too bad compared with 10
 seconds.
 
 Indeed, that is better. Transactions are about 1.5 ms and you run at
 about 45% of the maximum load here.
 
 On Linux maybe the overhead to calculate the lag is bigger
 than Mac OS X? Just my wild guess though...
 
 I would be surprised if the issue were the cost of computing the
 measure, compared to network connections and the like. With -S the
 bench is cpu bound. Possibly better scheduler/thread management on
 OSX? Or more available cores?

The number of cores is the same.  I don't understand why the number of
cores is related, though. Anyway, as you can see in Mac OS X's case, TPS
itself is no better than on the Linux box.

  Well, I do not know! At high load with
 clients running on the same box as the server, and with more clients &
 server processes than available cores, there is a lot of competition
 between processes, and between clients that share a unique thread, and a
 lot of context switching, which will result in a measured lag.

Hmm... I would like to have a cleaner explanation/evidence before
committing the patch.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.

2013-07-17 Thread Tom Lane
Kevin Grittner kgri...@postgresql.org writes:
 Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.

The buildfarm members that use -DCLOBBER_CACHE_ALWAYS say this patch
is broken.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] review: Non-recursive processing of AND/OR lists

2013-07-17 Thread Josh Berkus
On 07/17/2013 05:21 AM, Gurjeet Singh wrote:
 On Tue, Jul 16, 2013 at 4:04 PM, Pavel Stehule pavel.steh...@gmail.comwrote:
 
 I did a some performance tests of v5 and v6 version and there v5 is
 little bit faster than v6, and v6 has significantly higher stddev

 
 Thanks Pavel.
 
 The difference in average seems negligible, but stddev is interesting
 because v6 does less work than v5 in common cases and in the test that I
 had shared.
 
 The current commitfest (2013-06) is marked as 'In Progress', so is it okay
 to just mark the patch as 'Ready for Committer' or should I move it to the
 next commitfest (2013-09).

If this is actually ready for committer, I'll mark it as such.


-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_filedump 9.3: checksums (and a few other fixes)

2013-07-17 Thread Alvaro Herrera
Tom Lane escribió:
 Alvaro Herrera alvhe...@2ndquadrant.com writes:
  Well, Tom opined in
  http://www.postgresql.org/message-id/23249.1370878...@sss.pgh.pa.us that
  the current patch is okay.  I have a mild opinion that it should instead
  print only SHR_LOCK when both bits are set, and one of the others when
  only one of them is set.  But I don't have a strong opinion about this,
  and since Tom disagrees with me, feel free to exercise your own (Jeff's)
  judgement.
 
 FWIW, I think that's exactly what I did in the preliminary 9.3 patch
 that I committed to pg_filedump a few weeks ago.  Could you take a look
 at what's there now and see if that's what you meant?

Here's sample output (-i) from the new code, i.e. this commit:
revision 1.7
date: 2013/06/06 18:33:17;  author: tgl;  state: Exp;  lines: +14 -10
Preliminary updates for Postgres 9.3.


Data -- 
 Item   1 -- Length:   28  Offset: 8160 (0x1fe0)  Flags: NORMAL
  XMIN: 692  XMAX: 693  CID|XVAC: 0
  Block Id: 0  linp Index: 1   Attributes: 1   Size: 24
  infomask: 0x0190 (XMAX_KEYSHR_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED) 

 Item   2 -- Length:   28  Offset: 8128 (0x1fc0)  Flags: NORMAL
  XMIN: 692  XMAX: 694  CID|XVAC: 0
  Block Id: 0  linp Index: 2   Attributes: 1   Size: 24
  infomask: 0x01d0 
(XMAX_KEYSHR_LOCK|XMAX_EXCL_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED) 

 Item   3 -- Length:   28  Offset: 8096 (0x1fa0)  Flags: NORMAL
  XMIN: 692  XMAX: 695  CID|XVAC: 0
  Block Id: 0  linp Index: 3   Attributes: 1   Size: 24
  infomask: 0x01c0 (XMAX_EXCL_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED) 

 Item   4 -- Length:   28  Offset: 8064 (0x1f80)  Flags: NORMAL
  XMIN: 696  XMAX: 697  CID|XVAC: 0
  Block Id: 0  linp Index: 4   Attributes: 1   Size: 24
  infomask: 0x01c0 (XMAX_EXCL_LOCK|XMAX_LOCK_ONLY|XMIN_COMMITTED|KEYS_UPDATED) 

Item 1 has SELECT FOR KEY SHARE
Item 2 has SELECT FOR SHARE
Item 3 has SELECT FOR NO KEY UPDATE
Item 4 has SELECT FOR UPDATE

The one I was talking about is the second case, which prints
KEYSHR_LOCK|EXCL_LOCK to mean that there's a FOR SHARE lock.  I have no
problem reading it this way, but I fear that someone unfamiliar with
these bits might be confused.  On the other hand, trying to be nice and
interpret these bits (i.e. translate presence of both into something
like SHR_LOCK) might also be confusing, because that bit doesn't really
exist.  And one already needs to be careful when interpreting what
KEYS_UPDATED and XMAX_LOCK_ONLY, or their absence, mean.

Perhaps it would be sensible to provide one more output line per tuple,
with interpretation of the flags, so it would tell you whether the tuple
has been locked or updated, and what kind of each it is.  I'd propose
something like

  status: locked (FOR {KEY SHARE,SHARE,NO KEY UPDATE,UPDATE}) [MultiXact: nnn]
  status: [HOT] updated (KEYS UPDATED/KEYS NOT UPDATED) [MultiXact: nnn]  To: 
blk/off
  status: deleted [MultiXact: nnn]

-- 
Álvaro Herrerahttp://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training  Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_filedump 9.3: checksums (and a few other fixes)

2013-07-17 Thread Tom Lane
Alvaro Herrera alvhe...@2ndquadrant.com writes:
 The one I was talking about is the second case, which prints
 KEYSHR_LOCK|EXCL_LOCK to mean that there's a FOR SHARE lock.  I have no
 problem reading it this way, but I fear that someone unfamiliar with
 these bits might be confused.  On the other hand, trying to be nice and
 interpret these bits (i.e. translate presence of both into something
 like SHR_LOCK) might also be confusing, because that bit doesn't really
 exist.  And one already needs to be careful while interpreting what do
 KEYS_UPDATED and XMAX_LOCK_ONLY, or lack thereof, mean.

 Perhaps it would be sensible to provide one more output line per tuple,
 with interpretation of the flags, so it would tell you whether the tuple
 has been locked or updated, and what kind of each it is.  I'd propose
 something like

   status: locked (FOR {KEY SHARE,SHARE,NO KEY UPDATE,UPDATE}) [MultiXact: nnn]
   status: [HOT] updated (KEYS UPDATED/KEYS NOT UPDATED) [MultiXact: nnn]  To: 
 blk/off
   status: deleted [MultiXact: nnn]

Hm.  I'm loath to add another output line per tuple, just for space
reasons.

My feeling about this code is that the reason we print the infomask in
hex is so you can see exactly which bits are set if you care, and that
the rest of the line ought to be designed to interpret the bits in as
reader-friendly a way as possible.  So I don't buy the notion that we
should just print out a name for each bit that's set.  I'd rather
replace individual bit names with items like LOCKED_FOR_KEY_SHARE,
LOCKED_FOR_SHARE, etc in cases where you have to combine multiple
bits to understand the meaning.
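
Something like those combined names could be produced by a small decoder
along these lines (a sketch only; the bit values are the 9.3 htup_details.h
constants, and it assumes XMAX_LOCK_ONLY is set, as in the sample items
shown earlier):

#include <stdint.h>

#define HEAP_XMAX_KEYSHR_LOCK	0x0010	/* infomask */
#define HEAP_XMAX_EXCL_LOCK		0x0040	/* infomask */
#define HEAP_KEYS_UPDATED		0x2000	/* infomask2 */

static const char *
lock_mode_name(uint16_t infomask, uint16_t infomask2)
{
	int			keyshr = (infomask & HEAP_XMAX_KEYSHR_LOCK) != 0;
	int			excl = (infomask & HEAP_XMAX_EXCL_LOCK) != 0;

	if (keyshr && excl)
		return "LOCKED_FOR_SHARE";
	if (keyshr)
		return "LOCKED_FOR_KEY_SHARE";
	if (excl)
		return (infomask2 & HEAP_KEYS_UPDATED) ?
			"LOCKED_FOR_UPDATE" : "LOCKED_FOR_NO_KEY_UPDATE";
	return "NOT_LOCKED";
}

Against the four sample items above this yields LOCKED_FOR_KEY_SHARE,
LOCKED_FOR_SHARE, LOCKED_FOR_NO_KEY_UPDATE and LOCKED_FOR_UPDATE,
respectively.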

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] review: Non-recursive processing of AND/OR lists

2013-07-17 Thread Robert Haas
On Mon, Jul 15, 2013 at 12:45 AM, Gurjeet Singh gurj...@singh.im wrote:
 Agreed that there's overhead in allocating list items, but is it more
 overhead than pushing functions on the call stack? Not sure, so I leave it
 to others who understand such things better than I do.

If you think that a palloc can ever be cheaper than pushing a frame on
the call stack, you're wrong.  palloc is not some kind of atomic
primitive.  It's implemented by the AllocSetAlloc function, and you're
going to have to push that function on the call stack, too, in order
to run it.

My main point here is that if the user writes a = 1 and b = 1 and c =
1 and d = 1, they're not going to end up with a bushy tree.  They're
going to end up with a tree that's only deep in one direction (left, I
guess) and that's the case we might want to consider optimizing.  To
obtain a bushy tree, they're going to have to write a = 1 and (b = 1
and c = 1) and d = 1, or something like that, and I don't see why we
should stress out about that case.  It will be rare in practice.
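
For context, the non-recursive processing at issue replaces recursion down
such a left-deep tree with an explicit loop. A generic sketch of the idea
(Node is a simplified stand-in, not the parser's real node struct):

#include <stddef.h>

typedef struct Node
{
	struct Node *left;		/* deep side of a left-deep AND chain */
	struct Node *right;		/* a simple leaf operand here */
	int			is_and;		/* 1 for an AND node, 0 for a leaf */
} Node;

/* Walk a left-deep AND chain iteratively instead of recursing, so
 * "a = 1 and b = 1 and ..." chains of arbitrary length cannot
 * overflow the call stack. */
static void
process_and_chain(Node *n)
{
	while (n != NULL && n->is_and)
	{
		/* handle n->right, the right-hand operand of this AND ... */
		n = n->left;		/* ... then iterate down the left spine */
	}
	/* n is now the leftmost leaf (or NULL) */
}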

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Fabien COELHO


Hello Tatsuo,


The lag is reasonable, although not too good. One transaction is
about 1.2 ms, the lag is much smaller than that, and you are at about
50% of the maximum load. I've got similar figures on my box for such
settings. It improves if you reduce the number of clients.


No, 5000 TPS = 1/5000 = 0.2 ms per transaction, no?


Hmmm... Yes, and no:-)

Transactions are handled in parallel because there are 10 clients. I look 
at actual transaction times (latency) from a client perspective, not the 
apparent time due to parallelism, and compare that to the measured lag, 
which is also measured per client.


The transaction time I reported is derived from your maximum tps per 
client: 10 clients / 8300 tps = 1.2 ms per transaction. However, there are 
10 transactions in progress in parallel.


When you're running at 50% load, the clients basically spend 1.2 ms doing 
a transaction (sending queries, getting results) and 1.2 ms sleeping 
because of rate limiting. The reported 0.3 ms lag means that a client 
meant to sleep 1.2 ms tends to start a little later, after 1.5 ms, but 
this latency does not show up in the throughput figures because the next 
sleep will just be smaller, to catch up.


As you have 10 clients in one pgbench thread, the scheduling says to start 
a new transaction for a client at a certain time, but the pgbench process 
is late to actually handle this client's query because it is doing other 
things, like attending to one of the other clients, or being switched out 
to run a server process.


However pgbench says average lag is 0.304 ms. So the lag is longer than 
transaction itself.


See above.


I would be surprised that this would be the issue is to compute the
measure, compared to network connections and the like. With -S the
bench is cpu bound. Possibly a better scheduler/thread management on
OSX? Or more available cores?


The number of cores is same.  I don't understand why number of cores
is relatedx, though.


In my mind, because pgbench -S is cpu bound, and with -c 10 you have 
to run pgbench and 10 postgres backends, that is 11 processes competing 
for cpu time. If you have 11 cores that is mostly fine; if you have fewer, 
there will be some delay depending on how the OS process scheduler 
allocates cpu time. With a load of 50%, about 6 cores should be okay to 
run the load transparently (client & server).


 Well, I do not know! At high load with clients running on the same box 
as the server, and with more clients & server processes than available 
cores, there is a lot of competition between processes, and between clients 
that share a single thread, and a lot of context switching, which will 
result in a measured lag.


Hmm... I would like to have a cleaner explanation/evidence before
committing the patch.


The lag measures you report seem pretty consistent to me given the load 
you're requiring, for a cpu bound bench, with more processes to run than 
available cores. At least, I'm buying my own explanation, and I hope to be 
convincing.


If you want to isolate yourself from such effects, pgbench must run on a 
different host than the server, with as many threads as there are cores 
available, and not too many clients per thread. This is also true without 
throttling, but it shows more under throttling because of the lag 
(latency) measures.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix pgstattuple/pgstatindex to use regclass-type as the argument

2013-07-17 Thread Fujii Masao
On Tue, Jul 16, 2013 at 3:00 PM, Satoshi Nagayasu sn...@uptime.jp wrote:
 (2013/07/04 3:58), Fujii Masao wrote:
 On Wed, Jun 26, 2013 at 12:39 AM, Robert Haas robertmh...@gmail.com wrote:
 On Thu, Jun 20, 2013 at 2:32 PM, Fujii Masao masao.fu...@gmail.com wrote:
 Since pg_relpages(oid) doesn't exist, pg_relpages() is in the same
 situation as pgstatindex(), i.e., we cannot just replace pg_relpages(text)
 with pg_relpages(regclass) for backward compatibility. How do you
 think we should solve the pg_relpages() problem? Rename? Just
 add pg_relpages(regclass)?

 Adding a function with a new name seems likely to be smoother, since
 that way you don't have to worry about problems with function calls
 being thought ambiguous.

 Could you let me know an example where this problem happens?

 For the test, I just implemented the regclass version of pg_relpages()
 (patch attached) and tested some cases. But I could not reproduce that problem.

  SELECT pg_relpages('hoge');    -- OK
  SELECT pg_relpages(oid) FROM pg_class WHERE relname = 'hoge';    -- OK
  SELECT pg_relpages(relname) FROM pg_class WHERE relname = 'hoge';    -- OK

 In the attached patch, I cleaned up three functions to have
 two types of arguments for each, text and regclass.

   pgstattuple(text)
   pgstattuple(regclass)
   pgstatindex(text)
   pgstatindex(regclass)
   pg_relpages(text)
   pg_relpages(regclass)

 I still think a regclass argument is more appropriate for passing a
 relation/index name to a function than text, but having both
 argument types for each function seems to be a good choice at this moment,
 in terms of backward compatibility.

 Docs need to be updated if this change is going to be applied.

Yes, please.

 Any comments?

'make installcheck' failed in my machine.

Do we need to remove pgstattuple--1.1.sql and create pgstattuple--1.1--1.2.sql?

+/* contrib/pgstattuple/pgstattuple--1.1.sql */

Typo: s/1.1/1.2

You seem to have forgotten to update pgstattuple.c.

Regards,

-- 
Fujii Masao


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgsql: Optimize pglz compressor for small inputs.

2013-07-17 Thread Heikki Linnakangas

On 14.07.2013 20:12, Stephen Frost wrote:

* Heikki Linnakangas (heikki.linnakan...@iki.fi) wrote:

This patch alleviates that in two ways. First, instead of storing pointers
in the hash table, store 16-bit indexes into the hist_entries array. That
slashes the size of the hash table to 1/2 or 1/4 of the original, depending
on the pointer width. Secondly, adjust the size of the hash table based on
input size. For very small inputs, you don't need a large hash table to
avoid collisions.


   The coverity scanner has a bit of an issue with this patch which, at
   least on first blush, looks like a valid concern.

   While the change in pg_lzcompress.c:pglz_find_match() to loop on:

   while (hent != INVALID_ENTRY_PTR)
   {
  const char *ip = input;
  const char *hp = hent->pos;

   looks good, and INVALID_ENTRY_PTR is the address of the first entry in
   the array (and can't be NULL), towards the end of the loop we do:

   hent = hent->next;
   if (hent)
  ...

   Should we really be checking for 'hent != INVALID_ENTRY_PTR' here?  If
   not, and hent really can end up as NULL, then we're going to segfault
   on the next loop due to the unchecked 'hent->pos' early in the loop.
   If hent can never be NULL, then we probably don't need this check at
   all.


hent can never be NULL; the code should indeed check for 'hent != 
INVALID_ENTRY_PTR'. The check is not required from a correctness point 
of view; the idea is just to avoid calculating the 'good_match' 
variable if you're going to fall out of the loop anyway.


I'm actually a bit surprised the compiler doesn't optimize it that way 
anyway, without the explicit if-block, but at least gcc -O2 (version 
4.7.3) doesn't seem to do that. So, I guess we should keep the check.


Committed '(hent)' -> '(hent != INVALID_ENTRY_PTR)'. Thanks for the report!

- Heikki


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_filedump 9.3: checksums (and a few other fixes)

2013-07-17 Thread Alvaro Herrera
Tom Lane escribió:

 My feeling about this code is that the reason we print the infomask in
 hex is so you can see exactly which bits are set if you care, and that
 the rest of the line ought to be designed to interpret the bits in as
 reader-friendly a way as possible.  So I don't buy the notion that we
 should just print out a name for each bit that's set.  I'd rather
 replace individual bit names with items like LOCKED_FOR_KEY_SHARE,
 LOCKED_FOR_SHARE, etc in cases where you have to combine multiple
 bits to understand the meaning.

Okay, that's what I've been saying all along so I cannot but agree.  I
haven't reviewed Jeff's patch lately; Jeff, does Tom's suggestion need
some more new code, and if so are you open to doing this work, or shall
I?

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] review: Non-recursive processing of AND/OR lists

2013-07-17 Thread Gurjeet Singh
On Wed, Jul 17, 2013 at 1:25 PM, Robert Haas robertmh...@gmail.com wrote:

 On Mon, Jul 15, 2013 at 12:45 AM, Gurjeet Singh gurj...@singh.im wrote:
  Agreed that there's overhead in allocating list items, but is it more
  overhead than pushing functions on the call stack? Not sure, so I leave
 it
  to others who understand such things better than I do.

 If you think that a palloc can ever be cheaper than pushing a frame on
 the callstack, you're wrong.  palloc is not some kind of an atomic
 primitive.  It's implemented by the AllocSetAlloc function, and you're
 going to have to push that function on the call stack, too, in order
 to run it.


Agreed. I take my objection back. Even if AllocSetAlloc() reuses memory
that was pfree'd earlier, it'll still be at least as expensive as recursing.



 My main point here is that if the user writes a = 1 and b = 1 and c =
 1 and d = 1, they're not going to end up with a bushy tree.  They're
 going to end up with a tree that's only deep in one direction (left, I
 guess) and that's the case we might want to consider optimizing.  To
 obtain a bushy tree, they're going to have to write a = 1 and (b = 1
 and c = 1) and d = 1, or something like that, and I don't see why we
 should stress out about that case.  It will be rare in practice.


In v6 of the patch, I have deferred the 'pending' list initialization
until we actually hit a candidate right branch. So in the common case the
pending list will never be populated, and if we do find a bushy or right-deep
tree (for some reason an ORM/tool may choose to build AND/OR lists that end
up being right-deep in Postgres), the pending list will be used to process
it iteratively, roughly as sketched below.

Does that alleviate your concern about 'pending' list management causing
overhead?

Agreed that bushy/right-deep trees are a remote corner case, but we are
addressing a remote corner case in the first place (insanely long AND
lists), so why not handle another remote corner case right now if it
doesn't cost the common case anything?

Best regards,
-- 
Gurjeet Singh

http://gurjeet.singh.im/

EnterpriseDB Inc.


Re: [HACKERS] Listen/notify across clusters

2013-07-17 Thread Josh Berkus
On 07/16/2013 07:16 PM, Andreas Karlsson wrote:
 I guess one problem is to implement writing to the WAL with the smallest
 possible performance hit.  As far as I can see there are two possible
 approaches: either write to WAL when NOTIFY is run or write to WAL on
 commit. The former seems more in line with how commands in PostgreSQL
 usually work.

Yes.

There was some work being done by Heikki or Andres Freund on log-only
tables which seems like it would be a perfect solution to this.  Anyone
know what happened to that patch?

 There shouldn't be any major problems with implementing LISTEN on the
 slaves since LISTEN is done in memory.

Actually, that's not the hard part.  Listeners need to be registered on
the standby, which requires a write to a system catalog, currently.  So
you'd need some alternate way to register listeners on the standby.

Presumably all LISTEN events would need to be broadcast to all standbys,
whether or not they had LISTENERs registered.  Otherwise we'd have to
push the listener registrations back to the master.

-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] new row-level lock error messages

2013-07-17 Thread Alvaro Herrera
Peter Eisentraut wrote:

 In general, I find these new wordings to be a loss of clarity.  There is
 no indication on the SELECT man page or in the documentation index what
 a row-level lock is at all.
 
 I would suggest that these changes be undone, except that the old
 SELECT FOR ... be replaced by a dynamic string that reverse-parses the
 LockingClause to provide the actual clause that was used.

Hmm, that's an idea.  If there are no objections, I'll get this fixed.

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Listen/notify across clusters

2013-07-17 Thread Andrew Dunstan


On 07/17/2013 02:08 PM, Josh Berkus wrote:



There shouldn't be any major problems with implementing LISTEN on the
slaves since LISTEN is done in memory.

Actually, that's not the hard part.  Listeners need to be registered on
the standby, which requires a write to a system catalog, currently.



Er, not since 9.0 I think - there is no pg_listener any more.

cheers

andrew



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Josh Berkus
Hackers,

We've seen this before:

http://www.postgresql.org/message-id/b42b73150906271019k6212ba71u6263bc43a8ec8...@mail.gmail.com

I've run across this again, and am posting it here just in case someone
should be struck by some amazing inspiration on where the problem might
be. Here's the issue:

  Pasting text blocks over 100 lines into psql running in
  a terminal causes lines to be arbitrarily truncated, mangling
  the query or function.

The exact truncation appears random, but is 100% consistent for any
given block of text.  For example, in one test I ran, all lines 101-200
were truncated at character #32, but lines 1-100 were untouched.  In
another test, lines #82 and #163 were truncated after the 14th character,
but other lines were fine.  In the Ubuntu 12.10 tests below, the
truncation occurred in the same places regardless of whether I was using
kterm, gterm or xterm.

Here's where I've seen this issue occur:

- Ubuntu 12.10:
  - 9.2 installed from source with readline
  - 9.4 installed from source with readline, and with libedit
  - using Gnome Terminal, Kterm, or xterm
- Ubuntu 12.04:
  - 9.2 installed using packages from apt.postgresql.org
  - over SSH from a Gnome Terminal
- Mac OSX 10.6
  - 9.2 installed from source with readline

Interestingly, here's where it does NOT occur:
- Scientific Linux 6.1
  - 9.2 installed from yum.postgresql.org packages
  - over SSH from a Gnome Terminal on Ubuntu
- Ubuntu 12.10
  - 9.4 from source, compiled --without-readline

Also, the same mangling does not happen with any of the following
applications on Ubuntu 12.10:
- nano
- emacs
- joe
- gedit
- cat, including cat | psql

I could not find another readline-equipped terminal application to test
against.

Ubuntu 12.10 has libreadline 6.2.9, and SciLinux 6.1 has version 6.0.
However, since this happens with libedit as well, I don't think it's
purely a readline issue.

It seems like we're having some kind of bizarre interaction of psql's
readline/libedit hooks and specific terminals or library versions, but
darned if I can even figure out where to look.  If anybody has
inspiration strike, please post!

-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: template-ify (binary) extensions

2013-07-17 Thread Dimitri Fontaine
Markus Wanner mar...@bluegap.ch writes:
 But okay, you're saying we *have* and *want* a guarantee that even a
 superuser cannot execute arbitrary native code via libpq (at least in
 default installs w/o extensions).

There are several problems confused into that sentence already. I think
the next step of this discussion should be to talk about the
problems we have and figure out *if* we want to solve them, now that
we managed to explicitly say what we want as a project.

  - per-installation (not even per-cluster) DSO availability

If you install PostGIS 1.5 on a system, then it's just impossible to
bring up another cluster (of the same PostgreSQL major version), let
alone database, with PostGIS 2.x, even for migration assessment
purposes. The By Design™ part is really hard to explain even to
security-conscious users.

  - hot standby and modules (aka DSO)

As soon as you use some functions in 'language C' you need to
carefully watch your external dependencies ahead of time. If you do
CREATE EXTENSION hstore;, create an hstore column and a GiST index
on it, then query the table on the standby… no luck. You would tell
me that it's easy enough to do and that it's part of the job, so see
next point.

  - sysadmin vs dba, or PostgreSQL meets the Cloud

The current model of operations is intended for places where you
have separate roles: the sysadmin cares about the OS setup and will
provide the system packages (compiled extensions and the like), and
DBAs are never root on the OS. They can CREATE EXTENSION and maybe
use the 'postgres' system account, but that's about it.

Given the current rise of Cloud environments and devops
teams, my understanding is that this model is no longer the only
one. Depending on who you talk to, in some circles it's not even a
relevant model anymore: user actions should not require the
intervention of a sysadmin beforehand.

While I appreciate that many companies still want the old behavior
that used to be the only relevant model of operations, I think we
should also provide for the new one, as it's pervasive enough now for
us to stop ignoring it with our "I know better" smiling face.

Now it should be possible to solve at least some of those items while
still keeping the restriction above, or with an opt-in mechanism to
enable the "works by default, but you have to solve the security
implications yourself" behaviour. A new GUC should do it, boolean,
defaulting to false:

  runs_in_the_cloud_with_no_security_concerns = false

I don't think the relaxed behaviour we're talking about is currently
possible to develop as an extension, by the way.

 Andres made two contrib-free suggestions: with COPY TO BINARY, you get a

Well, what about using lo_import()?

 Things aren't quite so bad if we write the bits to a file first and
 then dynamically load the file.  That way at least noexec or similar
 can provide protection.  But it still seems like a pretty dangerous
 direction.

 I agree now. Thanks for elaborating.

Yes it's dangerous. It's also solving real world problems that I see no
other way to solve apart from bypassing the need to ever load a DSO
file, that is embedding a retargetable C compiler in the backend.

Regards,
-- 
Dimitri Fontaine
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Alvaro Herrera
Amit kapila escribió:


  +# This includes the default configuration directory included to support
  +# ALTER SYSTEM statement.
  +#
  +# WARNING: User should not remove below include_dir or directory config,
  +#  as both are required to make ALTER SYSTEM command work.
  +#  Any configuration parameter values specified after this line
  +#  will override the values set by ALTER SYSTEM statement.
  +#include_dir = 'config'
 
  Why do we need to expose this setting to a user? 
 
 a) This can be a knob to turn this feature off. This has been asked for by
    a few people; one of the mail links is mentioned below (refer towards
    the end of the mail in the link):
    http://www.postgresql.org/message-id/515b04f9.30...@gmx.net

 b) In case the user wants to change the priority of parameters set by
    ALTER SYSTEM, he can move the include_dir up or down in postgresql.conf.

Both of these seem like they would make troubleshooters' lives more
difficult.  I think we should just parse the auto file automatically
after parsing postgresql.conf, without requiring the include directive
to be there.

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Josh Berkus
On 07/17/2013 12:01 PM, Alvaro Herrera wrote:
 Both of these seem like they would make troubleshooters' lives more
 difficult.  I think we should just parse the auto file automatically
 after parsing postgresql.conf, without requiring the include directive
 to be there.

Wait, I thought the auto file was going into the conf.d directory?


-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Andrew Dunstan


On 07/17/2013 02:50 PM, Josh Berkus wrote:

Hackers,

We've seen this before:

http://www.postgresql.org/message-id/b42b73150906271019k6212ba71u6263bc43a8ec8...@mail.gmail.com

I've run across this again, and am posting it here just in case someone
should be struck by some amazing inspiration on where the problem might
be. Here's the issue:

   Pasting text blocks over 100 lines into psql running in
   a terminal causes lines to be arbitrarily truncated, mangling
   the query or function.


[snip]



It seems like we're having some kind of bizarre interaction of psql's
readline/libedit hooks and specific terminals or library versions, but
darned if I can even figure out where to look.  If anybody has
inspiration strike, please post!





What happens when you do this with psql -n ? My suspicion is it's the 
tab completion code, which I occasionally find useful to disable this 
way - sadly one can't do that on the fly AFAIK.


cheers

andrew



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Robert Haas
On Fri, Jul 12, 2013 at 9:15 AM, Amit kapila amit.kap...@huawei.com wrote:
 The sleep is used to ensure the effects of pg_reload_conf() are visible.

That strikes me as almost certain to result in random regression test
failures.  My experience, as a man who helps to maintain a very large
suite of regression tests, is that all time-based assumptions about
how long it will take some operation to complete are false, and that
failures are a lot more frequent than you think they will be.

This feature doesn't strike me as a good candidate for regression
testing anyway.  Keep in mind that people are intended to be able to
run make installcheck against a deployed system without messing it up.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Merlin Moncure
On Wed, Jul 17, 2013 at 2:20 PM, Andrew Dunstan and...@dunslane.net wrote:

 On 07/17/2013 02:50 PM, Josh Berkus wrote:

 Hackers,

 We've seen this before:


 http://www.postgresql.org/message-id/b42b73150906271019k6212ba71u6263bc43a8ec8...@mail.gmail.com

 I've run across this again, and am posting it here just in case someone
 should be struck by some amazing inspiration on where the problem might
 be. Here's the issue:

Pasting text blocks over 100 lines into psql running in
a terminal causes lines to be arbitrarily truncated, mangling
the query or function.


 [snip]



 It seems like we're having some kind of bizarre interaction of psql's
 readline/libedit hooks and specific terminals or library versions, but
 darned if I can even figure out where to look.  If anybody has
 inspiration strike, please post!




 What happens when you do this with psql -n ? My suspicion is it's the tab
 completion code, which I occasionally find useful to disable this way -
 sadly one can't do that on the fly AFAIK.

huh -- you're right.

merlin


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] XLogInsert scaling, revisited

2013-07-17 Thread Heikki Linnakangas

On 17.07.2013 02:18, Michael Paquier wrote:

On Tue, Jul 16, 2013 at 2:24 AM, Fujii Masao masao.fu...@gmail.com wrote:

On Mon, Jul 8, 2013 at 6:16 PM, Heikki Linnakangas
hlinnakan...@vmware.com  wrote:

Ok, I've committed this patch now. Finally, phew!


I found that this patch causes an assertion failure. When I set up a simple
replication environment and promoted the standby before executing any
transaction on the master, I got the following assertion failure.

2013-07-16 02:22:06 JST sby1 LOG:  received promote request
2013-07-16 02:22:06 JST sby1 FATAL:  terminating walreceiver process
due to administrator command
2013-07-16 02:22:06 JST sby1 LOG:  redo done at 0/2F0
2013-07-16 02:22:06 JST sby1 LOG:  selected new timeline ID: 2
hrk:head-pgsql postgres$ 2013-07-16 02:22:06 JST sby1 LOG:  archive
recovery complete
TRAP: FailedAssertion(!(readOff == (XLogCtl->xlblocks[firstIdx] -
8192) % ((uint32) (16 * 1024 * 1024))), File: "xlog.c", Line: 7048)
2013-07-16 02:22:12 JST sby1 LOG:  startup process (PID 37115) was
terminated by signal 6: Abort trap
2013-07-16 02:22:12 JST sby1 LOG:  terminating any other active server processes

Note that this is also reproducible even when trying to recover only
from archives without streaming replication.


Fixed, thanks for the report. While at it, I slightly refactored the way 
the buffer bookkeeping works. Instead of keeping track of the index of 
the last initialized buffer, keep track of how far the buffer cache has 
been initialized in an XLogRecPtr variable (called 
XLogCtl->InitializedUpTo). That made the code slightly more readable IMO.


- Heikki


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] XLogInsert scaling, revisited

2013-07-17 Thread Heikki Linnakangas

On 17.07.2013 15:24, Andres Freund wrote:

On 2013-07-17 15:46:00 +0530, Amit Kapila wrote:

Few doubts while reading the code:

1. Why does the function WALInsertSlotAcquireOne(int slotno) do
START_CRIT_SECTION() to Lock out cancel/die interrupts, whereas other
places call HOLD_INTERRUPTS()?


A crit section does more than just stop interrupts. It also ensures
that errors that occur while inside one get converted to a PANIC. That
seems apt for SlotAcquire/Release. Although the comments could possibly
be improved a bit.


Agreed. The comment was copied from LWLockAcquire(), which only does 
HOLD_INTERRUPTS(). The crucial difference between LWLockAcquire() and 
WALInsertSlotAcquire() is that there is no automatic cleanup mechanism 
on abort for the WAL insertion slots like there is for lwlocks. Added a 
sentence to the comment to mention that.



3.
static bool
ReserveXLogSwitch(..)

In the above function header, why are EndPos_p/StartPos_p used when
the function arguments are EndPos/StartPos?


I guess that's bitrot...


Yep, fixed.

Thanks for the review!

- Heikki


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: template-ify (binary) extensions

2013-07-17 Thread Markus Wanner
On 07/17/2013 08:52 PM, Dimitri Fontaine wrote:
 the next step of this discussion should be about talking about the
 problems we have and figuring out *if* we want to solve them, now that
 we managed to explicitely say what we want as a project.
 
   - per-installation (not even per-cluster) DSO availability
 
 If you install PostGIS 1.5 on a system, then it's just impossible to
 bring another cluster (of the same PostgreSQL major version), let
 alone database, with PostGIS 2.x, even for migration assessment
 purposes. The By Design™ part is really hard to explain even to
 security concious users.

On Debian, that should be well possible. Certainly installing Postgres
9.1 w/ postgis-1.5 in parallel to Postgres 9.2 w/ postgis-2.0 is. I
designed it to be.

On distributions that do not allow parallel installation of multiple
Postgres major versions, it's certainly not the extension's fault.

I think I'm misunderstanding the problem statement, here.

   - hot standby and modules (aka DSO)
 
 As soon as you use some functions in 'language C' you need to
 carefully watch your external dependencies ahead of time. If you do
 CREATE EXTENSION hstore;, create an hstore column and a GiST index
 on it, then query the table on the standby… no luck. You would tell
 me that it's easy enough to do and that it's part of the job, so see
 next point.

Agreed, that's an area where Postgres could do better. I'd argue this
should be possible without relaxing the security guarantees provided,
though. Because there likely are people wanting both.

Can CREATE EXTENSION check if the standbys have the extension installed?
And refuse creation, if they don't?

   - sysadmin vs dba, or PostgreSQL meets the Cloud
 
 The current model of operations is intended for places where you
 have separate roles: the sysadmin cares about the OS setup and will
 provide with system packages (compiled extensions and the like), and
 DBA are never root on the OS. They can CREATE EXTENSION and maybe
 use the 'postgres' system account, but that's about it.

I'm sure you are aware that even without this clear separation of roles,
the guarantee means we provide an additional level of security against
attackers.

 Given the current raise of the Cloud environements and the devops
 teams, my understanding is that this model is no longer the only
 one. Depending on who you talk to, in some circles it's not even a
 relevant model anymore: user actions should not require the
 intervention of a sysadmin before hand.
 
 While I appreciate that many companies still want the old behavior
 that used to be the only relevant model of operations, I think we
 should also provide for the new one as it's pervasive enough now for
 us to stop ignoring it with our I know better smiling face.

I'd even think it's a minority who actually use the guarantee we're
talking about. Mostly because of the many and widespread untrusted PLs
(which undermine the guarantee). And thus even before the rise of the cloud.

Nonetheless, the safe-by-default behavior has served us well, I think.

 Now it should be possible to solve at least some of those items while
 still keeping the restriction above, or with an opt-in mechanism to
 enable the works by default, but you have to solve the security
 implications yourself behaviour. A new GUC should do it, boolean,
 defaults false:
 
   runs_in_the_cloud_with_no_security_concerns = false

[ I usually associate cloud with (increased) security concerns, but
  that's an entirely different story. ]

 I don't think the relaxed behaviour we're talking about is currently
 possible to develop as an extension, by the way.

It's extensions that undermine the guarantee, at the moment. But yeah,
it depends a lot on what kind of relaxed behavior you have in mind.

 Andres made two contrib-free suggestions: with COPY TO BINARY, you get a
 
 Well, what about using lo_import()?

That only reads from the file-system. You probably meant lo_export(),
which is writing. Although not on the server's, but only on the (libpq)
client's file-system. No threat to the server.

 Yes it's dangerous. It's also solving real world problems that I see no
 other way to solve apart from bypassing the need to ever load a DSO
 file, that is embedding a retargetable C compiler in the backend.

If the sysadmin wants to disallow arbitrary execution of native code to
postgres (the process), any kind of embedded compiler likely is equally
unwelcome.

Regards

Markus Wanner


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_memory_barrier() doesn't compile, let alone work, for me

2013-07-17 Thread Tom Lane
Robert Haas robertmh...@gmail.com writes:
 On Sun, Jul 14, 2013 at 8:41 PM, Tom Lane t...@sss.pgh.pa.us wrote:
 However, fixing that doesn't yield much joy; initdb stalls and then
 crashes with
 
 PANIC:  stuck spinlock (40054a88) detected at xlog.c:2182
 
 The reason for that is that the code does not bother to initialize
 dummy_spinlock anywhere.  It might accidentally fail to fail
 on machines where the unlocked state of a spinlock is all-zeroes
 (given a compiler that's not picky about the incorrect macro usage)
 ... but HPPA is not such a machine.

 This would not be hard to fix, I think.

Really?  Given that the memory barrier primitives are supposed to be,
well, primitive, I don't think this is exactly a trivial problem.
There's no good place to initialize such a variable, and there's even
less of a place to make sure that fork or exec leaves it in an
appropriate state in the child process.

 Rather than trying to think of a fix for that, I'm of the opinion that
 we should rip this out.  The fallback implementation of pg_memory_barrier
 ought to be pg_compiler_barrier(), on the theory that non-mainstream
 architectures don't have weak memory ordering anyway, or if they do you
 need to do some work to get PG to work on them.  Or maybe we ought to
 stop pretending that the code is likely to work on arbitrary machines,
 and just #error if there's not a supplied machine-specific macro.

 Well, pg_memory_barrier() isn't even equivalent to
 pg_compiler_barrier() on x86, which has among the strongest memory
 orderings out there, so I think your first idea is a non-starter.

Among the strongest memory orderings compared to what?  Since what we're
discussing here is non-mainstream architectures, I think this claim is
unfounded.  Most of the ones I can think of offhand are old enough to
not even have multiprocessor support, so that the issue is vacuous.

 I'm pretty sure we've got latent memory-ordering risks in our existing
 code which we just haven't detected and fixed yet.  Consider, for
 example, this exciting code from GetNewTransactionId:

 myproc->subxids.xids[nxids] = xid;
 mypgxact->nxids = nxids + 1;

 I don't believe that's technically safe even on an architecture like
 x86, because the compiler could decide to reorder those assignments.

Wrong, because both pointers are marked volatile.  If the compiler does
reorder the stores, it's broken.  Admittedly, this doesn't say anything
about hardware reordering :-(

 My preference would be to fix this in a narrow way, by initializing
 dummy_spinlock somewhere.  But if not, then I think #error is the only
 safe way to go.

I'm inclined to agree that #error is the only realistic answer in
general, though we could probably go ahead with equating
pg_memory_barrier to pg_compiler_barrier on specific architectures we
know are single-processor-only.  Unfortunately, that means we just
raised the bar for porting efforts significantly.  And in particular,
it means somebody had better go through s_lock.h and make sure we have a
credible barrier implementation for every single arch+compiler supported
therein.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Alvaro Herrera
Robert Haas escribió:

 This feature doesn't strike me as a good candidate for regression
 testing anyway.  Keep in mind that people are intended to be able to
 run make installcheck against a deployed system without messing it up.

This is my opinion as well.

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_memory_barrier() doesn't compile, let alone work, for me

2013-07-17 Thread Tom Lane
I wrote:
 I'm inclined to agree that #error is the only realistic answer in
 general, though we could probably go ahead with equating
 pg_memory_barrier to pg_compiler_barrier on specific architectures we
 know are single-processor-only.  Unfortunately, that means we just
 raised the bar for porting efforts significantly.  And in particular,
 it means somebody had better go through s_lock.h and make sure we have a
 credible barrier implementation for every single arch+compiler supported
 therein.

After going through s_lock.h another time, I can't help noticing that
a large majority of the non-mainstream architectures make use of the
default version of S_UNLOCK(), which is just

#define S_UNLOCK(lock)  (*((volatile slock_t *) (lock)) = 0)

I assert that if this is a correct implementation, then the platform
does not reorder writes, since correctness requires that any writes to
shared memory variables protected by the lock occur before the lock is
released.

Generally speaking, I'm not seeing any memory-barrier-ish instructions
on the locking side either, meaning there's also no risk of read
reordering.  It's possible that some of these arches do read reordering
except for not hoisting reads before instructions that can be used to
take locks ... but I'll bet that most of them simply don't have weak
memory ordering.

So I'm back to the position that pg_compiler_barrier() is a perfectly
credible default implementation.  More so than an incorrect usage of
spinlocks, anyway.  In particular, I'm going to go fix HPPA that way
so I can get my build working again.

BTW, the only arches for which we seem to have any barrier instructions
in S_UNLOCK are ARM, PPC, Alpha, and MIPS.  Alpha, at least, is probably
dead, and I'm not seeing any MIPS machines in the buildfarm either;
I wouldn't feel bad about desupporting both of those arches.

Also, a comparison to s_lock.h says that the PPC code in barrier.h is a
few bricks shy of a load: it's not honoring USE_PPC_LWSYNC.  And while
I'm bitching, the #ifdef structure in barrier.h is impossible to follow,
not least because none of the #endifs are labeled, contrary to project
style.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Josh Berkus
On 07/17/2013 12:20 PM, Andrew Dunstan wrote:
 What happens when you do this with psql -n ? My suspicion is it's the
 tab completion code, which I occasionally find it useful to disable this
 way - sadly one can't do that on the fly AFAIK.

Well, that works.  But it disables readline, not just tab completion.

So it's nice that I don't have to *compile* without readline, but we
still have the issue that you can't paste large files and also have
readline.

So, there's an even more practical workaround (I've been using cat | psql),
but it's still a mysterious issue.

-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Andres Freund
On 2013-07-17 17:05:40 -0400, Alvaro Herrera wrote:
 Robert Haas escribió:
 
  This feature doesn't strike me as a good candidate for regression
  testing anyway.  Keep in mind that people are intended to be able to
  run make installcheck against a deployed system without messing it up.
 
 This is my opinion as well.

Setting and resetting a GUC like pg_regress.test_var or so shouldn't be
too invasive. Some minimal testing seems like a good idea to me.

Greetings,

Andres Freund

-- 
 Andres Freund                 http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Josh Berkus
On 07/17/2013 03:17 PM, Andres Freund wrote:
 On 2013-07-17 17:05:40 -0400, Alvaro Herrera wrote:
 Robert Haas escribió:

 This feature doesn't strike me as a good candidate for regression
 testing anyway.  Keep in mind that people are intended to be able to
 run make installcheck against a deployed system without messing it up.

 This is my opinion as well.
 
 Setting and resetting a GUC like pg_regress.test_var or so shouldn't be
 too invasive. Some minimal testing seems like a good idea to me.

Also, we can make check even if we don't make installcheck.

-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_memory_barrier() doesn't compile, let alone work, for me

2013-07-17 Thread Robert Haas
On Wed, Jul 17, 2013 at 4:57 PM, Tom Lane t...@sss.pgh.pa.us wrote:
 This would not be hard to fix, I think.

 Really?  Given that the memory barrier primitives are supposed to be,
 well, primitive, I don't think this is exactly a trivial problem.
 There's no good place to initialize such a variable, and there's even
 less of a place to make sure that fork or exec leaves it in an
 appropriate state in the child process.

Well, I admit that I don't really know how spinlocks work on every
obscure platform out there, but I would have thought we could
initialize this in, say, main() and call it good.  For that to be not
OK, we'd have to be running on a non-EXEC_BACKEND platform where a
previously initialized spinlock is no longer in a good state after
fork().  Unless you know of a case where that happens, I'd be inclined
to assume it's a non-problem.  If we find a counterexample later, then
I'd insert an architecture-specific hack for that platform only, with
a comment along the lines of /* YBGTBFKM */.

 Well, pg_memory_barrier() isn't even equivalent to
 pg_compiler_barrier() on x86, which has among the strongest memory
 orderings out there, so I think your first idea is a non-starter.

 Among the strongest memory orderings compared to what?  Since what we're
 discussing here is non-mainstream architectures, I think this claim is
 unfounded.  Most of the ones I can think of offhand are old enough to
 not even have multiprocessor support, so that the issue is vacuous.

Compared to other multi-processor architectures.  I agree that the
barriers are all reducible to compiler barriers on single-processor
architectures, but I think new ports of PostgreSQL are much more
likely to be to multi-processor systems rather than uniprocessor
systems.  There are very, very few multi-processor systems where
pg_memory_barrier() is reducible to pg_compiler_barrier().

 I'm pretty sure we've got latent memory-ordering risks in our existing
 code which we just haven't detected and fixed yet.  Consider, for
 example, this exciting code from GetNewTransactionId:

 myproc->subxids.xids[nxids] = xid;
 mypgxact->nxids = nxids + 1;

 I don't believe that's technically safe even on an architecture like
 x86, because the compiler could decide to reorder those assignments.

 Wrong, because both pointers are marked volatile.  If the compiler does
 reorder the stores, it's broken.  Admittedly, this doesn't say anything
 about hardware reordering :-(

OK, natch.  So it's safe on x86, but not on POWER.

 My preference would be to fix this in a narrow way, by initializing
 dummy_spinlock somewhere.  But if not, then I think #error is the only
 safe way to go.

 I'm inclined to agree that #error is the only realistic answer in
 general, though we could probably go ahead with equating
 pg_memory_barrier to pg_compiler_barrier on specific architectures we
 know are single-processor-only.

I'd be fine with that.

 Unfortunately, that means we just
 raised the bar for porting efforts significantly.  And in particular,
 it means somebody had better go through s_lock.h and make sure we have a
 credible barrier implementation for every single arch+compiler supported
 therein.

I tried, but the evidence shows that I have not entirely succeeded.  :-(

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Alvaro Herrera
Josh Berkus wrote:

 So, an even more practical workaround (I've been using cat | psql), but
 still a mysterious issue.

How often do your files contain tabs?  I have seen cases where tab
completion messes things up by asking you after a tab whether you really
want to complete due to the number of possible completions, and such.

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Josh Berkus
On 07/17/2013 03:37 PM, Alvaro Herrera wrote:
 Josh Berkus wrote:
 
 So, an even more practical workaround (I've been using cat | psql), but
 still a mysterious issue.
 
 How often do your files contain tabs?  I have seen cases where tab
 completion messes things up by asking you after a tab whether you really
 want to complete due to the number of possible completions, and such.

First thing I thought of, so I made sure my text editor was replacing
tabs with spaces.  Also, my test cases had no tabs at all (nor spaces
either).

-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Dimitri Fontaine
Josh Berkus j...@agliodbs.com writes:
 So, an even more practical workaround (I've been using cat | psql), but
 still a mysterious issue.

As a workaround you might try \e with EDITOR=emacs or some of the other
solutions you've been pasting, maybe even cat, so that you can switch to a
readline-completion-bug-free environment for just that paste?

Regards,
-- 
Dimitri Fontaine
http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Return of can't paste into psql issue

2013-07-17 Thread Josh Berkus
On 07/17/2013 04:30 PM, Dimitri Fontaine wrote:
 Josh Berkus j...@agliodbs.com writes:
 So, an even more practical workaround (I've been using cat | psql), but
 still a mysterious issue.
 
 As a workaround you might try \e with EDITOR=emacs or some of the other
 solutions you've been pasting, maybe even cat, so that you can switch
 that readline-completion-bug-free environment for just that paste?

Oh, there's lots of good workarounds.  I just wanna know why the bug is
happening in the first place.

(FWIW, I hit this because I'm using a graphical editor for PL/Python
which pipes stuff to the terminal to execute it, and its piping hits the
same issues as paste-from-clipboard)

-- 
Josh Berkus
PostgreSQL Experts Inc.
http://pgexperts.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Peter Eisentraut
If we are going to add ALTER SYSTEM, then I'd like to consider recasting
ALTER ROLE ALL SET in terms of ALTER SYSTEM as well, because the ALTER
ROLE ALL syntax was a hack.  At least it should be possible to consider
various levels of system.  Who knows, maybe a future version will
allow propagating settings to standby servers?  I don't know what a good
terminology would be, but let's keep some options open.





-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Peter Eisentraut
On Tue, 2013-07-16 at 04:23 +, Amit kapila wrote:
 
  Why do we need to expose this setting to a user? 
 
 a) This can be a knob to turn this feature off. This has been asked for
    by a few people; one of the mail links is mentioned below (refer
    towards the end of the mail in the link):
    http://www.postgresql.org/message-id/515b04f9.30...@gmx.net

 b) In case the user wants to change the priority of parameters set by
    ALTER SYSTEM, he can move the include_dir up or down in
    postgresql.conf.

In that message I argue that the auto file should always be read
implicitly.

The comment about being able to turn this feature off was because, as
presented, it was too complicated and error-prone.  Removing the include
would incidentally not be a way to turn the feature off.  Turning the
feature off would require disabling the ALTER SYSTEM command.  I'm not
really asking for that functionality, but I'm really against having to
manually include the auto file, and also against the system nagging me
about including the file.

There is the slightly interesting question of whether the auto file should
notionally be processed before or after the main postgresql.conf, but
that's a question we should answer now, not leave to the users to work
out.




-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
Fabien,

 Hello Tatsuo,
 
 The lag is reasonable, although not too good. One transaction is
 about 1.2 ms, the lag is much smaller than that, and you are at about
 50% of the maximum load. I've got similar figures on my box for such
 settings. It improves if you reduce the number of clients.

 No, 5000 TPS = 1/5000 = 0.2 ms per transaction, no?
 
 Hmmm... Yes, and no:-)
 
 Transactions are handled in parallel because there are 10 clients. I
 look at actual transaction times (latency) from a client perspective,
 not the apparent time due to parallelism, and compare that to the
 measured lag, which is also measured per client.
 
 The transaction time I reported is derived from your maximum tps per
 client: 10 clients / 8300 tps = 1.2 ms / trans. However, there are 10
 transactions in progress in parallel.
 
 When you're running at 50% load, the clients basically spend 1.2 ms
 doing a transaction (sending queries, getting results), and 1.2 ms
 sleeping because of rate limiting. The reported 0.3 ms lag means that
 when sleeping 1.2 ms a client tends to start a little later, after 1.5
 ms, but this latency does not show up in the throughput figures because
 the next sleep will simply be smaller, to catch up.
 
 As you have 10 clients in one pgbench thread, the schedule says to
 start a new transaction for a client at a certain time, but the
 pgbench process is late to actually handle this client's query because
 it is doing other things, like attending to one of the other clients,
 or being switched out to run a server process.
 
 However pgbench says average lag is 0.304 ms. So the lag is longer
 than the transaction itself.
 
 See above.
 
 I would be surprised if the issue were how the measure is computed,
 compared to network connections and the like. With -S the
 bench is cpu bound. Possibly better scheduler/thread management on
 OSX? Or more available cores?

 The number of cores is the same.  I don't understand why the number of
 cores is related, though.
 
 In my mind, because pgbench -S is cpu bound, and with -c 10 you
 have to run pgbench and 10 postgres backends, that is 11 processes
 competing for cpu time. If you have 11 cores that is mostly fine; if
 you have fewer then there will be some delay depending on how the
 process scheduler allocates cpu time in the OS. With a
 load of 50%, about 6 cores should be okay to run the load
 transparently (client & server).
 
  Well, I do not know! At high load with clients running on the same box
  as the server, and with more clients & server processes than available
  cores, there is a lot of competition between processes, and between
  clients that share a single thread, and a lot of context switching,
  which will result in a measured lag.

 Hmm... I would like to have a cleaner explanation/evidence before
 committing the patch.
 
 The lag measures you report seem pretty consistent to me given the
 load you're requiring, for a cpu bound bench, with more processes to
 run than available cores. At least, I'm buying my own explanation, and
 I hope to be convincing.
 
 If you want to isolate yourself from such effects, pgbench must run on
 a different host than the server, with as many threads as there are
 cores available, and not too many clients per thread. This is also
 true without throttling, but it shows more under throttling because of
 the lag (latency) measures.

I think I'm starting to understand what's going on.  Suppose there are
n transactions issued by pgbench, and it decides a schedule d(0),
d(1), ..., d(n). The schedule d(i) (which is stored in
st->until) is decided by the following code:

int64 wait = (int64)
throttle_delay * -log(getrand(thread, 1, 1000)/1000.0);

thread->throttle_trigger += wait;

st->until = thread->throttle_trigger;

st->until represents the time by which the transaction is expected to
finish. Now transaction i finishes at t(i), so the lag l(i) = t(i)
- d(i) if the transaction is behind.  Then the next transaction i+1
begins, with lag l(i+1) = t(i+1) - d(i+1), and so on. At the end,
pgbench shows the average lag as sum(l(0)...l(n))/n.

Now suppose we have 3 transactions and each has following values:

d(0) = 10
d(1) = 20
d(2) = 30

t(0) = 100
t(1) = 110
t(2) = 120

That says pgbench expects a duration of 10 for each
transaction. Actually, the first transaction runs slowly for some
reason and the lag = 100 - 10 = 90. However, tx(1) and tx(2) are
finished on schedule because they spend only 10 (110-100 = 10, 120-110
= 10). So the expected average lag would be 90/3 = 30.

However actually pgbench calculates like this:

average lag = (t(0)-d(0) + t(1)-d(1) + t(2)-d(2))/3
= (100-10 + 110-20 + 120-30)/3
= (90 + 90 + 90)/3
= 90

That looks like too much lag. The difference between the lag which pgbench
calculates and the expected one will keep growing if a lag happens
earlier. I guess that is why my Linux box shows a bigger lag than

Re: [HACKERS] pgbench patches

2013-07-17 Thread Tatsuo Ishii
 Hello Tatsuo,
 
 For me, the error message is not quite right, because the progress == 0
 case is considered an error as well in your patch. I suggest you change
 the error message to something like:

 thread progress delay (-P) must be positive number (%s)\n,
 
 Please find attached a new version with an updated message.
 
 Thanks. I've been testing on Linux. Starting from this coming Tuesday
 (Monday is a national holiday in Japan) I will test on Mac OS X and
 Windows.

I have done the test on Mac OS X. Windows testing was done by my
colleague, Yugo Nagata. The results were very positive and I committed
the --progress patches.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Greg Smith

On 7/17/13 9:16 PM, Tatsuo Ishii wrote:

Now suppose we have 3 transactions and each has following values:

d(0) = 10
d(1) = 20
d(2) = 30

t(0) = 100
t(1) = 110
t(2) = 120

That says pgbench expects a duration of 10 for each
transaction. Actually, the first transaction runs slowly for some
reason and the lag = 100 - 10 = 90. However, tx(1) and tx(2) are
finished on schedule because they spend only 10 (110-100 = 10, 120-110
= 10). So the expected average lag would be 90/3 = 30.


The clients are not serialized here in any significant way, even when 
they share a single process/thread.  I did many rounds of tracing 
through this code with timestamps on each line, and the sequence of 
events here will look like this:


client 0:  send SELECT... to server.  yield to next client.
client 1:  send SELECT... to server.  yield to next client.
client 2:  send SELECT... to server.  yield to next client.
select():  wait for the first response from any client.
client 0:  receive response.  complete transaction, compute lag.
client 1:  receive response.  complete transaction, compute lag.
client 2:  receive response.  complete transaction, compute lag.

There is nothing here that queues the clients one after the other. 
If (0) takes 100ms before its reply comes back, (1) and (2) can receive 
their replies and continue forward at any time.  They are not waiting 
for (0); it has yielded control while waiting for a response.  All three 
times are independent once you reach the select() point where all are 
active.
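
For reference, a minimal sketch of that multiplexing pattern.  The
libpq calls (PQconnectdb, PQsendQuery, PQsocket, PQconsumeInput,
PQisBusy, PQgetResult) are the real asynchronous API; the empty
conninfo string and the trivial query are placeholders, and error
handling is omitted for brevity:

#include <stdbool.h>
#include <sys/select.h>
#include <libpq-fe.h>

#define NCLIENTS 3

int main(void)
{
	PGconn	   *conn[NCLIENTS];
	bool		done[NCLIENTS] = {false, false, false};
	int			pending = NCLIENTS;

	for (int i = 0; i < NCLIENTS; i++)
	{
		conn[i] = PQconnectdb("");			/* placeholder conninfo */
		PQsendQuery(conn[i], "SELECT 1");	/* send and yield; no blocking */
	}

	while (pending > 0)
	{
		fd_set		readfds;
		int			maxfd = -1;

		FD_ZERO(&readfds);
		for (int i = 0; i < NCLIENTS; i++)
		{
			if (done[i])
				continue;
			int			fd = PQsocket(conn[i]);

			FD_SET(fd, &readfds);
			if (fd > maxfd)
				maxfd = fd;
		}
		select(maxfd + 1, &readfds, NULL, NULL, NULL);	/* first reply from anyone */

		for (int i = 0; i < NCLIENTS; i++)
		{
			if (done[i] || !FD_ISSET(PQsocket(conn[i]), &readfds))
				continue;
			PQconsumeInput(conn[i]);
			if (!PQisBusy(conn[i]))			/* whole reply is in: finish the tx */
			{
				PGresult   *res;

				while ((res = PQgetResult(conn[i])) != NULL)
					PQclear(res);			/* ...and compute lag here */
				done[i] = true;
				pending--;
			}
		}
	}
	for (int i = 0; i < NCLIENTS; i++)
		PQfinish(conn[i]);
	return 0;
}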


In this situation, if the server gets stuck doing something such that it 
takes 100ms before any client receives a response, it is correct to 
penalize every client for that latency.  All three clients could have 
received the information earlier if the server had any to send them.  If 
they did not, they all were suffering from some sort of lag.


I'm not even sure why you spaced the start times out at intervals of 10. 
 If I were constructing an example like this, I'd have them start at 
times of 0, 1, 2--as fast as the CPU can fire off statements 
basically--and then start waiting from that point.  If client 1 takes 10 
units of time to send its query out before client 2 runs, and the rate 
goal requires 10 units of time, the rate you're asking for is impossible.


For sorting out what's going on with your two systems, I would recommend 
turning on debugging output with -d and looking at the new 
per-transaction latency numbers that the feature reports.  If your 
theory that the lag is going up as the test proceeds is true, that 
should show up in the individual latency numbers too.


Based on what I saw during weeks of testing here, I would be more 
suspicious that there's a system level difference between your two 
servers than to blame the latency calculation.  I saw a *lot* of weird 
system issues myself when I started looking that carefully at sustained 
throughput.  The latency reports from the perspective of Fabien's code 
were always reasonable though.  When something delays every client, it 
counts that against every active client's lag, and that's the right 
thing to do.


--
Greg Smith   2ndQuadrant US   g...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com




[HACKERS] Proposal/design feedback needed: WITHIN GROUP (sql standard ordered set aggregate functions)

2013-07-17 Thread Andrew Gierth
The spec defines two types of aggregate function classed as ordered set
function, as follows:
 
1. An inverse distribution function taking one argument (which must be
   a grouped column or otherwise constant within groups) plus a sorted
   group with exactly one column:
 
   =# SELECT (func(p) WITHIN GROUP (ORDER BY q)) from ...
 
   The motivating example for this (and the only ones in the spec) are
   percentile_cont and percentile_disc, to return a percentile result
   from a continuous or discrete distribution. (Thus
   percentile_cont(0.5) within group (order by x) is the spec's version
   of a median(x) function.)
 
2. A hypothetical set function taking N arguments of arbitrary types
   (a la VARIADIC "any", rather than a fixed list) plus a sorted group
   with N columns of matching types:
 
   =# SELECT (func(p1,p2,...) WITHIN GROUP (ORDER BY q1,q2,...)) from ...
 
   (where typeof(p1)==typeof(q1) and so on, at least up to trivial
   conversions)
 
   The motivating example here is to be able to do rank(p1,p2,...) to
   return the rank that the specified values would have had if they were
   added to the group.
 
As usual, we do not want to constrain ourselves to supporting only the
specific cases in the spec, but would prefer a general solution.
 
We (meaning myself and Atri) have an implementation that basically
works, though it is not yet complete, but before taking it any further
we need to resolve the design question of how to represent these two
types of function in the system catalogs. The fact that there are in
effect two parts to the parameter list, which are either independent
(for inverse distribution funcs) or closely related (for hypothetical
set functions), doesn't seem to point to an obvious way to represent
this in pg_proc/pg_aggregate.
 
I'm not yet satisfied with the method used in our implementation, so
we're throwing this open for suggestions. We will post the
work-in-progress patch along with a description of its current
implementation shortly.
 
One of the major complications is that we ideally want to be able to do
polymorphism based on the type of the sorted group, specifically in
order to be able to do
 
percentile_disc(float8) within group (order by anyelement)
 
returning anyelement. (i.e. we should be able to get a discrete
percentile from any type that is orderable.) The question here is how to
resolve the return type both of the aggregate itself and of the finalfn.
 
We've also had an expression of interest in extending this to allow
percentile_disc(float8[]) and percentile_cont(float8[]) returning
arrays; e.g. percentile_cont(array[0, 0.25, 0.5, 0.75, 1]) to return an
array containing the bounds, median and quartiles in one go. This is an
extension to the spec but it seems sufficiently obviously useful to be
worth supporting.
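
For reference, the continuous-percentile computation itself is plain
linear interpolation. Stated informally (a summary of the spec's usual
formulation, with sorted values $v_1 \le \dots \le v_N$ and fraction $p$):

    $RN = 1 + p\,(N - 1)$
    $\mathrm{percentile\_cont}(p) = v_{\lfloor RN \rfloor}
        + (RN - \lfloor RN \rfloor)\,(v_{\lceil RN \rceil} - v_{\lfloor RN \rfloor})$

For example, $p = 0.5$ with $N = 4$ gives $RN = 2.5$, the mean of $v_2$
and $v_3$, which is the familiar even-count median.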

Comments?

-- 
Andrew (irc:RhodiumToad)




Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 On 7/17/13 9:16 PM, Tatsuo Ishii wrote:
 Now suppose we have 3 transactions and each has following values:

 d(0) = 10
 d(1) = 20
 d(2) = 30

 t(0) = 100
 t(1) = 110
 t(2) = 120

 That says pgbench expects a duration of 10 for each
 transaction. Actually, the first transaction runs slowly for some
 reason and its lag = 100 - 10 = 90. However, tx(1) and tx(2) finish
 on schedule because they each take only 10 (110-100 = 10, 120-110
 = 10). So the expected average lag would be 90/3 = 30.
 
 The clients are not serialized here in any significant way, even when
 they shared a single process/thread.  I did many rounds of tracing
 through this code with timestamps on each line, and the sequence of
 events here will look like this:

My example is for the single-client case, so concurrent clients are not
the issue here.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp




Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Amit Kapila
On Thursday, July 18, 2013 2:36 AM Alvaro Herrera wrote:
 Robert Haas escribió:
 
  This feature doesn't strike me as a good candidate for regression
  testing anyway.  Keep in mind that people are intended to be able to
  run make installcheck against a deployed system without messing it
 up.
 
 This is my opinion as well.

A summary of all tests in this patch:
1. Set SIGHUP parameters in the config file, reload and sleep, and then
check the values of the parameters to validate whether they are set.
2. Reset all parameters so that the system returns to its previous state,
then reload and sleep.
3. Negative scenario tests

As Robert suggested, timing-based tests are not good candidates
for regression tests, which essentially means that all tests
under points 1 & 2 are not good candidates for testing. If the tests
corresponding to points 1 & 2 are removed, then keeping some negative
tests also doesn't seem to make much sense.

To cover the Alter System functionality and code, we can have Alter System
commands without reload and sleep, but I think that might not be of much use
until we verify the result using SHOW.

So I will remove the tests from this patch in next updated version if there
is no other useful way to achieve it.

On a side note, I checked that a few tests in the regression suite already
use pg_sleep(), and one of them, in stats.sql, is somewhat similar: it
waits using pg_sleep() to let the prior tests' actions complete.
I understand that having it in one place doesn't justify using it at
other places, but I still think that if some tests using a similar mechanism
are running successfully, we can add a new one, provided we ensure the timing
doesn't cause problems.

With Regards,
Amit Kapila.





Re: [HACKERS] Fix pgstattuple/pgstatindex to use regclass-type as the argument

2013-07-17 Thread Satoshi Nagayasu

(2013/07/18 2:31), Fujii Masao wrote:

On Tue, Jul 16, 2013 at 3:00 PM, Satoshi Nagayasu sn...@uptime.jp wrote:

(2013/07/04 3:58), Fujii Masao wrote:

For the test, I just implemented the regclass-version of pg_relpages()
(patch attached) and tested some cases. But I could not get that problem.

  SELECT pg_relpages('hoge');  -- OK
  SELECT pg_relpages(oid) FROM pg_class WHERE relname = 'hoge';  -- OK
  SELECT pg_relpages(relname) FROM pg_class WHERE relname = 'hoge';  -- OK


In the attached patch, I cleaned up three functions to have
two types of arguments for each, text and regclass.

   pgstattuple(text)
   pgstattuple(regclass)
   pgstatindex(text)
   pgstatindex(regclass)
   pg_relpages(text)
   pg_relpages(regclass)

I still think a regclass argument is more appropriate for passing a
relation/index name to a function than a text argument, but having both
argument types for each function seems to be a good choice at this moment,
in terms of backward compatibility.
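
As an aside, a regclass argument arrives in C as just the relation's
OID, so the regclass variants need no name parsing at all. A minimal
sketch of that shape -- the function name here is hypothetical, and
this is illustrative only, not the attached patch:

#include "postgres.h"
#include "fmgr.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

extern Datum pg_relpages_regclass(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(pg_relpages_regclass);

Datum
pg_relpages_regclass(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);	/* regclass comes in as an OID */
	Relation	rel = relation_open(relid, AccessShareLock);
	int64		relpages = RelationGetNumberOfBlocks(rel);

	relation_close(rel, AccessShareLock);
	PG_RETURN_INT64(relpages);
}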

Docs need to be updated if this change is going to be applied.


Yes, please.


Updated docs and code comments, etc. PFA.


Any comments?


'make installcheck' failed on my machine.


Hmm, it works on my box...


Do we need to remove pgstattuple--1.1.sql and create pgstattuple--1.1--1.2.sql?

+/* contrib/pgstattuple/pgstattuple--1.1.sql */

Typo: s/1.1/1.2


Done.


You seem to have forgotten to update pgstattuple.c.


Should I change something in pgstattuple.c?

I just changed the CREATE FUNCTION statement for pgstattuple
to replace the oid input arg with regclass.

Regards,
--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index fc893d8..957742a 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -4,7 +4,7 @@ MODULE_big  = pgstattuple
 OBJS   = pgstattuple.o pgstatindex.o
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.1.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 
 REGRESS = pgstattuple
 
diff --git a/contrib/pgstattuple/expected/pgstattuple.out b/contrib/pgstattuple/expected/pgstattuple.out
index ab28f50..eaba306 100644
--- a/contrib/pgstattuple/expected/pgstattuple.out
+++ b/contrib/pgstattuple/expected/pgstattuple.out
@@ -11,12 +11,24 @@ select * from pgstattuple('test'::text);
          0 |           0 |         0 |             0 |                0 |              0 |                  0 |          0 |            0
 (1 row)
 
+select * from pgstattuple('test'::name);
+ table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent 
+-----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+--------------
+         0 |           0 |         0 |             0 |                0 |              0 |                  0 |          0 |            0
+(1 row)
+
 select * from pgstattuple('test'::regclass);
  table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent 
 -----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+--------------
          0 |           0 |         0 |             0 |                0 |              0 |                  0 |          0 |            0
 (1 row)
 
+select * from pgstattuple('test'::regclass::oid);
+ table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent 
+-----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+--------------
+         0 |           0 |         0 |             0 |                0 |              0 |                  0 |          0 |            0
+(1 row)
+
 select * from pgstatindex('test_pkey');
  version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation 
 ---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+--------------------
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index 97f897e..41e90e3 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -39,12 +39,24 @@
 #include "utils/rel.h"
 
 
+/*
+ * For backward compatibility, we have decided to provide two
+ * interfaces for each function: one taking a regclass-type input
+ * argument and one taking a text-type input argument.
+ *
+ * The functions taking a text-type input argument will be deprecated
+ * in a future release.
+ */
 extern Datum pgstatindex(PG_FUNCTION_ARGS);
+extern Datum 

Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Greg Smith

On 7/17/13 11:34 PM, Tatsuo Ishii wrote:

My example is for the single-client case, so concurrent clients are not
the issue here.


Sorry about that, with your clarification I see what you were trying to 
explain now.  The code initializes the target time like this:


thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);

And then each time a transaction fires, it advances the reference time 
forward based on the expected rate:


thread->throttle_trigger += wait;

It does *not* reset thread->throttle_trigger based on when the previous 
transaction ended / when the next transaction started.  If the goal is 
10us transaction times, it beats a steady drum saying the transactions 
should come at 10us, 20us, 30us (on average--there's some randomness in 
the goals).  It does not pay any attention to when the previous 
transactions finished.


That means that if an early transaction takes an extra 1000us, every 
transaction after that will also show as 1000us late--even if all of 
them take 10us.  You expect that those later transactions will show 0 
lag, since they took the right duration.  For that to happen, 
thread->throttle_trigger would need to be re-initialized with the 
current time at the end of each completed transaction.


The lag computation was not the interesting part of this feature to me. 
 As I said before, I considered it more of a debugging level thing than 
a number people would analyze as much as you did.  I understand why you 
don't like it though.  If the reference time was moved forward to match 
the transaction end each time, I think that would give the lag 
definition you're looking for.  That's fine to me too, if Fabien doesn't 
have a good reason to reject the idea.  We would need to make sure that 
doesn't break some part of the design too.


--
Greg Smith   2ndQuadrant US   g...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com




[HACKERS] pgindent behavior we could do without

2013-07-17 Thread Tom Lane
It's always annoyed me that pgindent insists on adjusting vertical
whitespace around #else and related commands.  This has, for example,
rendered src/include/storage/barrier.h nigh illegible: you get things
like

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier() __asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier()   __asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier()  __asm__ __volatile__ ("lwsync" : : : "memory")
#elif defined(__alpha) || defined(__alpha__)/* Alpha */

which makes it look like this block of code has something to do with
Alpha.

By chance, I noticed today that this misbehavior comes from a discretely
identifiable spot, to wit lines 289-290 in src/tools/pgindent/pgindent:

# Remove blank line(s) before #else, #elif, and #endif
$source =~ s!\n\n+(\#else|\#elif|\#endif)!\n$1!g;

This seems pretty broken to me: why exactly is whitespace there such a
bad idea?  Not only that, but the next action is concerned with undoing
some of the damage this rule causes:

# Add blank line before #endif if it is the last line in the file
$source =~ s!\n(#endif.*)\n\z!\n\n$1\n!;

I assert that we should simply remove both of these bits of code, as
just about every committer on the project is smarter about when to use
vertical whitespace than this program is.
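
Concretely, that first substitution squashes code like this (a made-up
example; HAVE_FOO and the declarations are hypothetical):

#ifdef HAVE_FOO
extern void foo(void);

#else
extern void foo_fallback(void);
#endif

into this, visually attaching the declaration to the wrong branch:

#ifdef HAVE_FOO
extern void foo(void);
#else
extern void foo_fallback(void);
#endif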

Thoughts?

regards, tom lane




[HACKERS] Re: Proposal/design feedback needed: WITHIN GROUP (sql standard ordered set aggregate functions)

2013-07-17 Thread David Fetter
On Thu, Jul 18, 2013 at 03:15:14AM +, Andrew Gierth wrote:
 The spec defines two types of aggregate function classed as ordered set
 function, as follows:
  
 1. An inverse distribution function taking one argument (which must be
a grouped column or otherwise constant within groups) plus a sorted
group with exactly one column:
  
=# SELECT (func(p) WITHIN GROUP (ORDER BY q)) from ...
  
The motivating example for this (and the only ones in the spec) are
percentile_cont and percentile_disc, to return a percentile result
from a continuous or discrete distribution. (Thus
percentile_cont(0.5) within group (order by x) is the spec's version
of a median(x) function.)
  
 2. A hypothetical set function taking N arguments of arbitrary types
    (a la VARIADIC "any", rather than a fixed list) plus a sorted group
with N columns of matching types:
  
=# SELECT (func(p1,p2,...) WITHIN GROUP (ORDER BY q1,q2,...)) from ...
  
(where typeof(p1)==typeof(q1) and so on, at least up to trivial
conversions)
  
The motivating example here is to be able to do rank(p1,p2,...) to
return the rank that the specified values would have had if they were
added to the group.
  
 As usual, we do not want to constrain ourselves to supporting only the
 specific cases in the spec, but would prefer a general solution.
  
 We (meaning myself and Atri) have an implementation that basically
 works, though it is not yet complete, but before taking it any further
 we need to resolve the design question of how to represent these two
 types of function in the system catalogs. The fact that there are in
 effect two parts to the parameter list, which are either independent
 (for inverse distribution funcs) or closely related (for hypothetical
 set functions), doesn't seem to point to an obvious way to represent
 this in pg_proc/pg_aggregate.
  
 I'm not yet satisfied with the method used in our implementation,

What is that method?

 so we're throwing this open for suggestions. We will post the
 work-in-progress patch along with a description of its current
 implementation shortly.
  
 One of the major complications is that we ideally want to be able to
 do polymorphism based on the type of the sorted group, specifically
 in order to be able to do
  
 percentile_disc(float8) within group (order by anyelement)
  
 returning anyelement. (i.e. we should be able to get a discrete
 percentile from any type that is orderable.) The question here is
 how to resolve the return type both of the aggregate itself and of
 the finalfn.
  
 We've also had an expression of interest in extending this to allow
 percentile_disc(float8[]) and percentile_cont(float8[]) returning
 arrays; e.g. percentile_cont(array[0, 0.25, 0.5, 0.75, 1]) to return
 an array containing the bounds, median and quartiles in one go. This
 is an extension to the spec but it seems sufficiently obviously
 useful to be worth supporting.
 
 Comments?

I'm really happy to see PostgreSQL come into its own when it comes to
the analytics side of the house :)

Cheers,
David.
-- 
David Fetter da...@fetter.org http://fetter.org/
Phone: +1 415 235 3778  AIM: dfetter666  Yahoo!: dfetter
Skype: davidfetter  XMPP: david.fet...@gmail.com
iCal: webcal://www.tripit.com/feed/ical/people/david74/tripit.ics

Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate




Re: [HACKERS] [COMMITTERS] pgsql: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.

2013-07-17 Thread Hitoshi Harada
On Wed, Jul 17, 2013 at 7:11 AM, Tom Lane t...@sss.pgh.pa.us wrote:
 Kevin Grittner kgri...@postgresql.org writes:
 Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.

 The buildfarm members that use -DCLOBBER_CACHE_ALWAYS say this patch
 is broken.


Looks like rd_indpred is not correct if the index relation is fresh.
Something like this works for me.

diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index edd34ff..46149ee 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -634,7 +634,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid)

/* Skip partial indexes. */
 		indexRel = index_open(index->indexrelid, RowExclusiveLock);
-		if (indexRel->rd_indpred != NIL)
+		if (RelationGetIndexPredicate(indexRel) != NIL)
{
index_close(indexRel, NoLock);
ReleaseSysCache(indexTuple);

--
Hitoshi Harada




Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 Sorry about that, with your clarification I see what you were trying
 to explain now.  The code initializes the target time like this:
 
 thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
 
 And then each time a transaction fires, it advances the reference time
 forward based on the expected rate:
 
 thread->throttle_trigger += wait;
 
 It does *not* reset thread->throttle_trigger based on when the
 previous transaction ended / when the next transaction started.  If
 the goal is 10us transaction times, it beats a steady drum saying the
 transactions should come at 10us, 20us, 30us (on average--there's some
 randomness in the goals).  It does not pay any attention to when the
 previous transactions finished.
 
 That means that if an early transaction takes an extra 1000us, every
 transaction after that will also show as 1000us late--even if all of
 them take 10us.  You expect that those later transactions will show 0
 lag, since they took the right duration.  For that to happen,
 thread->throttle_trigger would need to be re-initialized with the
 current time at the end of each completed transaction.

Yes, that's exactly what I understand from the code.

 The lag computation was not the interesting part of this feature to
 me.  As I said before, I considered it more of a debugging level thing
 than a number people would analyze as much as you did.  I understand
 why you don't like it though.  If the reference time was moved forward
 to match the transaction end each time, I think that would give the
 lag definition you're looking for.  That's fine to me too, if Fabien
 doesn't have a good reason to reject the idea.  We would need to make
 sure that doesn't break some part of the design too.

I would like to hear from Fabien about the issue too.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp




Re: [HACKERS] Fix pgstattuple/pgstatindex to use regclass-type as the argument

2013-07-17 Thread Rushabh Lathia
On Thu, Jul 18, 2013 at 9:40 AM, Satoshi Nagayasu sn...@uptime.jp wrote:

 (2013/07/18 2:31), Fujii Masao wrote:

 On Tue, Jul 16, 2013 at 3:00 PM, Satoshi Nagayasu sn...@uptime.jp
 wrote:

 (2013/07/04 3:58), Fujii Masao wrote:

 For the test, I just implemented the regclass-version of pg_relpages()
 (patch attached) and tested some cases. But I could not get that
 problem.

   SELECT pg_relpages('hoge');-- OK
   SELECT pg_relpages(oid) FROM pg_class WHERE relname = 'hoge';
  -- OK
   SELECT pg_relpages(relname) FROM pg_class WHERE relname = 'hoge';
-- OK


 In the attached patch, I cleaned up three functions to have
 two types of arguments for each, text and regclass.

pgstattuple(text)
pgstattuple(regclass)
pgstatindex(text)
pgstatindex(regclass)
pg_relpages(text)
pg_relpages(regclass)

 I still think a regclass argument is more appropriate for passing
 relation/index name to a function than text-type, but having both
 arguments in each function seems to be a good choice at this moment,
 in terms of backward-compatibility.

 Docs needs to be updated if this change going to be applied.


 Yes, please.


 Updated docs and code comments, etc. PFA.


Looks good.




  Any comments?


 'make installcheck' failed on my machine.


 Hmm, it works on my box...


Works for me too.

Overall looks good to me.





  Do we need to remove pgstattuple--1.1.sql and create
 pgstattuple--1.1--1.2.sql?

 +/* contrib/pgstattuple/pgstattuple--1.1.sql */

 Typo: s/1.1/1.2


 Done.


  You seem to have forgotten to update pgstattuple.c.


 Should I change something in pgstattuple.c?

 I just changed CREATE FUNCTION statement for pgstattuple
 to replace oid input arg with the regclass.

 Regards,

 --
 Satoshi Nagayasu sn...@uptime.jp
 Uptime Technologies, LLC. http://www.uptime.jp






Thanks,
Rushabh Lathia


Re: [HACKERS] [COMMITTERS] pgsql: Add support for REFRESH MATERIALIZED VIEW CONCURRENTLY.

2013-07-17 Thread Tom Lane
Hitoshi Harada umi.tan...@gmail.com writes:
 Looks like rd_indpred is not correct if index relation is fresh.
 Something like this works for me.

 -   if (indexRel->rd_indpred != NIL)
 +   if (RelationGetIndexPredicate(indexRel) != NIL)

Hm, yeah, the direct access to rd_indpred is certainly wrong.
Will apply, thanks!

regards, tom lane




Re: ALTER SYSTEM SET command to change postgresql.conf parameters (RE: [HACKERS] Proposal for Allow postgresql.conf values to be changed via SQL [review])

2013-07-17 Thread Amit Kapila
On Thursday, July 18, 2013 12:31 AM Alvaro Herrera wrote:
 Amit kapila escribió:
 
 
   +# This includes the default configuration directory included to
 support
   +# ALTER SYSTEM statement.
   +#
   +# WARNING: User should not remove below include_dir or directory
 config,
   +#  as both are required to make ALTER SYSTEM command work.
   +#  Any configuration parameter values specified after this
 line
   +#  will override the values set by ALTER SYSTEM statement.
   +#include_dir = 'config'
 
   Why do we need to expose this setting to a user?
 
  a) This can be a knob to turn this feature off. This has been asked
 for by a few people;
 one of the mail links is mentioned below (refer towards the end of the
 mail in the link):
 http://www.postgresql.org/message-id/515b04f9.30...@gmx.net
 
  b) In case the user wants to change the priority of parameters set by
 Alter System, he can move the
 include_dir up or down in postgresql.conf.
 
 Both of these seem like they would make troubleshooters' lives more
 difficult.  I think we should just parse the auto file automatically
 after parsing postgresql.conf, without requiring the include directive
 to be there.

Okay, I shall modify the patch that way. However, the file will still be
in the config directory.
Now 2 things regarding this need to be taken care of:

a. As raised by Peter Eisentraut, shall we parse the auto file before or
after postgresql.conf? By default I will keep it after, until there is some
reason to do it before.
b. If the directory/file is missing while parsing, issue a warning message.

Kindly let me know your opinion about the same.

With Regards,
Amit Kapila.





[HACKERS] Re: Proposal/design feedback needed: WITHIN GROUP (sql standard ordered set aggregate functions)

2013-07-17 Thread Atri Sharma
On Thu, Jul 18, 2013 at 10:02 AM, David Fetter da...@fetter.org wrote:
 On Thu, Jul 18, 2013 at 03:15:14AM +, Andrew Gierth wrote:
 The spec defines two types of aggregate function classed as ordered set
 function, as follows:

 1. An inverse distribution function taking one argument (which must be
a grouped column or otherwise constant within groups) plus a sorted
group with exactly one column:

=# SELECT (func(p) WITHIN GROUP (ORDER BY q)) from ...

The motivating example for this (and the only ones in the spec) are
percentile_cont and percentile_disc, to return a percentile result
from a continuous or discrete distribution. (Thus
percentile_cont(0.5) within group (order by x) is the spec's version
of a median(x) function.)

 2. A hypothetical set function taking N arguments of arbitrary types
    (a la VARIADIC "any", rather than a fixed list) plus a sorted group
with N columns of matching types:

=# SELECT (func(p1,p2,...) WITHIN GROUP (ORDER BY q1,q2,...)) from ...

(where typeof(p1)==typeof(q1) and so on, at least up to trivial
conversions)

The motivating example here is to be able to do rank(p1,p2,...) to
return the rank that the specified values would have had if they were
added to the group.

 As usual, we do not want to constrain ourselves to supporting only the
 specific cases in the spec, but would prefer a general solution.

 We (meaning myself and Atri) have an implementation that basically
 works, though it is not yet complete, but before taking it any further
 we need to resolve the design question of how to represent these two
 types of function in the system catalogs. The fact that there are in
 effect two parts to the parameter list, which are either independent
 (for inverse distribution funcs) or closely related (for hypothetical
 set functions), doesn't seem to point to an obvious way to represent
 this in pg_proc/pg_aggregate.

 I'm not yet satisfied with the method used in our implementation,

 What is that method?

We currently represent ordered set functions with a new bool flag in
pg_aggregate. The flag is set to true for ordered set
functions (obviously) and false for all others. The currently
implemented functions, i.e. percentile_disc, percentile_cont and
percentile_cont for intervals, have their finalfns present in
pg_aggregate.

The aggregate functions take in two arguments, one for the percentile
value and the other for the input row set. So percentile_cont's entry in
pg_proc has float8 and float8 as its parameters, and another entry for
percentile_cont (with the interval version as the finalfn) has float8
and interval as its parameter types.

As you can see, there isn't a way right now to resolve the return type
of the aggregate for polymorphic cases. This is something we wish to
resolve.

Regards,

Atri





--
Regards,

Atri
l'apprenant




Re: [HACKERS] [PATCH] pgbench --throttle (submission 7 - with lag measurement)

2013-07-17 Thread Tatsuo Ishii
 Sorry about that, with your clarification I see what you were trying
 to explain now.  The code initializes the target time like this:
 
 thread->throttle_trigger = INSTR_TIME_GET_MICROSEC(start);
 
 And then each time a transaction fires, it advances the reference time
 forward based on the expected rate:
 
 thread->throttle_trigger += wait;
 
 It does *not* reset thread->throttle_trigger based on when the
 previous transaction ended / when the next transaction started.  If
 the goal is 10us transaction times, it beats a steady drum saying the
 transactions should come at 10us, 20us, 30us (on average--there's some
 randomness in the goals).  It does not pay any attention to when the
 previous transactions finished.
 
 That means that if an early transaction takes an extra 1000us, every
 transaction after that will also show as 1000us late--even if all of
 them take 10us.  You expect that those later transactions will show 0
 lag, since they took the right duration.  For that to happen,
 thread->throttle_trigger would need to be re-initialized with the
 current time at the end of each completed transaction.
 
 Yes, that's exactly what I understand from the code.
 
 The lag computation was not the interesting part of this feature to
 me.  As I said before, I considered it more of a debugging level thing
 than a number people would analyze as much as you did.  I understand
 why you don't like it though.  If the reference time was moved forward
 to match the transaction end each time, I think that would give the
 lag definition you're looking for.  That's fine to me too, if Fabien
 doesn't have a good reason to reject the idea.  We would need to make
 sure that doesn't break some part of the design too.
 
 I would like to hear from Fabien about the issue too.

For your information, included is a patch against git master head that
implements the lag in the way I proposed. With the patch, I get more
consistent numbers on Linux (and Mac OS X).
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp
diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 2ad8f0b..57e62dc 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -137,6 +137,12 @@ int			unlogged_tables = 0;
 double		sample_rate = 0.0;
 
 /*
+ * When threads are throttled to a given rate limit, this is the target delay
+ * to reach that rate in usec.  0 is the default and means no throttling.
+ */
+int64		throttle_delay = 0;
+
+/*
  * tablespace selection
  */
 char	   *tablespace = NULL;
@@ -202,11 +208,15 @@ typedef struct
 	int			listen;			/* 0 indicates that an async query has been
  * sent */
 	int			sleeping;		/* 1 indicates that the client is napping */
+	bool		throttling;		/* whether nap is for throttling */
 	int64		until;			/* napping until (usec) */
+	int64		wait;			/* randomly generated delay (usec) */
 	Variable   *variables;		/* array of variable definitions */
 	int			nvariables;
 	instr_time	txn_begin;		/* used for measuring transaction latencies */
+	instr_time  txn_begin_throttle;		/* tx start time used when transaction throttling enabled */
 	instr_time	stmt_begin;		/* used for measuring statement latencies */
+	bool		is_throttled;	/* whether transaction throttling is done */
 	int			use_file;		/* index in sql_files for this client */
 	bool		prepared[MAX_FILES];
 } CState;
@@ -224,6 +234,9 @@ typedef struct
 	instr_time *exec_elapsed;	/* time spent executing cmds (per Command) */
 	int		   *exec_count;		/* number of cmd executions (per Command) */
 	unsigned short random_state[3];		/* separate randomness for each thread */
+	int64   throttle_trigger; 	/* previous/next throttling (us) */
+	int64   throttle_lag; 		/* total transaction lag behind throttling */
+	int64   throttle_lag_max; 	/* max transaction lag */
 } TState;
 
 #define INVALID_THREAD		((pthread_t) 0)
@@ -232,6 +245,8 @@ typedef struct
 {
 	instr_time	conn_time;
 	int			xacts;
+	int64   throttle_lag;
+	int64   throttle_lag_max;
 } TResult;
 
 /*
@@ -356,6 +371,7 @@ usage(void)
 		 -N, --skip-some-updates  skip updates of pgbench_tellers and pgbench_branches\n
 		 -P, --progress=NUM   show thread progress report every NUM seconds\n
 		 -r, --report-latencies   report average latency per command\n
+		 -R, --rate=SPEC  target rate in transactions per second\n
 		 -s, --scale=NUM  report this scale factor in output\n
 		 -S, --select-onlyperform SELECT-only transactions\n
 		 -t, --transactions   number of transactions each client runs 
@@ -898,19 +914,80 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVa
 {
 	PGresult   *res;
 	Command   **commands;
+	bool		trans_needs_throttle = false;
 
 top:
 	commands = sql_files[st->use_file];
 
+	/*
+	 * Handle throttling once per transaction by sleeping.  It is simpler
+	 * to do this here rather than at