Re: [HACKERS] [PATCH] Assert that the correct locks are held when calling PageGetLSN()

2017-09-05 Thread Michael Paquier
On Wed, Sep 6, 2017 at 1:15 AM, Jacob Champion  wrote:
> On Sun, Sep 3, 2017 at 11:14 PM, Michael Paquier
>  wrote:
>> +#if defined(USE_ASSERT_CHECKING) && !defined(FRONTEND)
>> +void
>> +AssertPageIsLockedForLSN(Page page)
>> +{
>> From a design point of view, bufmgr.c is an API interface for buffer
>> management on the backend-size, so just using FRONTEND there looks
>> wrong to me (bufmgr.h is already bad enough IMO now).
>
> The comments suggested that bufmgr.h could be incidentally pulled into
> frontend code through other headers. Or do you mean that I don't need
> to check for FRONTEND in the C source file (since, I assume, it's only
> ever being compiled for the backend)? I'm not sure what you mean by
> "bufmgr.h is already bad enough".

I mean that this should not become worse without a better reason. This
patch makes it so.

>> This bit in src/backend/access/transam/README may interest you:
>> Note that we must only use PageSetLSN/PageGetLSN() when we know the action
>> is serialised. Only Startup process may modify data blocks during recovery,
>> so Startup process may execute PageGetLSN() without fear of serialisation
>> problems. All other processes must only call PageSet/GetLSN when holding
>> either an exclusive buffer lock or a shared lock plus buffer header lock,
>> or be writing the data block directly rather than through shared buffers
>> while holding AccessExclusiveLock on the relation.
>>
>> So I see no problem with the existing caller.
>
> Is heap_xlog_visible only called during startup?

Recovery is more exact. When replaying a XLOG_HEAP2_VISIBLE record.

> If so, is there a
> general rule for which locks are required during startup and which
> aren't?

You can surely say that a process is fine to read a variable without a
lock if it is the only one updating it, other processes would need a
lock to read a variable. In this case the startup process is the only
one updating blocks, so that's at least one code path where there is no
need to take a lock when looking at a page LSN with PageGetLSN.
Another example is pageinspect which works on a copy of a page and
uses PageGetLSN, so no direct locks on the buffer page is needed.

In short, it seems to me that this patch should be rejected in its
current shape. There is no need to put more constraints on a API which
does not need more constraints and which is used widely by extensions.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgbench: Skipping the creating primary keys after initialization

2017-09-05 Thread Masahiko Sawada
On Wed, Sep 6, 2017 at 12:11 AM, Fabien COELHO  wrote:
>
>> Sorry, I don't follow that. You meant I should add a newline before
>> pg_realloc()? That is,
>>
>> +initialize_cmds =
>> +(char *) pg_realloc(initialize_cmds,
>> +sizeof(char) * n_cmds +
>> 1);
>
>
> Yes. Or maybe my terminal was doing tricks, because I had the impression
> that both argument where on the same line with many tabs in between, but
> maybe I just misinterpreted the diff file. My apology if it is the case.
>

I understood. It looks ugly in the patch but can be applied properly
by git apply command. Attached latest patch incorporated the comments
I got so far.

Regards,

--
Masahiko Sawada
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index f5db8d1..48e3581 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -159,6 +159,75 @@ pgbench  options  dbname
  
 
  
+  -I {custom_init_command [...]}
+  --custom-initialize={custom_init_command [...]}
+  
+   
+Specify custom initialization steps.
+custom_init_command specifies custom
+initialization steps for the initialization. The default is
+ctgvp. Each command is invoked in the
+specified order. The supported commands are:
+
+
+ 
+  c (cleanup)
+  
+   
+Destroying existing pgbench tables: pgbench_accounts,
+pgbench_branches, pgbench_history
+and pgbench_tellers.
+   
+  
+ 
+ 
+  t (table creation)
+  
+   
+Create four tables pgbench_accounts,
+pgbench_branches, pgbench_history
+and pgbench_tellers.
+   
+  
+ 
+ 
+  g (generate data)
+  
+   
+Generate data and load it to standard tables.
+   
+  
+ 
+ 
+  v (vacuum)
+  
+   
+Invoke vacuum on standard tables.
+   
+  
+ 
+ 
+  p (primary key)
+  
+   
+Create primary keys on standard tables.
+   
+  
+ 
+ 
+  f (foreign key)
+  
+   
+Create foreign keys constraints between the standard tables.
+   
+  
+ 
+
+   
+  
+ 
+
+ 
   -F fillfactor
   --fillfactor=fillfactor
   
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index e37496c..f6b0452 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -95,6 +95,8 @@ static int	pthread_join(pthread_t th, void **thread_return);
 
 #define MIN_GAUSSIAN_PARAM		2.0 /* minimum parameter for gauss */
 
+#define DEFAULT_INIT_COMMANDS "ctgvp"
+
 int			nxacts = 0;			/* number of transactions per client */
 int			duration = 0;		/* duration in seconds */
 int64		end_time = 0;		/* when to stop in micro seconds, under -T */
@@ -112,9 +114,9 @@ int			scale = 1;
 int			fillfactor = 100;
 
 /*
- * create foreign key constraints on the tables?
+ * no vacuum at all before testing?
  */
-int			foreign_keys = 0;
+bool			is_no_vacuum = false;
 
 /*
  * use unlogged tables?
@@ -458,6 +460,12 @@ static void pgbench_error(const char *fmt,...) pg_attribute_printf(1, 2);
 static void addScript(ParsedScript script);
 static void *threadRun(void *arg);
 static void setalarm(int seconds);
+static void initCleanupTables(PGconn *con);
+static void initCreateTables(PGconn *con);
+static void initLoadData(PGconn *con);
+static void initVacuum(PGconn *con);
+static void initCreatePKeys(PGconn *con);
+static void initCreateFKeys(PGconn *con);
 
 
 /* callback functions for our flex lexer */
@@ -484,6 +492,8 @@ usage(void)
 		   "   create indexes in the specified tablespace\n"
 		   "  --tablespace=TABLESPACE  create tables in the specified tablespace\n"
 		   "  --unlogged-tablescreate tables as unlogged tables\n"
+		   "  -I, --custom-initialize=[ctgvpf]+\n"
+		   "   initialize with custom initialization commands\n"
 		   "\nOptions to select what to run:\n"
 		   "  -b, --builtin=NAME[@W]   add builtin script NAME weighted at W (default: 1)\n"
 		   "   (use \"-b list\" to list available scripts)\n"
@@ -2605,9 +2615,55 @@ disconnect_all(CState *state, int length)
 	}
 }
 
-/* create tables and setup data */
+/* Check custom initialization commands */
+static void
+checkCustomCommands(char *commands)
+{
+	char	*cmd;
+
+	if (commands[0] == '\0')
+	{
+		fprintf(stderr, "initialize command is empty\n");
+		exit(1);
+	}
+
+	for (cmd = commands; *cmd != '\0'; cmd++)
+	{
+		if (strchr("ctgvpf ", *cmd) == NULL)

Re: [HACKERS] Parallel Append implementation

2017-09-05 Thread Amit Khandekar
On 30 August 2017 at 17:32, Amit Khandekar  wrote:
> On 16 August 2017 at 18:34, Robert Haas  wrote:
>> Thanks for the benchmarking results!
>>
>> On Tue, Aug 15, 2017 at 11:35 PM, Rafia Sabih
>>  wrote:
>>> Q4 | 244 | 12 | PA and PWJ, time by only PWJ - 41
>>
>> 12 seconds instead of 244?  Whoa.  I find it curious that we picked a
>> Parallel Append with a bunch of non-partial plans when we could've
>> just as easily picked partial plans, or so it seems to me.  To put
>> that another way, why did we end up with a bunch of Bitmap Heap Scans
>> here instead of Parallel Bitmap Heap Scans?
>
> Actually, the cost difference would be quite low for Parallel Append
> with partial plans and Parallel Append with non-partial plans with 2
> workers. But yes, I should take a look at why it is consistently
> taking non-partial Bitmap Heap Scan.

Here, I checked that Partial Bitmap Heap Scan Path is not getting
created in the first place; but I think it should.

As you can see from the below plan snippet, the inner path of the join
is a parameterized Index Scan :

->  Parallel Append
 ->  Nested Loop Semi Join
   ->  Bitmap Heap Scan on orders_004
   Recheck Cond: ((o_orderdate >= '1994-01-01'::date) AND
(o_orderdate < '1994-04-01 00:00:00'::timestamp without time zone))
   ->  Bitmap Index Scan on idx_orders_orderdate_004
Index Cond: ((o_orderdate >= '1994-01-01'::date) AND
(o_orderdate < '1994-04-01 00:00:00'::timestamp without time zone))
   ->  Index Scan using idx_lineitem_orderkey_004 on lineitem_004
   Index Cond: (l_orderkey = orders_004.o_orderkey)
   Filter: (l_commitdate < l_receiptdate)

In the index condition of the inner IndexScan path, it is referencing
partition order_004 which is used by the outer path. So this should
satisfy the partial join path restriction concerning parameterized
inner path : "inner path should not refer to relations *outside* the
join path". Here, it is referring to relations *inside* the join path.
But still this join path gets rejected by try_partial_nestloop_path(),
here :

if (inner_path->param_info != NULL)
{
   Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
   if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
  return;
}

Actually, bms_is_subset() above should return true, because
inner_paramrels and outer_path relids should have orders_004. But
that's not happening. inner_paramrels is referring to orders, not
orders_004. And hence bms_is_subset() returns false (thereby rejecting
the partial nestloop path). I suspect this is because the innerpath is
not getting reparameterized so as to refer to child relations. In the
PWJ patch, I saw that reparameterize_path_by_child() is called by
try_nestloop_path(), but not by try_partial_nestloop_path().

Now, for Parallel Append, if this partial nestloop subpath gets
created, it may or may not get chosen, depending upon the number of
workers. For e.g. if the number of workers is 6, and ParallelAppend+PWJ
runs with only 2 partitions, then partial nestedloop join would
definitely win because we can put all 6 workers to work, whereas for
ParallelAppend with all non-partial nested loop join subpaths, at the
most only 2 workers could be allotted, one for each child. But if the
partitions are more, and available workers are less, then I think the
cost difference in case of partial versus non-partial join paths would
not be significant.

But here the issue is, partial nest loop subpaths don't get created in
the first place. Looking at the above analysis, this issue should be
worked by a different thread, not in this one.

-- 
Thanks,
-Amit Khandekar
EnterpriseDB Corporation
The Postgres Database Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [bug fix] Savepoint-related statements terminates connection

2017-09-05 Thread Catalin Iacob
On Mon, Sep 4, 2017 at 4:15 PM, Tom Lane  wrote:
> Also, the main thing that we need xact.c's involvement for in the first
> place is the fact that implicit transaction blocks, unlike regular ones,
> auto-cancel on an error, leaving you outside a block not inside a failed
> one.  So I don't exactly see how savepoints would fit into that.

I think this hits the nail on the head and should have a place in the
official docs as I now realize I didn't grasp this distinction before
I read this. My mental model was always "sending a bunch of semicolon
separated queries without BEGIN/COMMIT/ROLLBACK; in one PQexec is like
sending them one by one preceded by a BEGIN; and followed by a
COMMIT; except you only get the response from the last one". Also,
explain what happens when there are BEGIN/ROLLBACK/COMMIT inside that
multiquery string, that's still not completely clear to me and I don't
want to reverse engineer it from your patch.

> Now admittedly, the same set of issues pops up if one uses an
> explicit transaction block in a multi-query string:
>
> begin\; insert ...\; savepoint\; insert ...\; release savepoint\; insert 
> ...\; commit;

According to my mental model described above, this would be exactly
the same as without the begin; and commit; which is not the case so I
think the distinction is worth explaining.

I think the lack of a more detailed explanation about the stuff above
confuses *a lot* of people, especially newcomers, and the confusion is
only increased by what client drivers do on top (like issuing implicit
BEGIN if configured in various modes specified by
language-specific-DB-independent specs like Python's DBAPI or Java's
JDBC) and one's background from other DBs that do it differently.

Speaking of the above, psql also doesn't explicitly document how it
groups lines of the file it's executing into PQexec calls. See below
for a personal example of the confusions all this generates.

I also encountered this FATAL a month ago in the context of "we have
some (migration schema) queries in some files and want to orchestrate
running them for testing". Initially we started with calling psql but
then we needed some client side logic for some other stuff and
switched to Python and Psycopg2. We did "read the whole file in a
Python string" and then call Psycopg2's execute() on that string. Note
that Psycopg2 only uses PQexec to issue queries. We had some SAVEPOINT
statements in the file which lead to the backend stopping and the next
Psycopg2 execute() on that connection saying Connection closed.
It was already confusing why Psycopg2 behaves differently than psql
(because we were issuing the whole file in one PQexec vs. psql
splitting on ; and issuing multiple PQexecs and SAVEPOINTs working
there) and the backend stopping only added to that confusion. Add on
top of that "Should we put BEGIN; and COMMIT; in the file itself? Or
is a single Psycopg2 execute() enough to have this schema migration be
applied transactionally? Is there a difference between the two?".

I searched the docs for existing explanations of multiquery strings
and found these references but all of them are a bit hand wavy:
- psql's reference explaining -c
- libpq's PQexec explanation
- the message flow document in the FE/BE protocol description


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Replacing lfirst() with lfirst_node() appropriately in planner.c

2017-09-05 Thread Ashutosh Bapat
On Wed, Sep 6, 2017 at 1:32 AM, Tom Lane  wrote:
> Ashutosh Bapat  writes:
>> On Tue, Sep 5, 2017 at 4:02 PM, Jeevan Chalke
>>  wrote:
>>> Attached patch for replacing linitial() with linitial_node() appropriately in
>>> planner.c
>
>> Ok. The patch looks good to me. It compiles and make check passes.
>> Here are both the patches rebased on the latest sources.
>
> LGTM, pushed.  I also changed a couple of places that left off any cast
> of lfirst, eg:
>
> -   List   *gset = lfirst(lc);
> +   List   *gset = (List *) lfirst(lc);
>
> While this isn't really harmful, it's not per prevailing style.

+1. Thanks.

>
> BTW, I think we *could* use "lfirst_node(List, ...)" in cases where
> we know the list is supposed to be a list of objects rather than ints
> or Oids.  I didn't do anything about that observation, though.
>

IMO, it won't be apparent as to why some code uses lfirst_node(List,
...) and some code uses (List *) lfirst().

-- 
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Making clausesel.c Smarter

2017-09-05 Thread David Rowley
On 6 September 2017 at 00:43, Daniel Gustafsson  wrote:
> This patch was moved to the currently open Commitfest.  Given the above
> comment, is the last patch in this thread still up for review, or are you
> working on a new version?  Just to avoid anyone reviewing an outdated version.

Hi Daniel,

I've not had time to work on a new version yet and I can't imagine
that I will for quite some time.

The idea I had to remove the quadratic search on the list was to add
to or modify equal() in equalfuncs.c to have it return -1, 0, +1 and
use that as a comparison function in a binary search tree. The Btree
would complement List as a way of storing Nodes in no particular
order, but in a way that a Node can be found quickly. There's likely
more than a handful of places in the planner that would benefit from
this already.

It's not a 5-minute patch and it probably would see some resistance,
so won't have time to look at this soon.

If the possibility of this increasing planning time in corner cases is
going to be a problem, then it might be best to return this with
feedback for now and I'll resubmit if I get time later in the cycle.

-- 
 David Rowley   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Coverage improvements of src/bin/pg_basebackup and pg_receivewal --endpos

2017-09-05 Thread Michael Paquier
On Tue, Sep 5, 2017 at 10:19 PM, Peter Eisentraut
 wrote:
> On 6/9/17 02:08, Michael Paquier wrote:
>> I have just played with that, and attached is a patch to implement the
>> so-said option with a basic set of tests, increasing code coverage of
>> pg_receivewal.c from 15% to 60%. I'll park that in the next CF of
>> September.
>
> Looks like a good idea.  A couple of thoughts:

Thanks.

> - I think the tests should be written in the style of
> $node->command_fails instead of just command_fails.  At least, that's
> how it's done in the pg_basebackup tests.

Good idea.

> - I think the tests will fail if libz support is not built.  Again,
> pg_basebackup doesn't have tests for that.  So maybe omit that and
> address it later.

Let's address it now. This can be countered by querying pg_config()
before running the command of pg_receivewal which uses --compress.

> - The variable exit_on_stop seems redundant with time_to_abort.  They do
> the same thing, so maybe your patch could reuse it.

Yes, that's doable. time_to_abort does not seem a variable name
adapted to me though if both are combined, so I have renamed that to
time_to_stop, which maps more with the fact that stop can be also
willingly wanted without a SIGINT.

Attached is a new version of the patch.
-- 
Michael
From 7987b412b3b16f8995ccbe39c7de0e6762aa964d Mon Sep 17 00:00:00 2001
From: Michael Paquier 
Date: Wed, 6 Sep 2017 13:39:30 +0900
Subject: [PATCH] Implement pg_receivewal --endpos

This is primarily useful in making any regression test using this utility
more deterministic as pg_receivewal cannot be started as a daemon in TAP
tests.

While this is less useful than the equivalent of pg_recvlogical, users
can as well use it for example to enforce WAL streaming up to a
end-of-backup position, to save only a minimal amount of WAL, though the
end position of WAL is only known once the backup is finished, but some
users may want to get that done as a two-step process with one single WAL
receiver in use all the time.

Use this new option to stream WAL data in a deterministic way within a
new set of TAP tests.
---
 doc/src/sgml/ref/pg_receivewal.sgml  | 16 ++
 src/bin/pg_basebackup/pg_receivewal.c| 38 +++---
 src/bin/pg_basebackup/t/020_pg_receivewal.pl | 75 +++-
 3 files changed, 121 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/ref/pg_receivewal.sgml b/doc/src/sgml/ref/pg_receivewal.sgml
index 7c82e36c7c..6d192bd606 100644
--- a/doc/src/sgml/ref/pg_receivewal.sgml
+++ b/doc/src/sgml/ref/pg_receivewal.sgml
@@ -98,6 +98,22 @@ PostgreSQL documentation
   
  
 
+ 
+  -E lsn
+  --endpos=lsn
+  
+   
+If specified, automatically stop replication and exit with normal
+exit status 0 when receiving reaches the specified LSN.
+   
+
+   
+If there is a record with LSN exactly equal to lsn,
+the record will be considered.
+   
+  
+ 
+
  
   --if-not-exists
   
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 4a1a5658fb..93b396c10f 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -36,12 +36,13 @@ static int	verbose = 0;
 static int	compresslevel = 0;
 static int	noloop = 0;
 static int	standby_message_timeout = 10 * 1000;	/* 10 sec = default */
-static volatile bool time_to_abort = false;
+static volatile bool time_to_stop = false;
 static bool do_create_slot = false;
 static bool slot_exists_ok = false;
 static bool do_drop_slot = false;
 static bool synchronous = false;
 static char *replication_slot = NULL;
+static XLogRecPtr endpos = InvalidXLogRecPtr;
 
 
 static void usage(void);
@@ -78,6 +79,7 @@ usage(void)
 	printf(_("\nOptions:\n"));
 	printf(_("  -D, --directory=DIRreceive write-ahead log files into this directory\n"));
 	printf(_("  --if-not-existsdo not error if slot already exists when creating a slot\n"));
+	printf(_("  -E, --endpos=LSN   exit after receiving the specified LSN\n"));
 	printf(_("  -n, --no-loop  do not loop on connection lost\n"));
 	printf(_("  -s, --status-interval=SECS\n"
 			 " time between status packets sent to server (default: %d)\n"), (standby_message_timeout / 1000));
@@ -112,6 +114,16 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
 progname, (uint32) (xlogpos >> 32), (uint32) xlogpos,
 timeline);
 
+	if (!XLogRecPtrIsInvalid(endpos) && endpos < xlogpos)
+	{
+		if (verbose)
+			fprintf(stderr, _("%s: stopped streaming at %X/%X (timeline %u)\n"),
+	progname, (uint32) (xlogpos >> 32), (uint32) xlogpos,
+	timeline);
+		time_to_stop = true;
+		return true;
+	}
+
 	/*
 	 * Note that we report the previous, not current, position here. After a
 	 * timeline switch, xlogpos points to the beginning of the segment because
@@ -128,7 

[HACKERS] GatherMerge misses to push target list

2017-09-05 Thread Amit Kapila
During my recent work on costing of parallel paths [1], I noticed that
we are missing to push target list below GatherMerge in some simple
cases like below.

Test preparation
-
create or replace function simple_func(var1 integer) returns integer
as $$
begin
return var1 + 10;
end;
$$ language plpgsql PARALLEL SAFE;

create table t1(c1 int, c2 char(5));
insert into t1 values(generate_series(1,50), 'aaa');
set parallel_setup_cost=0;
set parallel_tuple_cost=0;
set min_parallel_table_scan_size=0;
set max_parallel_workers_per_gather=4;
set cpu_operator_cost=0; set min_parallel_index_scan_size=0;

Case-1
-
postgres=# explain (costs off, verbose) select c1,simple_func(c1) from
t1 where c1 < 1 order by c1;
 QUERY PLAN
-
 Gather Merge
   Output: c1, simple_func(c1)
   Workers Planned: 4
   ->  Parallel Index Scan using idx_t1 on public.t1
 Output: c1
 Index Cond: (t1.c1 < 1)
(6 rows)

In the above case, I don't see any reason why we can't push the target
list expression (simple_func(c1)) down to workers.

Case-2
--
set enable_indexonlyscan=off;
set enable_indexscan=off;
postgres=# explain (costs off, verbose) select c1,simple_func(c1) from
t1 where c1 < 1 order by c1;
 QUERY PLAN

 Gather Merge
   Output: c1, simple_func(c1)
   Workers Planned: 4
   ->  Sort
 Output: c1
 Sort Key: t1.c1
 ->  Parallel Bitmap Heap Scan on public.t1
   Output: c1
   Recheck Cond: (t1.c1 < 1)
   ->  Bitmap Index Scan on idx_t1
 Index Cond: (t1.c1 < 1)
(11 rows)

It is different from above case (Case-1) because sort node can't
project, but I think adding a Result node on top of it can help to
project and serve our purpose.

The attached patch allows pushing the target list expression in both
the cases. I think we should handle GatherMerge case in
apply_projection_to_path similar to Gather so that target list can be
pushed.  Another problem was during the creation of ordered paths
where we don't allow target expressions to be pushed below gather
merge.

Plans after patch:

Case-1
--
postgres=# explain (costs off, verbose) select c1,simple_func(c1) from
t1 where c1 < 1 order by c1;
QUERY PLAN
--
 Gather Merge
   Output: c1, (simple_func(c1))
   Workers Planned: 4
   ->  Parallel Index Only Scan using idx_t1 on public.t1
 Output: c1, simple_func(c1)
 Index Cond: (t1.c1 < 1)
(6 rows)

Case-2
---
postgres=# explain (costs off, verbose) select c1,simple_func(c1) from
t1 where c1 < 1 order by c1;
QUERY PLAN
--
 Gather Merge
   Output: c1, (simple_func(c1))
   Workers Planned: 4
   ->  Result
 Output: c1, simple_func(c1)
 ->  Sort
   Output: c1
   Sort Key: t1.c1
   ->  Parallel Bitmap Heap Scan on public.t1
 Output: c1
 Recheck Cond: (t1.c1 < 1)
 ->  Bitmap Index Scan on idx_t1
   Index Cond: (t1.c1 < 1)
(13 rows)

Note, that simple_func() is pushed down to workers in both the cases.

Thoughts?

Note - If we agree on the problems and fix, then I can add regression
tests to cover above cases in the patch.

[1] - 
https://www.postgresql.org/message-id/CAA4eK1Ji_0pgrjFotAyvvfxGikxJQEKcxD863VQ-xYtfQBy0uQ%40mail.gmail.com

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


pushdown_target_gathermerge_v1.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Tom Lane
Jeff Janes  writes:
> What scale factor and client count? How many cores per socket?  It looks
> like Sokolov was just starting to see gains at 200 clients on 72 cores,
> using -N transaction.

I spent some time poking at this with the test scaffolding I showed at
https://www.postgresql.org/message-id/17694.1504665058%40sss.pgh.pa.us

AFAICT, there are not huge differences between different coding methods
even for two processes in direct competition in the tightest possible
atomic-ops loops.  So if you're testing SQL operations, you're definitely
not going to see anything without a whole lot of concurrent processes.

Moreover, it matters which primitive you're testing, on which platform,
with which compiler, because we have a couple of layers of atomic ops
implementations.

I started out testing pg_atomic_fetch_add_u32(), as shown in the above-
mentioned message.  On x86_x64 with gcc, that is implemented by the
code for it in src/include/port/atomics/arch-x86.h.  If you dike out
that support, it falls back to the version in atomics/generic-gcc.h,
which seems no worse and possibly better.  Only if you also dike out
generic-gcc.h do you get to the version in atomics/generic.h that this
patch wants to change.  However, at that point you're also down to a
spinlock-based implementation of the underlying pg_atomic_read_u32_impl
and pg_atomic_compare_exchange_u32_impl, which means that performance
is going to be less than great anyway.  No platform that we consider
well-supported ought to be hitting the spinlock paths.  This means
that Sokolov's proposed changes in atomics/generic.h ought to be
uninteresting for performance on any platform we care about --- at
least for pg_atomic_fetch_add_u32().

However, Sokolov also proposes adding gcc-intrinsic support for
pg_atomic_fetch_and_xxx and pg_atomic_fetch_or_xxx.  This is a
different kettle of fish.  I repeated the experiments I'd done
for pg_atomic_fetch_add_u32(), per the above message, using the "or"
primitive, and found largely the same results as for "add": it's
slightly better under contention than the generic code, and
significantly better if the result value of the atomic op isn't needed.

So I think we should definitely take the part of the patch that
changes generic-gcc.h --- and we should check to see if we're missing
out on any other gcc primitives we should be using there.

I'm less excited about the proposed changes in generic.h.  There's
nothing wrong with them in principle, but they mostly shouldn't
make any performance difference on any platform we care about,
because we shouldn't get to that code in the first place on any
platform we care about.  If we are getting to that code for any
specific primitive, then either there's a gap in the platform-specific
or compiler-specific support, or it's debatable that we ought to
consider that primitive to be very primitive.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] proposal psql \gdesc

2017-09-05 Thread Pavel Stehule
2017-09-06 0:18 GMT+02:00 Tom Lane :

> Fabien COELHO  writes:
> > I did yet another rebase of your patch after Tom alphabetically ordered
> > backslash commands. Here is the result.
>
> Pushed with some massaging.
>

Thank you  very much

>
> regards, tom lane
>


Re: [HACKERS] pg_stat_wal_write statistics view

2017-09-05 Thread Haribabu Kommi
On Tue, Aug 15, 2017 at 7:39 AM, Peter Eisentraut <
peter.eisentr...@2ndquadrant.com> wrote:

> On 3/29/17 22:10, Haribabu Kommi wrote:
> > Updated patch to use shared counter instead of adding pg_stat_ calls to
> send
> > the statistics from each background process/worker.
>
> Your patch needs to be rebased and the OID assignments updated.
>

Rebased patch is attached. And also it has taken care of the comments
provided
by Andres and Fuji Masao in the upthread.

I separated the walwriter statistics from the other background processes to
check
how much load does the other processes are really contributing to wal
writing.

Following is the test results during my performance test.

 writes | walwriter_writes | backend_writes | dirty_writes |
walwriter_dirty_writes | backend_dirty_writes | write_blocks |
walwriter_write_blocks | backend_write_blocks | write_time |
walwriter_write_time | backend_write_time | sync_time | walwriter_sync_time
| backend_sync_time |  stats_reset
+--++--++--+--++--++--++---+-+---+---
  0 | 17748394 | 1383789657 |0 |
   0 |0 |0 |   21153194 |
3039806652 |  0 |0 |  0
| 0 |   259250230 |   17262560725 | 2017-09-05
18:22:41.087955+10
(1 row)

I didn't find any other background processes contribution to the WAL
writing activity.
May be we can combine them with backend stats?

I ran the performance test on this patch with pgbench to find out the
impact of these
changes. Because of my low end machine, the performance results are varying
too
much, I will try to get the performance figures from an high end machine by
that time.

Attached the latest patch and performance report.

Regards,
Hari Babu
Fujitsu Australia


0001-pg_stat_walwrites-statistics-view_v6.patch
Description: Binary data


pg_stat_walwrites_perf.xlsx
Description: MS-Excel 2007 spreadsheet

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] atomics/arch-x86.h is stupider than atomics/generic-gcc.h?

2017-09-05 Thread Tom Lane
I spent some time trying to devise a suitable performance microbenchmark
for the atomic ops, in pursuit of whether the proposal at
https://commitfest.postgresql.org/14/1154/
is worth doing.  I came up with the attached very simple-minded test
case, which you run with something like

create function my_test_atomic_ops(bigint) returns int
strict volatile language c as '/path/to/atomic-perf-test.so';

\timing

select my_test_atomic_ops(10);

The performance of a single process running this is interesting, but
only mildly so: what we want to know about is what happens when you
run two or more calls concurrently.

On my primary server, dual quad-core Xeon E5-2609 @ 2.4GHz, RHEL6
(so gcc version 4.4.7 20120313 (Red Hat 4.4.7-18)), in a disable-cassert
build, I see that a single process running the 1G-iterations case
repeatably takes about 9600ms.  Two competing processes take roughly
1 minute to do twice as much work.  (The two processes tend to finish
at significantly different times, indicating that this box's method
for resolving bus conflicts isn't 100% fair.  I'm taking the average
of the two runtimes as a representative number.)

This is with no source-code changes, meaning that what I'm testing is
arch-x86.h's version of pg_atomic_fetch_add_u32, which compiles to
basically

lock
xaddl   %eax,(%rdx)

I then diked out that version, so that the build fell back to
generic-gcc.h's version of the function.  With the test program
as attached, the inner loop is basically the same, and so is the
runtime.  But what I was testing before that was a version that
ignored the result of pg_atomic_fetch_add_u32,

while (count-- > 0)
{
(void) pg_atomic_fetch_add_u32(myptr, 1);
}

and what I was quite surprised to see was a single-thread time of
9600ms and a two-thread time of ~40s.  The reason was not too far
to seek: gcc is smart enough to notice that it doesn't need the
result of pg_atomic_fetch_add_u32, and so it compiles that to just

lock addl   $1, (%rax)

which is evidently significantly more efficient than the xaddl under
contention load.

Or in words of one syllable: at least for pg_atomic_fetch_add_u32,
we are working hard in atomics/arch-x86.h to get worse code than
gcc would give us natively.  (And, in case you didn't notice, this
is far from the latest and shiniest gcc.)

This case is not to be dismissed as insignificant either, since of the
three non-test occurrences of pg_atomic_fetch_add_u32 in our tree, two
ignore the result.

So I think we'd be well advised to cast a doubtful eye at the asm
constructs we've got here, and figure out which ones are really
meaningfully smarter than gcc's primitives.

regards, tom lane

#include "postgres.h"

#include "fmgr.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"


PG_MODULE_MAGIC;

static pg_atomic_uint32 *globptr = NULL;

int32 globjunk = 0;

PG_FUNCTION_INFO_V1(my_test_atomic_ops);

Datum
my_test_atomic_ops(PG_FUNCTION_ARGS)
{
	int64		count = PG_GETARG_INT64(0);
	int32		result;
	pg_atomic_uint32 *myptr;
	int32		junk = 0;

	/*
	 * Lazily attach to (or create) the shared counter on first call in this
	 * backend.  globptr is process-local, so each backend does this once.
	 */
	if (globptr == NULL)
	{
		/* First time through in this process; find shared memory */
		bool		found;

		LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

		globptr = ShmemInitStruct("my_test_atomic_ops",
								  sizeof(*globptr),
								  &found);

		if (!found)
		{
			/* First time through anywhere: initialize the shared counter */
			pg_atomic_init_u32(globptr, 0);
		}

		LWLockRelease(AddinShmemInitLock);
	}

	myptr = globptr;

	/*
	 * Hot loop under test: hammer the atomic fetch-add.  Accumulate the
	 * returned values into 'junk' so the compiler cannot discard the result
	 * and silently substitute a plain atomic add (see discussion above).
	 */
	while (count-- > 0)
	{
		junk += pg_atomic_fetch_add_u32(myptr, 1);
	}

	/* Sink for 'junk'; keeps the accumulation observable across calls. */
	globjunk += junk;

	/* Return the counter's current value (racy under concurrency, by design). */
	result = pg_atomic_read_u32(myptr);

	PG_RETURN_INT32(result);
}

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] increasing the default WAL segment size

2017-09-05 Thread Andres Freund
Hi,

I was looking to commit this, but the changes I made ended up being
pretty large. Here's what I changed in the attached:
- split GUC_UNIT_BYTE into a separate commit, squashed rest
- renamed GUC_UNIT_BYT to GUC_UNIT_BYTE, don't see why we'd have such a
  weird abbreviation?
- bumped control file version, otherwise things wouldn't work correctly
- wal_segment_size text still said "Shows the number of pages per write
  ahead log segment."
- I still feel strongly that exporting XLogSegSize, which previously was
  a macro and now a integer variable, is a bad idea. Hence I've renamed
  it to wal_segment_size.
- There still were comments referencing XLOG_SEG_SIZE
- IsPowerOf2 regarded 0 as a valid power of two
- ConvertToXSegs() depended on a variable not passed as arg, bad idea.
- As previously mentioned, I don't think it's ok to rely on vars like
  XLogSegSize to be defined both in backend and frontend code.
- I don't think XLogReader can rely on XLogSegSize, needs to be
  parametrized.
- pg_rewind exported another copy of extern int XLogSegSize
- streamutil.h had a extern uint32 WalSegsz; but used
  RetrieveXlogSegSize, that seems needlessly different
- moved wal_segment_size (aka XLogSegSize) to xlog.h
- pg_standby included xlogreader, not sure why?
- MaxSegmentsPerLogFile still had a conflicting naming scheme
- you'd included "sys/stat.h", that's not really appropriate for system
  headers, should be  (and then grouped w/ rest)
- pg_controldata's warning about an invalid segsize missed newlines

Unresolved:
- this needs some new performance tests, the number of added instructions
  isn't trivial. Don't think there's anything, but ...
- read through it again, check long lines
- pg_standby's RetrieveWALSegSize() does too much for its name. It
  seems quite weird that a function named that way has the section below
  "/* check if clean up is necessary */"
- the way you redid the ReadControlFile() invocation doesn't quite seem
  right. Consider what happens if XLOGbuffers isn't -1 - then we
  wouldn't read the control file, but you unconditionally copy it in
  XLOGShmemInit(). I think we instead should introduce something like
  XLOGPreShmemInit() that reads the control file unless in bootstrap
  mode. Then get rid of the second ReadControlFile() already present.
- In pg_resetwal.c:ReadControlFile() we ignore the file contents if
  there's an invalid segment size, but accept the contents as guessed if
  there's a crc failure - that seems a bit weird?
- verify EXEC_BACKEND does the right thing
- not this commit/patch, but XLogReadDetermineTimeline() could really
  use some simplifying of repetitive expressions
- XLOGShmemInit shouldn't memcpy to temp_cfile and such, why not just
  save previous pointer in a local variable?
- could you fill in the Reviewed-By: line in the commit message?

Running out of concentration / time now.

- Andres
>From 15d16b8e2146b0491e8b64e780227424162dd784 Mon Sep 17 00:00:00 2001
From: Andres Freund 
Date: Tue, 5 Sep 2017 13:26:55 -0700
Subject: [PATCH 1/2] Introduce BYTES unit for GUCs.

This is already useful for track_activity_query_size, and will further
be used in followup commits.

Author: Beena Emerson
Reviewed-By: Andres Freund
Discussion: https://postgr.es/m/caog9apeu8bxvwbxkoo9j7zpm76task_vfmeeicejwhmmsli...@mail.gmail.com
---
 src/backend/utils/misc/guc.c | 14 +-
 src/include/utils/guc.h  |  1 +
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 246fea8693..25da06fffc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -722,6 +722,11 @@ static const char *memory_units_hint = gettext_noop("Valid units for this parame
 
 static const unit_conversion memory_unit_conversion_table[] =
 {
+	{"GB", GUC_UNIT_BYTE, 1024 * 1024 * 1024},
+	{"MB", GUC_UNIT_BYTE, 1024 * 1024},
+	{"kB", GUC_UNIT_BYTE, 1024},
+	{"B", GUC_UNIT_BYTE, 1},
+
 	{"TB", GUC_UNIT_KB, 1024 * 1024 * 1024},
 	{"GB", GUC_UNIT_KB, 1024 * 1024},
 	{"MB", GUC_UNIT_KB, 1024},
@@ -2863,11 +2868,7 @@ static struct config_int ConfigureNamesInt[] =
 		{"track_activity_query_size", PGC_POSTMASTER, RESOURCES_MEM,
 			gettext_noop("Sets the size reserved for pg_stat_activity.query, in bytes."),
 			NULL,
-
-			/*
-			 * There is no _bytes_ unit, so the user can't supply units for
-			 * this.
-			 */
+			GUC_UNIT_BYTE
 		},
 		_track_activity_query_size,
 		1024, 100, 102400,
@@ -8113,6 +8114,9 @@ GetConfigOptionByNum(int varnum, const char **values, bool *noshow)
 	{
 		switch (conf->flags & (GUC_UNIT_MEMORY | GUC_UNIT_TIME))
 		{
+			case GUC_UNIT_BYTE:
+values[2] = "B";
+break;
 			case GUC_UNIT_KB:
 values[2] = "kB";
 break;
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index c1870d2130..467125a09d 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -219,6 +219,7 @@ typedef enum
 #define GUC_UNIT_BLOCKS			0x2000	/* value is 

Re: [HACKERS] Copyright in partition.h and partition.c

2017-09-05 Thread Amit Langote
On 2017/09/05 21:14, Tom Lane wrote:
> Amit Langote  writes:
>> On 2017/09/05 15:48, Etsuro Fujita wrote:
>>> Here is the copyright in partition.h:
>>>
>>>  * Copyright (c) 2007-2017, PostgreSQL Global Development Group
>>>
>>> I think it's reasonable that that matches the copyright in partition.c,
>>> but partition.c has:
>>>
>>>  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
>>>  * Portions Copyright (c) 1994, Regents of the University of California
>>>
>>> Is that intentional?
> 
>> No, it's unintentional.  The difference may have resulted from copying
>> different files to become partition.h and partition.c, respectively.
> 
>> Maybe, we should change both to say 2016-2017?
> 
>> I don't know the exact rule for how we determine those years.  Is there
>> some rule in place about that?  When I look at execParallel.c, which
>> supposedly got introduced into the tree recently, I see 1996-2017.  OTOH,
>> the files in contrib/bloom all have 2016-2017.
> 
> Our usual practice is to write the copyright like it is in partition.c
> even in new files.  This avoids any question about whether any of the
> code was copied-and-pasted from somewhere else in PG.  Even if not one
> word in the file can be traced to code that was somewhere else before,
> it seems to me that this is an appropriate thing to do, to give due
> credit to those who came before us.

Agreed.

> In short: we should make partition.h's copyright look like partition.c's
> not vice versa.

Attached patch does that.

Thanks,
Amit
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index 2283c675e9..619e4c9cc8 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -4,7 +4,8 @@
  * Header file for structures and utility functions related to
  * partitioning
  *
- * Copyright (c) 2007-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/include/catalog/partition.h
  *

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Jesper Pedersen

Hi,

On 05/25/2017 11:12 AM, Sokolov Yura wrote:

I agree that lonely semicolon looks bad.
Applied your suggestion for empty loop body (/* skip */).

Patch in first letter had while(true), but I removed it cause
I think it is uglier:
- `while(true)` was necessary for grouping read with `if`,
- but now there is single statement in a loop body and it is
   condition for loop exit, so it is clearly just a loop.

Optimization is valid cause compare_exchange always store old value
in `old` variable in a same atomic manner as atomic read.



I have tested this patch on a 2-socket machine, but don't see any 
performance change in the various runs. However, there is no regression 
either in all cases.


As such, I have marked the entry "Ready for Committer".

Remember to add a version postfix to your patches such that is easy to 
identify which is the latest version.


Best regards,
 Jesper


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] DROP SUBSCRIPTION hangs if sub is disabled in the same transaction

2017-09-05 Thread Arseny Sher
Arseny Sher  writes:

> Attached patch fixes this by stopping workers before RO drop, as
> already done in case when we drop replication slot.

Sorry, here is the patch.
>From 008d54dfe2e8ba610bb7c897cfbb4ee7a700aa2e Mon Sep 17 00:00:00 2001
From: Arseny Sher 
Date: Mon, 4 Sep 2017 16:55:08 +0300
Subject: [PATCH] Stop LR workers before dropping replication origin.

Previously, executing ALTER SUBSCRIPTION DISABLE and DROP SUBSCRIPTION in one
transaction would hang forever, because worker didn't stop until transaction is
committed, and transaction couldn't commit before worker exit since the latter
occupies ReplicationState.
---
 src/backend/commands/subscriptioncmds.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 77620dfd42..5d608855a1 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -886,6 +886,10 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
 	else
 		slotname = NULL;
 
+	/* Get originid */
+	snprintf(originname, sizeof(originname), "pg_%u", subid);
+	originid = replorigin_by_name(originname, true);
+
 	/*
 	 * Since dropping a replication slot is not transactional, the replication
 	 * slot stays dropped even if the transaction rolls back.  So we cannot
@@ -909,9 +913,10 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
 	ReleaseSysCache(tup);
 
 	/*
-	 * If we are dropping the replication slot, stop all the subscription
-	 * workers immediately, so that the slot becomes accessible.  Otherwise
-	 * just schedule the stopping for the end of the transaction.
+	 * We stop all subscription workers immediately in two cases:
+	 * - We are dropping the replication slot, to make the slot accessible;
+	 * - We are dropping repl origin tracking, to release ReplicationState;
+	 * Otherwise just schedule the stopping for the end of the transaction.
 	 *
 	 * New workers won't be started because we hold an exclusive lock on the
 	 * subscription till the end of the transaction.
@@ -923,7 +928,7 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
 	{
 		LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc);
 
-		if (slotname)
+		if (slotname || originid != InvalidRepOriginId)
 			logicalrep_worker_stop(w->subid, w->relid);
 		else
 			logicalrep_worker_stop_at_commit(w->subid, w->relid);
@@ -937,8 +942,6 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
 	RemoveSubscriptionRel(subid, InvalidOid);
 
 	/* Remove the origin tracking if exists. */
-	snprintf(originname, sizeof(originname), "pg_%u", subid);
-	originid = replorigin_by_name(originname, true);
 	if (originid != InvalidRepOriginId)
 		replorigin_drop(originid, false);
 
-- 
2.11.0



--
Arseny Sher
Postgres Professional: https://postgrespro.ru
The Russian Postgres Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] DROP SUBSCRIPTION hangs if sub is disabled in the same transaction

2017-09-05 Thread Arseny Sher
Hello,

Currently, DROP SUBSCRIPTION on REL_10_STABLE would block forever in the
following setup:

node 1:
create table t (i int);
create publication p for table t;

node 2:
create table t (i int);
create subscription s CONNECTION 'port=5432' publication p;
begin;
alter subscription s disable ;
alter subscription s set (slot_name = none);
drop subscription s;
end;

It hangs in replorigin_drop because we wait until ReplicationState is
released. This should happen on exit of worker, but worker will not exit
until transaction commit because he doesn't see that the sub was
disabled.

Attached patch fixes this by stopping workers before RO drop, as
already done in case when we drop replication slot. I am not sure
whether this is a proper solution, but for now I don't see any problems
with early stop of workers: even if transaction later aborts, launcher
will happily restart them, isn't it?


--
Arseny Sher
Postgres Professional: https://postgrespro.ru
The Russian Postgres Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup behavior on non-existent slot

2017-09-05 Thread Magnus Hagander
On Mon, Sep 4, 2017 at 3:21 PM, Jeff Janes  wrote:

>
> If I tell pg_basebackup to use a non-existent slot, it immediately reports
> an error.  And then it exits with an error, but only after streaming the
> entire database contents.
>
> If you are doing this interactively and are on the ball, of course, you
> can hit ctrl-C when you see the error message.
>
> I don't know if this is exactly a bug, but it seems rather unfortunate.
>

I think that should qualify as a bug.

In 10 it will automatically create a transient slot in this case, but there
might still be a case where you can provoke this.



> Should the parent process of pg_basebackup be made to respond to SIGCHLD?
> Or call waitpid(bgchild, , WNOHANG) in some strategic loop?
>

I think it's ok to just call waitpid() -- we don't need to react super
quickly, but we should react. And we should then exit the main process with
an error before actually streaming everything.

-- 
 Magnus Hagander
 Me: https://www.hagander.net/ 
 Work: https://www.redpill-linpro.com/ 


Re: [HACKERS] [Proposal] Allow users to specify multiple tables in VACUUM commands

2017-09-05 Thread Michael Paquier
On Wed, Sep 6, 2017 at 2:36 AM, Bossart, Nathan  wrote:
> On 9/4/17, 8:16 PM, "Michael Paquier"  wrote:
>> So I would tend to think that the same column specified multiple times
>> should cause an error, and that we could let VACUUM run work N times
>> on a relation if it is specified this much. This feels more natural,
>> at least to me, and it keeps the code simple.
>
> I think that is a reasonable approach.  Another option I was thinking
> about was to de-duplicate only the individual column lists.  This
> alternative approach might be a bit more user-friendly, but I am
> beginning to agree with you that perhaps we should not try to infer
> the intent of the user in these "duplicate" scenarios.
>
> I'll work on converting the existing de-duplication patch into
> something more like what you suggested.

Cool. I'll look at anything you have.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] proposal psql \gdesc

2017-09-05 Thread Tom Lane
Fabien COELHO  writes:
> I did yet another rebase of your patch after Tom alphabetically ordered 
> backslash commands. Here is the result.

Pushed with some massaging.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal for changes to recovery.conf API

2017-09-05 Thread Michael Paquier
On Wed, Sep 6, 2017 at 12:31 AM, Simon Riggs  wrote:
> I've not worked on this at all, so it isn't ready for re-review and commit.
>
> I get the "lets do it early" thing and will try to extract a subset in 
> October.

Nice to see that you are still planning to work on that. I would
suggest to move this item to the next open CF then, with "waiting on
author" as status.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Replacing lfirst() with lfirst_node() appropriately in planner.c

2017-09-05 Thread Tom Lane
Ashutosh Bapat  writes:
> On Tue, Sep 5, 2017 at 4:02 PM, Jeevan Chalke
>  wrote:
>> Attached patch for replacing linitial() with linital_node() appropriately in
>> planner.c

> Ok. The patch looks good to me. It compiles and make check passes.
> Here are both the patches rebased on the latest sources.

LGTM, pushed.  I also changed a couple of places that left off any cast
of lfirst, eg:

-   List   *gset = lfirst(lc);
+   List   *gset = (List *) lfirst(lc);

While this isn't really harmful, it's not per prevailing style.

BTW, I think we *could* use "lfirst_node(List, ...)" in cases where
we know the list is supposed to be a list of objects rather than ints
or Oids.  I didn't do anything about that observation, though.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Jeff Janes
On Tue, Sep 5, 2017 at 11:39 AM, Jesper Pedersen  wrote:

> On 09/05/2017 02:24 PM, Tom Lane wrote:
>
>> Jesper Pedersen  writes:
>>
>>> I have tested this patch on a 2-socket machine, but don't see any
>>> performance change in the various runs. However, there is no regression
>>> either in all cases.
>>>
>>
>> Hm, so if we can't demonstrate a performance win, it's hard to justify
>> risking touching this code.  What test case(s) did you use?
>>
>>
> I ran pgbench (-M prepared) with synchronous_commit 'on' and 'off' using
> both logged and unlogged tables. Also ran an internal benchmark which
> didn't show anything either.
>

What scale factor and client count? How many cores per socket?  It looks
like Sokolov was just starting to see gains at 200 clients on 72 cores,
using -N transaction.

Cheers,

Jeff


Re: [HACKERS] assorted code cleanup

2017-09-05 Thread Tom Lane
Peter Eisentraut  writes:
> Do you mean specifically the hook variables, or any function pointers?
> I can see your point in the above case, but for example here

> -   if ((*tinfo->f_lt) (o.upper, c.upper, flinfo))
> +   if (tinfo->f_lt(o.upper, c.upper, flinfo))

> I think there is no loss of clarity and the extra punctuation makes it
> more complicated to read.

At one time there were C compilers that only accepted the former syntax.
But we have already occurrences of the latter in our tree, and no one
has complained, so I think that's a dead issue by now.

I do agree with the idea that we should use the * notation in cases where
the reader might otherwise think that a plain function was being invoked,
ie I don't like

some_function_pointer(args);

Even if the compiler isn't confused, readers might be.  But in the case of

structname->pointerfield(args);

it's impossible to read that as a plain function call, so I'm okay with
dropping the extra punctuation there.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] assorted code cleanup

2017-09-05 Thread Peter Eisentraut
On 8/21/17 01:11, Michael Paquier wrote:
>> - Drop excessive dereferencing of function pointers
> 
> -   (*next_ProcessUtility_hook) (pstmt, queryString,
> +   next_ProcessUtility_hook(pstmt, queryString,
>  context, params, queryEnv,
>  dest, completionTag);
> But this... Personally I like the current grammar which allows one to
> make the difference between a function call with something declared
> locally and something that may be going to a custom code path. So I
> think that you had better not update the system hooks that external
> modules can use via shared_preload_libraries.

Do you mean specifically the hook variables, or any function pointers?
I can see your point in the above case, but for example here

-   if ((*tinfo->f_lt) (o.upper, c.upper, flinfo))
+   if (tinfo->f_lt(o.upper, c.upper, flinfo))

I think there is no loss of clarity and the extra punctuation makes it
more complicated to read.

>> - Remove unnecessary parentheses in return statements
> 
> So you would still keep parenthesis like here for simple expressions:
> contrib/bloom/blutils.c:return (x - 1);
> No objections.
> 
> Here are some more:
> contrib/intarray/_int_bool.c:   return (calcnot) ?
> contrib/ltree/ltxtquery_op.c:   return (calcnot) ?
> 
> And there are many "(0)" "S_ANYTHING" in src/interfaces/ecpg/test/ and
> src/interfaces/ecpg/preproc/.

Thanks, I included these.

>> - Remove our own definition of NULL
> 
> Fine. c.h uses once NULL before enforcing its definition.

Actually, that would have worked fine, because the earlier use is a
macro definition, so NULL would not have been needed until it is used.

>> - fuzzystrmatch: Remove dead code
> 
> Those are remnants of a323ede, which missed to removed everything.

Good reference, makes sense.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] assorted code cleanup

2017-09-05 Thread Peter Eisentraut
On 8/29/17 03:32, Ryan Murphy wrote:
> The following review has been posted through the commitfest application:
> make installcheck-world:  tested, passed
> Implements feature:   tested, passed
> Spec compliant:   tested, passed
> Documentation:tested, passed
> 
> I've reviewed the code changes, and it's pretty clear to me that they clean 
> things up a bit while not changing any behavior.  They simplify things in a 
> way that make the code more comprehensible.  I've run all the tests and they 
> behave the same way as they did before the patch.  I also trying manually 
> playing around the the function in question, `metaphone`, and it seems to 
> behave the same as before.
> 
> I think it's ready to commit!
> 
> The new status of this patch is: Ready for Committer

Pushed, except the one with the function pointers, which some people
didn't like.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] psql - add special variable to reflect the last query status

2017-09-05 Thread Fabien COELHO



* It might be better if SQLSTATE and ERROR_MESSAGE were left
unchanged by a non-error query.



Hmmm. I'm not sure. If so, ERROR_MESSAGE should be LAST_ERROR_MESSAGE
maybe? Then what about LAST_ERROR_SQLSTATE to go with it, and let SQLSTATE
& ERROR_MESSAGE reflect the last command, and ERROR is just the boolean to
test if it occurred?



That would reduce the need to copy them into other variables just
because you needed to do something else before printing them.  It'd save
a few cycles too.



Well, my suggestion would mean that they would be copied when an error
occurs, but only when it occurs, which would not be often.


Uh ... what?


Sorry if my sentence was not very clear. Time to go do bed:-)

I just mean that a LAST_ERROR_* would be set when an error occurs. When 
there is no error, it is expected to remain the same, and it does not cost 
anything to let it as is. If an error occured then you had a problem, a 
transaction aborted, paying to set a few variables when it occurs does not 
look like a big performance issue. Script usually expect to run without 
error, errors are rare events.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] A note about debugging TAP failures

2017-09-05 Thread Daniel Gustafsson
> On 05 Sep 2017, at 18:37, Peter Eisentraut  
> wrote:
> 
> On 4/25/17 11:00, Daniel Gustafsson wrote:
>> Makes sense, assuming a “clean slate” to run on seems a reasonable assumption
>> for the test and it makes for simpler code in PostgresNode.  Updated the 
>> patch
>> which now have static datadir names; retains on PG_TESTS_NOCLEAN or any kind 
>> of
>> test failure; and allows for cleaning datadirs without having to use make 
>> clean
>> (seems the wrong size hammer for that nail).
> 
> Committed.  I had to make some changes in the pg_rewind tests.  Those
> make data directories with conflicting names, which did not work anymore
> because the names are no longer random.

Makes sense, +1 on those changes.

> Other than that this is pretty nice.  Thanks!

Thanks for the commit!

cheers ./daniel

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Fabien COELHO



Huh?  The variable will be set in any case.  It should be correct for any
server version we might find in the wild --- so far as I can tell from the
commit history, every version supporting FE/BE protocol version 3 has sent
server_version at connection start.  With a pre-7.4 server, it looks like
the variables would be set to "0.0.0" and "0" respectively.


Scratch that: experimentation says it works fine with older servers too.
The oldest one I have in captivity reports


Ok, be it means a recent psql connecting to an old server. It does not 
work with an old psql.



SERVER_VERSION_NAME = '7.0.3'
SERVER_VERSION_NUM = '70003'

Looking more closely, I see that when using protocol version 2, libpq
will issue a "select version()" command at connection start to get
this info.


Then it could be used for free to set SERVER_VERSION if it can be 
extracted from libpq somehow?!


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Fabien COELHO


Hello Simon,


Does raise the further question of how psql behaves when we connect to
a pre-10 server, so we have SERVER_VERSION_NUM but yet it is not set.
How does this
\if SERVER_VERSION_NUM < x


The if does not have expressions (yet), it just handles TRUE/ON/1 and 
FALSE/0/OFF.



Do we need some macro or suggested special handling?


If "SERVER_VERSION_NUM" is available in 10, then:

  -- exit if version < 10 (\if is ignored and \q is executed)
  \if false \echo "prior 10" \q \endif

  -- then test version through a server side expression, will work
  SELECT :SERVER_VERSION_NUM < 11 AS prior_11 \gset
  \if :prior_11
-- version 10
  \else
-- version 11 or more
  \endif

--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] JIT compiling expressions/deform + inlining prototype v2.0

2017-09-05 Thread Andres Freund
On 2017-09-05 19:43:33 +0100, Greg Stark wrote:
> On 5 September 2017 at 11:58, Konstantin Knizhnik
>  wrote:
> >
> > I wonder if we can perform some optimization in this case (assuming that in
> > typical cases column either contains mostly non-null values, either mostly
> > null values).
> 
> If you really wanted to go crazy here you could do lookup tables of
> bits of null bitmaps. Ie, you look at the first byte of the null
> bitmap, index into an array and it points to 8 offsets for the 8
> fields covered by that much of the bitmap. The lookup table might be
> kind of large since offsets are 16-bits so you're talking 256 * 16
> bytes or 2kB for every 8 columns up until the first variable size
> column (or I suppose you could even continue in the case where the
> variable size column is null).

I'm missing something here. What's this saving?  The code for lookups
with NULLs after jitting effectively is
a) one load for every 8 columns (could be optimized to one load every
   sizeof(void*) cols)
b) one bitmask for every column + one branch for null
c) load for the datum, indexed by register
d) saving the column value, that's independent of NULLness
e) one addi adding the length to the offset

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Tom Lane
Jesper Pedersen  writes:
> On 09/05/2017 02:24 PM, Tom Lane wrote:
>> Hm, so if we can't demonstrate a performance win, it's hard to justify
>> risking touching this code.  What test case(s) did you use?

> I ran pgbench (-M prepared) with synchronous_commit 'on' and 'off' using 
> both logged and unlogged tables. Also ran an internal benchmark which 
> didn't show anything either.

That may just mean that pgbench isn't stressing any atomic ops very
hard (at least in the default scenario).

I'm tempted to write a little C function that just hits the relevant
atomic ops in a tight loop, and see how long it takes to do a few
million iterations.  That would be erring in the opposite direction,
of overstating the importance of atomic ops to real-world scenarios
--- but if we didn't get any win that way, then it's surely in the noise.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] JIT compiling expressions/deform + inlining prototype v2.0

2017-09-05 Thread Greg Stark
On 5 September 2017 at 11:58, Konstantin Knizhnik
 wrote:
>
> I wonder if we can perform some optimization in this case (assuming that in
> typical cases column either contains mostly non-null values, either mostly
> null values).

If you really wanted to go crazy here you could do lookup tables of
bits of null bitmaps. Ie, you look at the first byte of the null
bitmap, index into an array and it points to 8 offsets for the 8
fields covered by that much of the bitmap. The lookup table might be
kind of large since offsets are 16-bits so you're talking 256 * 16
bytes or 2kB for every 8 columns up until the first variable size
column (or I suppose you could even continue in the case where the
variable size column is null).

-- 
greg


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Jesper Pedersen

On 09/05/2017 02:24 PM, Tom Lane wrote:

Jesper Pedersen  writes:

I have tested this patch on a 2-socket machine, but don't see any
performance change in the various runs. However, there is no regression
either in all cases.


Hm, so if we can't demonstrate a performance win, it's hard to justify
risking touching this code.  What test case(s) did you use?



I ran pgbench (-M prepared) with synchronous_commit 'on' and 'off' using 
both logged and unlogged tables. Also ran an internal benchmark which 
didn't show anything either.


Setting the entry back to "Needs Review" for additional feedback from 
others.


Best regards,
 Jesper


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Tom Lane
Jesper Pedersen  writes:
> I have tested this patch on a 2-socket machine, but don't see any 
> performance change in the various runs. However, there is no regression 
> either in all cases.

Hm, so if we can't demonstrate a performance win, it's hard to justify
risking touching this code.  What test case(s) did you use?

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Tom Lane
I wrote:
> Huh?  The variable will be set in any case.  It should be correct for any
> server version we might find in the wild --- so far as I can tell from the
> commit history, every version supporting FE/BE protocol version 3 has sent
> server_version at connection start.  With a pre-7.4 server, it looks like
> the variables would be set to "0.0.0" and "0" respectively.

Scratch that: experimentation says it works fine with older servers too.
The oldest one I have in captivity reports

SERVER_VERSION_NAME = '7.0.3'
SERVER_VERSION_NUM = '70003'

Looking more closely, I see that when using protocol version 2, libpq
will issue a "select version()" command at connection start to get
this info.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Fix performance of generic atomics

2017-09-05 Thread Jesper Pedersen

Hi,

On 05/25/2017 11:12 AM, Sokolov Yura wrote:

I agree that lonely semicolon looks bad.
Applied your suggestion for empty loop body (/* skip */).

Patch in first letter had while(true), but I removed it cause
I think it is uglier:
- `while(true)` was necessary for grouping read with `if`,
- but now there is single statement in a loop body and it is
   condition for loop exit, so it is clearly just a loop.

Optimization is valid cause compare_exchange always store old value
in `old` variable in a same atomic manner as atomic read.



I have tested this patch on a 2-socket machine, but don't see any 
performance change in the various runs. However, there is no regression 
either in all cases.


As such, I have marked the entry "Ready for Committer".

Remember to add a version postfix to your patches such that it is easy to 
identify which is the latest version.


Best regards,
 Jesper



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] psql - add special variable to reflect the last query status

2017-09-05 Thread Tom Lane
Fabien COELHO  writes:
>> * It might be better if SQLSTATE and ERROR_MESSAGE were left
>> unchanged by a non-error query.

> Hmmm. I'm not sure. If so, ERROR_MESSAGE should be LAST_ERROR_MESSAGE 
> maybe? Then what about LAST_ERROR_SQLSTATE to go with it, and let SQLSTATE 
> & ERROR_MESSAGE reflect the last command, and ERROR is just the boolean to 
test if it occurred?

>> That would reduce the need to copy them into other variables just 
>> because you needed to do something else before printing them.  It'd save 
>> a few cycles too.

> Well, my suggestion would mean that they would be copied when an error 
> occurs, but only when it occurs, which would not be often.

Uh ... what?

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] psql - add special variable to reflect the last query status

2017-09-05 Thread Fabien COELHO


Hello Tom,


* I think the ERROR_CODE variable should instead be named SQLSTATE.
That is what the SQL standard calls this string, and it's also what
just about all our documentation calls it; see PG_DIAG_SQLSTATE
in libpq, or the SQLSTATE 'x' construct in pl/pgsql, or the
sqlstate attribute of an exception object in plpython, etc etc.


I chose ERROR_CODE because it matched the ERROR boolean. But it may be a 
misnomer if the status is that all is well. I'm okay with SQLSTATE.


* I'm not exactly convinced that there's a use-case for STATUS that's 
not covered as well or better by ERROR. Client code that looks at 
PQresStatus for anything beyond error/not-error is usually doing that 
because it's library code that doesn't know what kind of query it's 
working on.  It seems like a stretch that a psql script would not know 
that.  Also, PQresultStatus memorializes some legacy distinctions, like 
"fatal" vs "nonfatal" error, that I think we'd be better off not 
exposing in psql scripting.


Ok.


* It might be better if SQLSTATE and ERROR_MESSAGE were left
unchanged by a non-error query.


Hmmm. I'm not sure. If so, ERROR_MESSAGE should be LAST_ERROR_MESSAGE 
maybe? Then what about LAST_ERROR_SQLSTATE to go with it, and let SQLSTATE 
& ERROR_MESSAGE reflect the last command, and ERROR is just the boolean to 
test if it occurred?


That would reduce the need to copy them into other variables just 
because you needed to do something else before printing them.  It'd save 
a few cycles too.


Well, my suggestion would mean that they would be copied when an error 
occurs, but only when it occurs, which would not be often.


If you would like them, I'm not sure how these variable should be 
initialized. Undefined? Empty?



* Speaking of which, has anyone tried to quantify the performance
impact of this patch?  It might well be negligible, but I do not
think we should assume that without evidence.


I think it should be negligible compared to network connections, aborting 
an ongoing transaction, reading the script...


But I do not know libpq internals so I may be quite naive.


* I wonder why you didn't merge this processing into ProcessResult,
instead of inventing an extra function (whose call site seems rather
poorly chosen anyhow --- what's the justification for not counting this
overhead as part of the query runtime?).  You could probably eliminate
the refactoring you did, since it wouldn't be necessary to recompute
AcceptResult's result that way.


Hmmm. I assume that you are unhappy about ResultIsSuccess.

The refactoring is because the function is used twice. I chose to do that 
because the functionality is clear and could be made into a function, which 
improved readability. Ok, PQresultStatus is thus called twice, I assumed 
that it is just reading a field in a struct... it could be returned to the 
caller with an additional pointer to avoid that.


The SendResult & ProcessResult functions are already quite heavy to my 
taste, I did not want to add significantly to that.


The ProcessResult switch does not test all states cleanly, it is really 
about checking about copy, and not so clear, so I do not think that it 
would mix well to add the variable stuff in the middle of that.


SendQuery is also pretty complex, including gotos everywhere.

So I did want to add to these two functions beyond the minimum. Now, I can 
also inline everything coldly in ProcessResult, no problem.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Tom Lane
Simon Riggs  writes:
> Does raise the further question of how psql behaves when we connect to
> a pre-10 server, so we have SERVER_VERSION_NUM but yet it is not set.

Huh?  The variable will be set in any case.  It should be correct for any
server version we might find in the wild --- so far as I can tell from the
commit history, every version supporting FE/BE protocol version 3 has sent
server_version at connection start.  With a pre-7.4 server, it looks like
the variables would be set to "0.0.0" and "0" respectively.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PoC plpgsql - possibility to force custom or generic plan

2017-09-05 Thread Pavel Stehule
2017-09-05 19:38 GMT+02:00 Tom Lane :

> Pavel Stehule  writes:
> > 2. what syntax we should to use (if we accept this feature)? There was
> not
> > another proposal if I remember well - The PRAGMA syntax is strong because
> > we can very well specify to range where the plans caching will be
> > explicitly controlled. It is well readable and static.
>
> The complaint I have about PRAGMA is that it's yet another syntax for
> accomplishing pretty much the same thing.  If you don't like the GUC
> solution, we've already got the "comp_option" syntax for static options
> in plpgsql.  Sure, that's not too pretty, but that's not a good reason
> to invent yet another way to do it.
>

comp_option has only function scope, which is too limited for this purpose.

I don't prefer GUC for this purpose because you need to do SET/RESET on two
places. With GUC the code can looks like:

PERFORM set_config('cachexx', 'off')
FOR r IN SELECT ...
LOOP
  PERFORM set_config(' cachexx', 'on')
  
  PERFORM set_config('cachexx', 'off')
END LOOP;
PERFORM set_config('cachexx', 'on');


The another reason for inventing PRAGMA syntax to PLpgSQL was support for
autonomous transaction and I am thinking so is good idea using same syntax
like PL/SQL does.


>
> regards, tom lane
>


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Simon Riggs
On 5 September 2017 at 09:58, Tom Lane  wrote:
> Robert Haas  writes:
>> On Tue, Sep 5, 2017 at 12:16 PM, Tom Lane  wrote:
>>> Hm.  I think it would be a fine idea to push this change into v10,
>>> so that all psql versions that have \if would have these variables.
>>> However, I'm less enthused about adding them to the 9.x branches.
>>> Given the lack of \if, I'm not seeing a use-case that would justify
>>> taking any compatibility risk for.
>>>
>>> Opinions anyone?
>
>> As I see it, the risks of back-patching are:
>
>> 1. Different minor versions will have different behavior, and that
>> tends to create more problems than it solves.
>
> Yeah.  Whatever use-case you think might exist for these variables in,
> say, 9.6 is greatly weakened by the fact that releases through 9.6.5
> don't have them.

Makes sense

> However, since v10 is still in beta I don't think that argument
> applies to it.

OK


Does raise the further question of how psql behaves when we connect to
a pre-10 server, so we have SERVER_VERSION_NUM but yet it is not set.
How does this
\if SERVER_VERSION_NUM < x
behave if unset? Presumably it fails, even though the version *is* less than x
Do we need some macro or suggested special handling?

-- 
Simon Riggs                   http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [Proposal] Allow users to specify multiple tables in VACUUM commands

2017-09-05 Thread Bossart, Nathan
On 9/4/17, 8:16 PM, "Michael Paquier"  wrote:
> So vacuum_multiple_tables_v14.patch is good for a committer in my
> opinion. With this patch, if the same relation is specified multiple
> times, then it gets vacuum'ed that many times. Using the same column
> multi-times results in an error as on HEAD, but that's not a new
> problem with this patch.

Thanks!

> So I would tend to think that the same column specified multiple times
> should cause an error, and that we could let VACUUM run work N times
> on a relation if it is specified this much. This feels more natural,
> at least to me, and it keeps the code simple.

I think that is a reasonable approach.  Another option I was thinking
about was to de-duplicate only the individual column lists.  This
alternative approach might be a bit more user-friendly, but I am
beginning to agree with you that perhaps we should not try to infer
the intent of the user in these "duplicate" scenarios.

I'll work on converting the existing de-duplication patch into
something more like what you suggested.

Nathan


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PoC plpgsql - possibility to force custom or generic plan

2017-09-05 Thread Tom Lane
Pavel Stehule  writes:
> 2. what syntax we should to use (if we accept this feature)? There was not
> another proposal if I remember well - The PRAGMA syntax is strong because
> we can very well specify to range where the plans caching will be
> explicitly controlled. It is well readable and static.

The complaint I have about PRAGMA is that it's yet another syntax for
accomplishing pretty much the same thing.  If you don't like the GUC
solution, we've already got the "comp_option" syntax for static options
in plpgsql.  Sure, that's not too pretty, but that's not a good reason
to invent yet another way to do it.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [bug fix] Savepoint-related statements terminates connection

2017-09-05 Thread Tom Lane
Michael Paquier  writes:
> On Mon, Sep 4, 2017 at 11:15 PM, Tom Lane  wrote:
>> I don't want to go there, and was thinking we should expand the new
>> comment in DefineSavepoint to explain why not.

> Okay.

Does anyone want to do further review on this patch?  If so, I'll
set the CF entry back to "Needs Review".

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PoC plpgsql - possibility to force custom or generic plan

2017-09-05 Thread Pavel Stehule
2017-09-05 15:01 GMT+02:00 Daniel Gustafsson :

> > On 08 Apr 2017, at 09:42, Pavel Stehule  wrote:
> >
> > 2017-04-08 2:30 GMT+02:00 Peter Eisentraut :
> > On 4/6/17 14:32, Pavel Stehule wrote:
> > > I like to see any proposals about syntax or implementation.
> > >
> > > Using PRAGMA is one variant - introduced by PLpgSQL origin - Ada
> > > language. The PRAGMA syntax can be used for PRAGMA autonomous with well
> > > known syntax. It scales well  - it supports function, block or command
> > > level.
> >
> > I had pragmas implemented in the original autonomous transactions patch
> (https://www.postgresql.org/message-id/659a2fce-b6ee-06de-05c0-c8ed6a019...@2ndquadrant.com).
> >  However, the difference there is that the behavior is lexical, specific
> > to plpgsql, whereas here you are really just selecting run time
> > behavior.  So a GUC, and also something that could apply to other
> > places, should be considered.
> >
> > I'll look there - we coordinate work on that.
>
> This patch was moved to the now started commitfest, and is marked as “Needs
> review”.  Based on this thread I will however change it to "waiting for
> author”,
> since there seems to be some open questions.  Has there been any new work
> done
> on this towards a new design/patch?
>

I didn't do any work on this patch in the last months. I hope that this week
I'll reread this thread and check what to do.

There are few but important questions:

1. we want this feature? I hope so we want - because I don't believe to
user invisible great solution - and this is simple solution that helps with
readability of some complex PL procedures.

2. what syntax we should to use (if we accept this feature)? There was not
another proposal if I remember well - The PRAGMA syntax is strong because
we can very well specify to range where the plans caching will be
explicitly controlled. It is well readable and static.

Regards

Pavel

>
> cheers ./daniel


Re: [HACKERS] [Proposal] Allow users to specify multiple tables in VACUUM commands

2017-09-05 Thread Tom Lane
"Bossart, Nathan"  writes:
> On 9/4/17, 10:32 PM, "Simon Riggs"  wrote:
>> If we want to keep the code simple we must surely consider whether the
>> patch has any utility.

> ... I'd argue that this feels like a natural extension of the
> VACUUM command, one that I, like others much earlier in this thread,
> was surprised to learn wasn't supported.

Yeah.  To me, one big argument for allowing multiple target tables is that
we allow it for other common utility commands such as TRUNCATE or LOCK
TABLE.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] postgres_fdw bug in 9.6

2017-09-05 Thread Ryan Murphy
> I tested that with HEAD, but couldn't reproduce this problem.  Didn't you
test that with HEAD?

Yeah, I know it's not an issue other people are having - I think it's
something specific about how my environment / postgres build is set up.  In
any case I knew that it wasn't caused by your patch.

Thanks,
Ryan


Re: [HACKERS] [Proposal] Allow users to specify multiple tables in VACUUM commands

2017-09-05 Thread Bossart, Nathan
On 9/4/17, 10:32 PM, "Simon Riggs"  wrote:
> ISTM there is no difference between
>   VACUUM a, b
> and
>   VACUUM a; VACUUM b;
>
> If we want to keep the code simple we must surely consider whether the
> patch has any utility.

Yes, this is true, but I think the convenience factor is a bit
understated with that example.  For example, if you need to manually
cleanup several tables for XID purposes,
VACUUM FREEZE VERBOSE table1;
VACUUM FREEZE VERBOSE table2;
VACUUM FREEZE VERBOSE table3;
VACUUM FREEZE VERBOSE table4;
VACUUM FREEZE VERBOSE table5;
becomes
VACUUM FREEZE VERBOSE table1, table2, table3, table4, table5;

I would consider even this to be a relatively modest example compared
to the sorts of things users might do.

In addition, I'd argue that this feels like a natural extension of the
VACUUM command, one that I, like others much earlier in this thread,
was surprised to learn wasn't supported.

Nathan


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Fabien COELHO


This is good. Please can we backpatch these as far as they will go 
(easily)? There is very little risk in doing so and significant 
benefits in being able to rely on scripts that know about versions.


Hm.  I think it would be a fine idea to push this change into v10,
so that all psql versions that have \if would have these variables.
However, I'm less enthused about adding them to the 9.x branches.
Given the lack of \if, I'm not seeing a use-case that would justify
taking any compatibility risk for.

Opinions anyone?


I think that it is harmless and useful for v10, and mostly useless for 
previous versions.


ISTM that the hack looking like:

  \if false \echo BAD VERSION \q \endif

Allow to test a prior 10 version and stop. However testing whether a 
variable is defined is not included yet, so differentiating between 10 and 
11 would not be easy... thus having 10 => version available would be 
significantly helpful for scripting.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Tom Lane
Robert Haas  writes:
> On Tue, Sep 5, 2017 at 12:16 PM, Tom Lane  wrote:
>> Hm.  I think it would be a fine idea to push this change into v10,
>> so that all psql versions that have \if would have these variables.
>> However, I'm less enthused about adding them to the 9.x branches.
>> Given the lack of \if, I'm not seeing a use-case that would justify
>> taking any compatibility risk for.
>> 
>> Opinions anyone?

> As I see it, the risks of back-patching are:

> 1. Different minor versions will have different behavior, and that
> tends to create more problems than it solves.

Yeah.  Whatever use-case you think might exist for these variables in,
say, 9.6 is greatly weakened by the fact that releases through 9.6.5
don't have them.

However, since v10 is still in beta I don't think that argument
applies to it.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] psql - add special variable to reflect the last query status

2017-09-05 Thread Tom Lane
A few thoughts about this patch:

* I think the ERROR_CODE variable should instead be named SQLSTATE.
That is what the SQL standard calls this string, and it's also what
just about all our documentation calls it; see PG_DIAG_SQLSTATE
in libpq, or the SQLSTATE 'x' construct in pl/pgsql, or the
sqlstate attribute of an exception object in plpython, etc etc.

* I'm not exactly convinced that there's a use-case for STATUS
that's not covered as well or better by ERROR.  Client code that
looks at PQresStatus for anything beyond error/not-error is
usually doing that because it's library code that doesn't know
what kind of query it's working on.  It seems like a stretch that
a psql script would not know that.  Also, PQresultStatus memorializes
some legacy distinctions, like "fatal" vs "nonfatal" error, that
I think we'd be better off not exposing in psql scripting.

* It might be better if SQLSTATE and ERROR_MESSAGE were left
unchanged by a non-error query.  That would reduce the need to
copy them into other variables just because you needed to do
something else before printing them.  It'd save a few cycles too.

* Speaking of which, has anyone tried to quantify the performance
impact of this patch?  It might well be negligible, but I do not
think we should assume that without evidence.

* I wonder why you didn't merge this processing into ProcessResult,
instead of inventing an extra function (whose call site seems rather
poorly chosen anyhow --- what's the justification for not counting this
overhead as part of the query runtime?).  You could probably eliminate
the refactoring you did, since it wouldn't be necessary to recompute
AcceptResult's result that way.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] JIT compiling expressions/deform + inlining prototype v2.0

2017-09-05 Thread Andres Freund
On 2017-09-05 13:58:56 +0300, Konstantin Knizhnik wrote:
> 
> 
> On 04.09.2017 23:52, Andres Freund wrote:
> > 
> > Yea, I've changed that already, although it's currently added earlier,
> > because the alignment is needed before, to access the column correctly.
> > I've also made number of efficiency improvements, primarily to access
> > columns with an absolute offset if all preceding ones are fixed width
> > not null columns - that is quite noticeable performancewise.
> 
> Unfortunately, in most of real table columns are nullable.

I'm not sure I agree with that assertion, but:


> I wonder if we can perform some optimization in this case (assuming that in
> typical cases column either contains mostly non-null values, either mostly
> null values).

Even if all columns are NULLABLE, the JITed code is still a good chunk
faster (a significant part of that is the slot->tts_{nulls,values}
accesses). Alignment is still cheaper with constants, and often enough
the alignment can be avoided (consider e.g. a table full of nullable
ints - everything is guaranteed to be aligned, or columns after an
individual NOT NULL column is also guaranteed to be aligned). What
largely changes is that the 'offset' from the start of the tuple has to
be tracked.

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] A note about debugging TAP failures

2017-09-05 Thread Peter Eisentraut
On 4/25/17 11:00, Daniel Gustafsson wrote:
> Makes sense, assuming a “clean slate” to run on seems a reasonable assumption
> for the test and it makes for simpler code in PostgresNode.  Updated the patch
> which now has static datadir names; retains on PG_TESTS_NOCLEAN or any kind
> of test failure; and allows for cleaning datadirs without having to use make
> clean (seems the wrong size hammer for that nail).

Committed.  I had to make some changes in the pg_rewind tests.  Those
make data directories with conflicting names, which did not work anymore
because the names are no longer random.  Other than that this is pretty
nice.  Thanks!

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Robert Haas
On Tue, Sep 5, 2017 at 12:16 PM, Tom Lane  wrote:
> Hm.  I think it would be a fine idea to push this change into v10,
> so that all psql versions that have \if would have these variables.
> However, I'm less enthused about adding them to the 9.x branches.
> Given the lack of \if, I'm not seeing a use-case that would justify
> taking any compatibility risk for.
>
> Opinions anyone?

As I see it, the risks of back-patching are:

1. Different minor versions will have different behavior, and that
tends to create more problems than it solves.

2. There's a small probability that somebody's script could be relying
on these variables to be initially unset.

I think back-patching these changes into stable branches is a slippery
slope that we'd be best advised not to go down.  I don't feel as
strongly about v10.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Add psql variables showing server version and psql version.

2017-09-05 Thread Tom Lane
[ redirecting to -hackers for wider comment ]

Simon Riggs  writes:
> On 5 September 2017 at 07:51, Tom Lane  wrote:
>> Add psql variables showing server version and psql version.
>> 
>> We already had a psql variable VERSION that shows the verbose form of
>> psql's own version.  Add VERSION_NAME to show the short form (e.g.,
>> "11devel") and VERSION_NUM to show the numeric form (e.g., 11).
>> Also add SERVER_VERSION_NAME and SERVER_VERSION_NUM to show the short and
>> numeric forms of the server's version.  (We'd probably add SERVER_VERSION
>> with the verbose string if it were readily available; but adding another
>> network round trip to get it seems too expensive.)
>> 
>> The numeric forms, in particular, are expected to be useful for scripting
>> purposes, now that psql can do conditional tests.

> This is good.
> Please can we backpatch these are far as they will go (easily)?
> There is very little risk in doing so and significant benefits in
> being able to rely on scripts that know about versions.

Hm.  I think it would be a fine idea to push this change into v10,
so that all psql versions that have \if would have these variables.
However, I'm less enthused about adding them to the 9.x branches.
Given the lack of \if, I'm not seeing a use-case that would justify
taking any compatibility risk for.

Opinions anyone?

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] Assert that the correct locks are held when calling PageGetLSN()

2017-09-05 Thread Jacob Champion
Hi Michael, thanks for the review!

On Sun, Sep 3, 2017 at 11:14 PM, Michael Paquier
 wrote:
>> While working on checksum support for GPDB, we noticed that several
>> callers of PageGetLSN() didn't follow the correct locking procedure.
>
> Well, PageGetLSN can be used in some hot code paths, xloginsert.c
> being one, so it does not seem wise to me to switch it to something
> more complicated than a macro,

If raw perf is an issue -- now that I've refactored the assertion into
its own function, I could probably push the implementation back into a
macro. Something like

#define PageGetLSN(page) (AssertPageIsLockedForLSN((Page) page),
PageXLogRecPtrGet(((PageHeader) page)->pd_lsn))

Callers would need to watch out for the double-evaluation, but that's
already documented with this group of macros.

> and also it is not bounded to any
> locking contracts now. For example, pageinspect works on a copy of the
> buffer, so that's one case where we just don't care. So should it
> be tightened in Postgres?

The proposed patch doesn't tighten the contract in this case -- if the
buffer that's passed to PageGetLSN() isn't a shared buffer (i.e. if
it's not part of the BufferBlocks array), the assertion passes
unconditionally.

> That's the portion of the proposal I don't
> get. You could introduce a custom API for this purpose, isn't the
> origin of your problems with GPDB for checksums that data is
> replicated across many nodes so things need to be locked uniquely?

GPDB introduces additional problems, but I think this issue is
independent of anything GPDB-specific. One way to look at it: are the
changes in the proposed patch, from PageGetLSN to BufferGetLSNAtomic,
correct? If not, well, that's a good discussion to have. But if they
are correct, then why were they missed? And is there a way to help
future contributors out? That's the goal of this patch;
I don't think it's something that a custom API solves.

>> - This change introduces a subtle source incompatibility: whereas
>> previously both Pages and PageHeaders could be passed to PageGetLSN(),
>> now callers must explicitly cast to Page if they don't already have
>> one.
>
> That would produce warnings for extensions, so people would likely
> catch that when compiling with a newer version, if they use -Werror...
>
>> - Is my use of FRONTEND here correct? The documentation made it seem
>> like some compilers could try to link against the
>> AssertPageIsLockedForLSN function, even if PageGetLSN was never
>> called.
>
> +#if defined(USE_ASSERT_CHECKING) && !defined(FRONTEND)
> +void
> +AssertPageIsLockedForLSN(Page page)
> +{
> From a design point of view, bufmgr.c is an API interface for buffer
> management on the backend-side, so just using FRONTEND there looks
> wrong to me (bufmgr.h is already bad enough IMO now).

The comments suggested that bufmgr.h could be incidentally pulled into
frontend code through other headers. Or do you mean that I don't need
to check for FRONTEND in the C source file (since, I assume, it's only
ever being compiled for the backend)? I'm not sure what you mean by
"bufmgr.h is already bad enough".

>> - Use of PageGetLSN() in heapam.c seems to be incorrect, but I don't
>> really know what the best way is to fix it. It explicitly unlocks the
>> buffer, with the comment that visibilitymap_set() needs to handle
>> locking itself, but then calls PageGetLSN() to determine whether
>> visibilitymap_set() needs to be called.
>
> This bit in src/backend/access/transam/README may interest you:
> Note that we must only use PageSetLSN/PageGetLSN() when we know the action
> is serialised. Only Startup process may modify data blocks during recovery,
> so Startup process may execute PageGetLSN() without fear of serialisation
> problems. All other processes must only call PageSet/GetLSN when holding
> either an exclusive buffer lock or a shared lock plus buffer header lock,
> or be writing the data block directly rather than through shared buffers
> while holding AccessExclusiveLock on the relation.
>
> So I see no problem with the existing caller.

Is heap_xlog_visible only called during startup? If so, is there a
general rule for which locks are required during startup and which
aren't?

Currently there is no exception in the assertion made for startup
processes, so that's something that would need to be changed. Is there
a way to determine whether the current process considers itself to be
a startup process?

Thanks again!
--Jacob


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] proposal: psql: check env variable PSQL_PAGER

2017-09-05 Thread Pavel Stehule
2017-09-05 18:06 GMT+02:00 Tom Lane :

> Pavel Stehule  writes:
> > [ psql-psql-pager-env-2.patch ]
>
> Pushed, with some fooling with the documentation (notably,
> re-alphabetizing relevant lists).
>
> Grepping turned up an additional place that's examining the PAGER
> environment variable, namely PQprint() in libpq/fe-print.c.  That's
> not used by psql (or anything else in core PG) anymore; we're only
> keeping it around for hypothetical external users of libpq.  I debated
> whether to make it honor PSQL_PAGER.  I decided not to, since those
> external users are by definition not psql ... but I added
> a comment about that.
>

Thank you very much

Pavel


>
> regards, tom lane
>


Re: [HACKERS] proposal: psql: check env variable PSQL_PAGER

2017-09-05 Thread Tom Lane
Pavel Stehule  writes:
> [ psql-psql-pager-env-2.patch ]

Pushed, with some fooling with the documentation (notably,
re-alphabetizing relevant lists).

Grepping turned up an additional place that's examining the PAGER
environment variable, namely PQprint() in libpq/fe-print.c.  That's
not used by psql (or anything else in core PG) anymore; we're only
keeping it around for hypothetical external users of libpq.  I debated
whether to make it honor PSQL_PAGER.  I decided not to, since those
external users are by definition not psql ... but I added
a comment about that.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup throttling doesn't throttle as promised

2017-09-05 Thread Alvaro Herrera
Alvaro Herrera wrote:
> Jeff Janes wrote:
> 
> > I'm attaching a patch for each option.  Each one independently solves the
> > problem.  But I think we should do both.  There is no point in issuing
> > unnecessary kill system calls, and there may also be more spurious wake-ups
> > than just these ones.
> 
> I modified patch 1 a bit -- result attached.  I would like to back-patch
> this all the way back to 9.4, but there are a few conflicts due to
> c29aff959dc so I think I'm going to do pg10 only unless I get some votes
> that it should go further back.  I think it should be fairly trivial to
> resolve.

Pushed patch 1 to master and pg10.

Please add the other one to commitfest.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal for changes to recovery.conf API

2017-09-05 Thread Simon Riggs
On 5 September 2017 at 06:47, Daniel Gustafsson  wrote:
>> On 27 Mar 2017, at 10:27, Simon Riggs  wrote:
>>
>> On 7 March 2017 at 23:31, Josh Berkus  wrote:
>>> On 03/02/2017 07:13 AM, David Steele wrote:
 Hi Simon,

 On 2/25/17 2:43 PM, Simon Riggs wrote:
> On 25 February 2017 at 13:58, Michael Paquier  
> wrote:
>
>> - trigger_file is removed.
>> FWIW, my only complain is about the removal of trigger_file, this is
>> useful to detect a trigger file on a different partition than PGDATA!
>> Keeping it costs also nothing..
>
> Sorry, that is just an error of implementation, not intention. I had
> it on my list to keep, at your request.
>
> New version coming up.

 Do you have an idea when the new version will be available?
>>>
>>> Please?  Having yet another PostgreSQL release go by without fixing
>>> recovery.conf would make me very sad.
>>
>> I share your pain, but there are various things about this patch that
>> make me uncomfortable. I believe we are looking for an improved design
>> not just a different design.
>>
>> I think the best time to commit such a patch is at the beginning of a
>> new cycle, so people have a chance to pick out pieces they don't like
>> and incrementally propose changes.
>>
>> So I am going to mark this MovedToNextCF, barring objections from
>> committers willing to make it happen in this release.
>
> Next CF has now become This CF, has there been any work on this highly sought
> after patch?  Would be good to get closure on this early in the v11 cycle.

I've not worked on this at all, so it isn't ready for re-review and commit.

I get the "lets do it early" thing and will try to extract a subset in October.

-- 
Simon Riggshttp://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Variable substitution in psql backtick expansion

2017-09-05 Thread Fabien COELHO



[ psql-version-num-8.patch ]


Pushed with some minor additional fiddling.


Ok, Thanks. I also noticed the reformating.

--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgbench: Skipping the creating primary keys after initialization

2017-09-05 Thread Fabien COELHO



Sorry, I don't follow that. You meant I should add a newline before
pg_realloc()? That is,

+initialize_cmds =
+(char *) pg_realloc(initialize_cmds,
+sizeof(char) * n_cmds + 1);


Yes. Or maybe my terminal was doing tricks, because I had the impression 
that both arguments were on the same line with many tabs in between, but 
maybe I just misinterpreted the diff file. My apologies if that is the case.


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Variable substitution in psql backtick expansion

2017-09-05 Thread Tom Lane
I wrote:
> "Daniel Verite"  writes:
>> That would look like the following, for example, with a 3-space margin
>> for the description:

>> AUTOCOMMIT
>>If set, successful SQL commands are automatically committed

> But we could do something close to that, say two-space indent for the
> variable names and four-space for the descriptions.

Done like that.  I forgot to credit you with the idea in the commit log;
sorry about that.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Variable substitution in psql backtick expansion

2017-09-05 Thread Tom Lane
Fabien COELHO  writes:
> [ psql-version-num-8.patch ]

Pushed with some minor additional fiddling.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal for changes to recovery.conf API

2017-09-05 Thread Daniel Gustafsson
> On 27 Mar 2017, at 10:27, Simon Riggs  wrote:
> 
> On 7 March 2017 at 23:31, Josh Berkus  wrote:
>> On 03/02/2017 07:13 AM, David Steele wrote:
>>> Hi Simon,
>>> 
>>> On 2/25/17 2:43 PM, Simon Riggs wrote:
 On 25 February 2017 at 13:58, Michael Paquier  
 wrote:
 
> - trigger_file is removed.
> FWIW, my only complain is about the removal of trigger_file, this is
> useful to detect a trigger file on a different partition that PGDATA!
> Keeping it costs also nothing..
 
 Sorry, that is just an error of implementation, not intention. I had
 it on my list to keep, at your request.
 
 New version coming up.
>>> 
>>> Do you have an idea when the new version will be available?
>> 
>> Please?  Having yet another PostgreSQL release go by without fixing
>> recovery.conf would make me very sad.
> 
> I share your pain, but there are various things about this patch that
> make me uncomfortable. I believe we are looking for an improved design
> not just a different design.
> 
> I think the best time to commit such a patch is at the beginning of a
> new cycle, so people have a chance to pick out pieces they don't like
> and incrementally propose changes.
> 
> So I am going to mark this MovedToNextCF, barring objections from
> committers willing to make it happen in this release.

Next CF has now become This CF, has there been any work on this highly sought
after patch?  Would be good to get closure on this early in the v11 cycle.

cheers ./daniel



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_upgrade changes can it use CREATE EXTENSION?

2017-09-05 Thread Tom Lane
Sandro Santilli  writes:
> On Wed, Aug 30, 2017 at 06:01:58PM -0400, Tom Lane wrote:
>> We intentionally *don't* do that; pg_dump goes to a lot of trouble to
>> duplicate the old extension contents exactly, instead.  There are a bunch
>> of corner cases that would fail if we allowed the new installation to
>> have different extension contents than the old.  Believe you me, we'd
>> rather have just issued CREATE EXTENSION, but it doesn't work.

> Did you mean `pg_upgrade` ("goes to a lot of trouble") ?

To be precise, pg_dump when working on behalf of pg_upgrade (that is, with
the --binary-upgrade switch).

>> Looking quickly at the thread you cite, I wonder how much of this problem
>> is caused by including version numbers in the library's .so filename.
>> Have you considered not doing that? 

> The name change is intentional, to reflect a promise we make between
> patch-level releases but not between minor-level releases. The promise
> to keep C function signatures referenced by SQL objects immutable and
> behavior compatible.

FWIW, I do not think that the library file name is a useful place to
try to enforce such a thing.  Changing the file name creates complications
for upgrade, and it doesn't actually stop you from breaking anything.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Vladimir Borodin

> 5 сент. 2017 г., в 15:48, Michael Paquier  
> написал(а):
> 
> On Tue, Sep 5, 2017 at 9:40 PM, Vladimir Borodin  wrote:
>> We do compress WALs and send them over network. Doing it via archive_command
>> in single thread is sometimes slower than new WALs are written under heavy
>> load.
> 
> Ah, yeah, true. I do use pg_receivexlog --compress for that locally
> and do a bulk copy of only the compressed WALs needed, when needed...
> So there is a guarantee that completed segments are durable locally,
> which is very useful.

It seems that option --compress appeared only in postgres 10 which is not ready 
for production yet. BTW I assume that pg_receivexlog is single-threaded too? So 
it still may be the bottleneck when 3-5 WALs per second are written.

> You should definitely avoid putting that in
> PGDATA though, the same counts for tablespaces within PGDATA for
> example.

I would love to but there might be some problems with archiving and in many 
cases the only partition with enough space to accumulate WALs is partition for 
PGDATA.

--
May the force be with you…
https://simply.name



Re: [HACKERS] Coverage improvements of src/bin/pg_basebackup and pg_receivewal --endpos

2017-09-05 Thread Peter Eisentraut
On 6/9/17 02:08, Michael Paquier wrote:
> On Wed, Jun 7, 2017 at 9:20 AM, Michael Paquier
>  wrote:
>> While it is possible to tackle some of those issues independently,
>> like pg_basebackup stuff, it seems to me that it would be helpful to
>> make the tests more deterministic by having an --endpos option for
>> pg_receivewal, similarly to what exists already in pg_recvlogical.
>>
>> Thoughts?
> 
> I have just played with that, and attached is a patch to implement the
> so-said option with a basic set of tests, increasing code coverage of
> pg_receivewal.c from 15% to 60%. I'll park that in the next CF of
> September.

Looks like a good idea.  A couple of thoughts:

- I think the tests should be written in the style of
$node->command_fails instead of just command_fails.  At least, that's
how it's done in the pg_basebackup tests.

- I think the tests will fail if libz support is not built.  Again,
pg_basebackup doesn't have tests for that.  So maybe omit that and
address it later.

- The variable exit_on_stop seems redundant with time_to_abort.  They do
the same thing, so maybe your patch could reuse it.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] Improve geometric types

2017-09-05 Thread Aleksander Alekseev
The following review has been posted through the commitfest application:
make installcheck-world:  tested, passed
Implements feature:   tested, passed
Spec compliant:   tested, passed
Documentation:tested, passed

LGTM.

The new status of this patch is: Ready for Committer

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgbench tap tests & minor fixes.

2017-09-05 Thread Tom Lane
Fabien COELHO  writes:
>>> I run the test, figure out the number it found in the resulting
>>> error message, and update the number in the source.

>> Yeah, but then what error is all that work protecting you from?

> I'm not sure I understand your point. I agree that Perl doing the counting 
> may hide issues. Now it is more of an incremental thing, if a test is 
> added the counter is upgraded accordingly, and the local consistency can 
> be checked.

Well, IIUC the point of "tests => nnn" is to protect against the
possibility that some coding mistake caused some of the tests to not
get run.  If you change the test script and then take Perl's word for
what nnn should be, what protection have you got that you didn't introduce
such a mistake along with whatever your intended change was?  It seems
pointless to maintain the tests count that way.

> Anyway, as some tests may have to be skipped on some platforms, it seems 
> that the done_testing approach is sounder. The v12 patch uses that.

Sounds good.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Vladimir Borodin

> 5 сент. 2017 г., в 15:42, Chris Travers  написал(а):
> 
> On Tue, Sep 5, 2017 at 2:40 PM, Vladimir Borodin  > wrote:
> 
>> 5 сент. 2017 г., в 14:04, Michael Paquier > > написал(а):
>> 
>>> For example, in archive_command we put WALs for archiving from
>>> pg_xlog/pg_wal into another directory inside PGDATA and then another cron
>>> task does the real archiving. This directory ideally should be skipped by
>>> pg_rewind, but it would not be handled by proposed change.
>> 
>> I would be curious to follow the reasoning for such a two-phase
>> archiving (You basically want to push it in two places, no? But why
>> not just use pg_receivexlog then?). This is complicated to handle from
>> the point of view of availability and backup reliability + durability.
> 
> We do compress WALs and send them over network. Doing it via archive_command 
> in single thread is sometimes slower than new WALs are written under heavy 
> load.
> 
> How would this work when it comes to rewinding against a file directory? 

Very bad, of course. Sometimes we get 'could not remove file 
"/var/lib/postgresql/9.6/data/wals/000100C300C6": No such file or 
directory’ while running pg_rewind ($PGDATA/wals is a directory where 
archive_command copies WALs). That’s why I want to solve the initial problem. 
Both proposed solutions (using only needed files and skipping files through 
glob/regex) are fine for me, but not the initial patch.

--
May the force be with you…
https://simply.name



Re: [HACKERS] PoC plpgsql - possibility to force custom or generic plan

2017-09-05 Thread Daniel Gustafsson
> On 08 Apr 2017, at 09:42, Pavel Stehule  wrote:
> 
> 2017-04-08 2:30 GMT+02:00 Peter Eisentraut  >:
> On 4/6/17 14:32, Pavel Stehule wrote:
> > I like to see any proposals about syntax or implementation.
> >
> > Using PRAGMA is one variant - introduced by PLpgSQL origin - Ada
> > language. The PRAGMA syntax can be used for PRAGMA autonomous with well
> > known syntax. It scales well  - it supports function, block or command
> > level.
> 
> I had pragmas implemented in the original autonomous transactions patch
> (https://www.postgresql.org/message-id/659a2fce-b6ee-06de-05c0-c8ed6a019...@2ndquadrant.com
>  
> ).
>  However, the difference there is that the behavior is lexical, specific
> to plpgsql, whereas here you are really just selecting run time
> behavior.  So a GUC, and also something that could apply to other
> places, should be considered.
> 
> I'll look there - we coordinate work on that.

This patch was moved to the now started commitfest, and is marked as “Needs
review”.  Based on this thread I will however change it to "waiting for author”,
since there seems to be some open questions.  Has there been any new work done
on this towards a new design/patch?

cheers ./daniel

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Michael Paquier
On Tue, Sep 5, 2017 at 9:40 PM, Vladimir Borodin  wrote:
> We do compress WALs and send them over network. Doing it via archive_command
> in single thread is sometimes slower than new WALs are written under heavy
> load.

Ah, yeah, true. I do use pg_receivexlog --compress for that locally
and do a bulk copy of only the compressed WALs needed, when needed...
So there is a guarantee that completed segments are durable locally,
which is very useful. You should definitely avoid putting that in
PGDATA though, the same counts for tablespaces within PGDATA for
example.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Making clausesel.c Smarter

2017-09-05 Thread Daniel Gustafsson

> On 09 Apr 2017, at 12:14, David Rowley  wrote:
> 
> On 8 April 2017 at 09:33, Claudio Freire  wrote:
>> Otherwise, the patch LGTM, but I'd like to solve the quadratic
>> behavior too... are you going to try? Otherwise I could take a stab at
>> it myself. It doesn't seem very difficult.
> 
> I have some ideas in my head in a fairly generic way of solving this
> which I'd like to take care of in PG11.

This patch was moved to the currently open Commitfest.  Given the above
comment, is the last patch in this thread still up for review, or are you
working on a new version?  Just to avoid anyone reviewing an outdated version.

cheers ./daniel

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Chris Travers
On Tue, Sep 5, 2017 at 2:40 PM, Vladimir Borodin  wrote:

>
> 5 сент. 2017 г., в 14:04, Michael Paquier 
> написал(а):
>
> For example, in archive_command we put WALs for archiving from
> pg_xlog/pg_wal into another directory inside PGDATA and then another cron
> task does the real archiving. This directory ideally should be skipped by
> pg_rewind, but it would not be handled by proposed change.
>
>
> I would be curious to follow the reasoning for such a two-phase
> archiving (You basically want to push it in two places, no? But why
> not just use pg_receivexlog then?). This is complicated to handle from
> the point of view of availability and backup reliability + durability.
>
>
> We do compress WALs and send them over network. Doing it via
> archive_command in single thread is sometimes slower than new WALs are
> written under heavy load.
>

How would this work when it comes to rewinding against a file directory?

>
> --
> May the force be with you…
> https://simply.name
>
>


-- 
Best Regards,
Chris Travers
Database Administrator

Tel: +49 162 9037 210 | Skype: einhverfr | www.adjust.com
Saarbrücker Straße 37a, 10405 Berlin


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Chris Travers
On Tue, Sep 5, 2017 at 1:04 PM, Michael Paquier 
wrote:

> On Tue, Sep 5, 2017 at 7:54 PM, Vladimir Borodin  wrote:
> > 5 сент. 2017 г., в 12:31, Chris Travers 
> > написал(а):
> >
> > I think the simplest solution for now is to skip any files ending in
> .conf,
> > .log, and serverlog.
>
> This is not a portable solution. Users can include configuration files
> with the names they want. So the current patch as proposed is
> definitely not something worth it.
>

Actually that is exactly why I think the long-term solution is to figure
out what we need to copy and not copy anything we don't recognise.

That means the following directories as far as I can see:
 * base
 * global
 * pg_xlog/pg_wal
 * pg_clog/pg_xact
 * pg_commit_ts
 * pg_twophase
 * pg_snapshots?

Are there any other directories I am missing?


At any rate, I think the current state makes it very difficult to test
rewind adequately, and it makes it extremely difficult to use in a
non-trivial environment because you have to handle replication slots,
configuration files, and so forth yourself, and you have to be aware that
these *may* or *may not* be consistently clobbered by a rewind, so you have
to have some way of applying another set of files in following a rewind.

If nothing else we ought to *at least* special case the recovery.conf and
the postgresql.auto.conf, and pg_replslot because these are always located
there and should never be clobbered.


>
> > For example, in archive_command we put WALs for archiving from
> > pg_xlog/pg_wal into another directory inside PGDATA and then another cron
> > task does the real archiving. This directory ideally should be skipped by
> > pg_rewind, but it would not be handled by proposed change.
>
> I would be curious to follow the reasoning for such a two-phase
> archiving (You basically want to push it in two places, no? But why
> not just use pg_receivexlog then?). This is complicated to handle from
> the point of view of availability and backup reliability + durability.
>
> > While it is definitely an awful idea the user can easily put something
> > strange (i.e. logs) to any important directory in PGDATA (i.e. into base
> or
> > pg_wal). Or how for example pg_replslot should be handled (I asked about
> it
> > a couple of years ago [1])? It seems that a glob/regexp for things to
> skip
> > is a more universal solution.
> >
> > [1]
> > https://www.postgresql.org/message-id/flat/8DDCCC9D-450D-
> 4CA2-8CF6-40B382F1F699%40simply.name
>
> Well, keeping the code simple is not always a bad thing. Logs are an
> example that can be easily countered, as well as archives in your
> case.
>




> --
> Michael
>



-- 
Best Regards,
Chris Travers
Database Administrator

Tel: +49 162 9037 210 | Skype: einhverfr | www.adjust.com
Saarbrücker Straße 37a, 10405 Berlin


Re: [HACKERS] Disallowing multiple queries per PQexec()

2017-09-05 Thread Peter Eisentraut
On 6/14/17 10:05, Surafel Temesgen wrote:
> PGC_POSTMASTER implies that it's an instance-wide setting.
> Is is intentional? I can understand that it's more secure for this
> not to
> be changeable in an existing session, but it's also much less usable
> if you
> can't set it per-database and per-user.
> Maybe it should be PGC_SUSET ?
> 
> It’s my misunderstanding I thought PGC_POSTMASTER set only by 
> superuser and changed with a hard restart 
> 
> I attach a patch that incorporate the comments and uses similar routines
> with the rest of the file rather than using command tag

After reviewing the discussion, I'm inclined to reject this patch.
Several people have spoken out strongly against this patch.  It's clear
that this feature wouldn't actually offer any absolute protection; it
just closes one particular hole.  On the other hand, it introduces a
maintenance and management burden.

We already have libpq APIs that offer a more comprehensive protection
against SQL injection, so we can encourage users to use those, instead
of relying on uncertain measures such as this.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Vladimir Borodin

> 5 сент. 2017 г., в 14:04, Michael Paquier  
> написал(а):
> 
>> For example, in archive_command we put WALs for archiving from
>> pg_xlog/pg_wal into another directory inside PGDATA and then another cron
>> task does the real archiving. This directory ideally should be skipped by
>> pg_rewind, but it would not be handled by proposed change.
> 
> I would be curious to follow the reasoning for such a two-phase
> archiving (You basically want to push it in two places, no? But why
> not just use pg_receivexlog then?). This is complicated to handle from
> the point of view of availability and backup reliability + durability.

We do compress WALs and send them over network. Doing it via archive_command in 
single thread is sometimes slower than new WALs are written under heavy load.

--
May the force be with you…
https://simply.name



Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Chris Travers
On Tue, Sep 5, 2017 at 12:54 PM, Vladimir Borodin  wrote:

>
> 5 сент. 2017 г., в 12:31, Chris Travers 
> написал(а):
>
> I think the simplest solution for now is to skip any files ending in
> .conf, .log, and serverlog.
>
>
> Why don’t you want to solve the problem once? It is a bit harder to get
> consensus on a way how to do it, but it seems that there are no reasons to
> make temporary solution here.
>
> For example, in archive_command we put WALs for archiving from
> pg_xlog/pg_wal into another directory inside PGDATA and then another cron
> task does the real archiving. This directory ideally should be skipped by
> pg_rewind, but it would not be handled by proposed change.
>

Ok let's back up a bit in terms of what I see is the proper long-term fix.
Simple code, by the way, is important, but at least as important are
programs which solve simple, well defined problems.  The current state is:

1.  pg_rewind makes *no guarantee* as to whether differences in logs,
config files, etc. are clobbered.  They may (If a rewind is needed) or not
(If the timelines haven't diverged).  Therefore the behavior of these sorts
of files with the invocation of pg_rewind is not really very well defined.
That's a fairly big issue in an operational environment.

2.  There are files which *may* be copied (I.e. are copied if the timelines
have diverged) which *may* have side effects on replication topology, wal
archiving etc.  Replication slots, etc. are good examples.

The problem I think pg_rewind should solve is "give me a consistent data
environment from the timeline on that server."  I would think that access
to the xlog/clog files would indeed be relevant to that.  If I were
rewriting the application now I would include those.  Just because
something can be handled separately doesn't mean it should be, and I would
prefer not to assume that archiving is properly set up and working.

>
>
> Long run, it would be nice to change pg_rewind from an opt-out approach to
> an approach of processing the subdirectories we know are important.
>
>
> While it is definitely an awful idea the user can easily put something
> strange (i.e. logs) to any important directory in PGDATA (i.e. into base or
> pg_wal). Or how for example pg_replslot should be handled (I asked about it
> a couple of years ago [1])? It seems that a glob/regexp for things to skip
> is a more universal solution.
>

I am not convinced it is a universal solution unless you take an arbitrary
number of regexes to check and loop through checking all of them.  Then the
chance of getting something catastrophically wrong in a critical
environment goes way up and you may end up in an inconsistent state at the
end.

Simple code is good.  A program that solves simple problems reliably (and
in simple ways) is better.

The problem I see is that pg_rewind gets incorporated into other tools
which don't really provide the user before or after hooks and therefore it
isn't really fair to say, for example that repmgr has the responsibility to
copy server logs out if present, or to make sure that configuration files
are not in the directory.

The universal solution is to only touch the files that we know are needed
and therefore work simply and reliably in a demanding environment.


>
> [1] https://www.postgresql.org/message-id/flat/8DDCCC9D-
> 450D-4CA2-8CF6-40B382F1F699%40simply.name
>
>
> --
> May the force be with you…
> https://simply.name
>
>


-- 
Best Regards,
Chris Travers
Database Administrator

Tel: +49 162 9037 210 | Skype: einhverfr | www.adjust.com
Saarbrücker Straße 37a, 10405 Berlin


Re: [HACKERS] no test coverage for ALTER FOREIGN DATA WRAPPER name HANDLER ...

2017-09-05 Thread Ashutosh Bapat
On Wed, May 10, 2017 at 5:55 AM, Amit Langote
 wrote:
> On 2017/05/09 22:52, Mark Dilger wrote:
>>
>>> On May 9, 2017, at 12:18 AM, Amit Langote  
>>> wrote:
>>>
>>> Hi,
>>>
>>> On 2017/05/05 9:38, Mark Dilger wrote:
 Hackers,

 just FYI, I cannot find any regression test coverage of this part of the 
 grammar, not
 even in the contrib/ directory or TAP tests.  I was going to submit a 
 patch to help out,
 and discovered it is not so easy to do, and perhaps that is why the 
 coverage is missing.
>>>
>>> I think we could add the coverage by defining a dummy C FDW handler
>>> function in regress.c.  I see that some other regression tests use C
>>> functions defined there, such as test_atomic_ops().
>>>
>>> What do you think about the attached patch?  I am assuming you only meant
>>> to add tests for the code in foreigncmds.c (CREATE/ALTER FOREIGN DATA
>>> WRAPPER).
>>
>> Thank you for creating the patch.  I only see one small oversight, which is 
>> the
>> successful case of ALTER FOREIGN DATA WRAPPER ... HANDLER ... is still
>> not tested.  I added one line for that in the attached modification of your 
>> patch.
>
> Ah right, thanks.
>
> Adding this to the next commitfest as Ashutosh suggested.
>

The patch needs a rebase.

-- 
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Copyright in partition.h and partition.c

2017-09-05 Thread Tom Lane
Amit Langote  writes:
> On 2017/09/05 15:48, Etsuro Fujita wrote:
>> Here is the copyright in partition.h:
>> 
>>  * Copyright (c) 2007-2017, PostgreSQL Global Development Group
>> 
>> I think it's reasonable that that matches the copyright in partition.c,
>> but partition.c has:
>> 
>>  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
>>  * Portions Copyright (c) 1994, Regents of the University of California
>> 
>> Is that intentional?

> No, it's unintentional.  The difference may have resulted from copying
> different files to become partition.h and partition.c, respectively.

> Maybe, we should change both to say 2016-2017?

> I don't know the exact rule for how we determine those years.  Is there
> some rule in place about that?  When I look at execParallel.c, which
> supposedly got introduced into the tree recently, I see 1996-2017.  OTOH,
> the files in contrib/bloom all have 2016-2017.

Our usual practice is to write the copyright like it is in partition.c
even in new files.  This avoids any question about whether any of the
code was copied-and-pasted from somewhere else in PG.  Even if not one
word in the file can be traced to code that was somewhere else before,
it seems to me that this is an appropriate thing to do, to give due
credit to those who came before us.

In short: we should make partition.h's copyright look like partition.c's
not vice versa.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Replacing lfirst() with lfirst_node() appropriately in planner.c

2017-09-05 Thread Ashutosh Bapat
On Tue, Sep 5, 2017 at 4:02 PM, Jeevan Chalke
 wrote:
> Hi,
>
> lfirst_node() changes are missing for List node type and I was thinking
> about adding those. But it looks like we cannot do so as List can be a
> T_List, T_IntList, or T_OidList. So I am OK with that.

Thanks for investigating the case of T_List.

>
> Apart from this, changes look good to me. Everything works fine.
>
> As we are now doing it for lfirst(), can we also do this for linitial()?
> I did not find any usage for lsecond(), lthird(), lfourth() and llast()
> though.
>
> Attached patch for replacing linitial() with linital_node() appropriately in
> planner.c
>
Ok. The patch looks good to me. It compiles and make check passes.
Here are both the patches rebased on the latest sources.

I am marking this commitfest entry as "ready for committer".
From 25ae53d7f72a54a03ec90206c7e5579a562a121c Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat 
Date: Tue, 5 Sep 2017 16:50:58 +0530
Subject: [PATCH 1/2] Use lfirst_node() instead of lfirst() wherever suitable
 in planner.c

Ashutosh Bapat, reviewed by Jeevan Chalke
---
 src/backend/optimizer/plan/planner.c |   94 +-
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 9662302..a1dd157 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -411,7 +411,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 		forboth(lp, glob->subplans, lr, glob->subroots)
 		{
 			Plan	   *subplan = (Plan *) lfirst(lp);
-			PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
+			PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
 
 			SS_finalize_plan(subroot, subplan);
 		}
@@ -430,7 +430,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 	forboth(lp, glob->subplans, lr, glob->subroots)
 	{
 		Plan	   *subplan = (Plan *) lfirst(lp);
-		PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
+		PlannerInfo *subroot = lfirst_node(PlannerInfo, lr);
 
 		lfirst(lp) = set_plan_references(subroot, subplan);
 	}
@@ -586,7 +586,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	hasOuterJoins = false;
 	foreach(l, parse->rtable)
 	{
-		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
 
 		if (rte->rtekind == RTE_JOIN)
 		{
@@ -643,7 +643,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	newWithCheckOptions = NIL;
 	foreach(l, parse->withCheckOptions)
 	{
-		WithCheckOption *wco = (WithCheckOption *) lfirst(l);
+		WithCheckOption *wco = lfirst_node(WithCheckOption, l);
 
 		wco->qual = preprocess_expression(root, wco->qual,
 		  EXPRKIND_QUAL);
@@ -663,7 +663,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 
 	foreach(l, parse->windowClause)
 	{
-		WindowClause *wc = (WindowClause *) lfirst(l);
+		WindowClause *wc = lfirst_node(WindowClause, l);
 
 		/* partitionClause/orderClause are sort/group expressions */
 		wc->startOffset = preprocess_expression(root, wc->startOffset,
@@ -705,7 +705,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	/* Also need to preprocess expressions within RTEs */
 	foreach(l, parse->rtable)
 	{
-		RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
 		int			kind;
 		ListCell   *lcsq;
 
@@ -1080,7 +1080,7 @@ inheritance_planner(PlannerInfo *root)
 	rti = 1;
 	foreach(lc, parse->rtable)
 	{
-		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
 
 		if (rte->rtekind == RTE_SUBQUERY)
 			subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
@@ -1102,7 +1102,7 @@ inheritance_planner(PlannerInfo *root)
 	{
 		foreach(lc, root->append_rel_list)
 		{
-			AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
+			AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
 
 			if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
 bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
@@ -1130,7 +1130,7 @@ inheritance_planner(PlannerInfo *root)
 	 */
 	foreach(lc, root->append_rel_list)
 	{
-		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
+		AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
 		PlannerInfo *subroot;
 		RangeTblEntry *child_rte;
 		RelOptInfo *sub_final_rel;
@@ -1192,7 +1192,7 @@ inheritance_planner(PlannerInfo *root)
 			subroot->append_rel_list = NIL;
 			foreach(lc2, root->append_rel_list)
 			{
-AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
+AppendRelInfo *appinfo2 = lfirst_node(AppendRelInfo, lc2);
 
 if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
 	appinfo2 = copyObject(appinfo2);
@@ -1227,7 +1227,7 @@ inheritance_planner(PlannerInfo *root)
 			rti = 1;
 			foreach(lr, parse->rtable)
 			{
-RangeTblEntry *rte = 

Re: [HACKERS] advanced partition matching algorithm for partition-wise join

2017-09-05 Thread Rajkumar Raghuwanshi
On Tue, Sep 5, 2017 at 4:34 PM, Ashutosh Bapat <
ashutosh.ba...@enterprisedb.com> wrote:

> I have fixed the issues which were marked as TODOs in the attached
> patches. Also, I have included your test change patch in my series of
> patches. Are there any other issues you have commented out?
>
> Thanks Ashutosh, All commented issue got fixed. I am working on some
combinations of N-way joins
to test partition matching, will send those as well once done.


Re: [HACKERS] Should we cacheline align PGXACT?

2017-09-05 Thread Daniel Gustafsson
> On 04 Apr 2017, at 14:58, David Steele  wrote:
> 
> On 4/4/17 8:55 AM, Alexander Korotkov wrote:
>> On Mon, Apr 3, 2017 at 9:58 PM, Andres Freund > 
>>I'm inclined to push this to the next CF, it seems we need a lot more
>>benchmarking here.
>> 
>> No objections.
> 
> This submission has been moved to CF 2017-07.

This CF has now started (well, 201709 but that’s what was meant in above), can
we reach closure on this patch in this CF?

cheers ./daniel

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] WIP: long transactions on hot standby feedback replica / proof of concept

2017-09-05 Thread Simon Riggs
On 4 September 2017 at 09:06, Alexander Korotkov
 wrote:

> Aborting read-only query on standby because of vacuum on master seems to be
> rather unexpected behaviour for user when hot standby feedback is on.
> I think we should work on this problem for v11.

Happy to help. My suggestion would be to discuss a potential theory of
operation and then code a patch.

As Alexander says, simply skipping truncation if standby is busy isn't
a great plan.

If we defer an action on standby replay, when and who will we apply
it? What happens if the standby is shutdown or crashes while an action
is pending.

Perhaps altering the way truncation requires an AccessExclusiveLock
would help workloads on both master and standby? If a Relation knew it
had a pending truncation then scans wouldn't need to go past newsize.
Once older lockers have gone we could simply truncate without the
lock. Would need a few pushups but seems less scary then trying to
make pending standby actions work well enough to commit.

-- 
Simon Riggs    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Tuple-routing for certain partitioned tables not working as expected

2017-09-05 Thread Etsuro Fujita

On 2017/08/30 17:20, Etsuro Fujita wrote:

On 2017/08/30 9:13, Amit Langote wrote:

On 2017/08/29 20:18, Etsuro Fujita wrote:

On 2017/08/25 22:26, Robert Haas wrote:

On Wed, Aug 23, 2017 at 4:55 AM, Etsuro Fujita
 wrote:
Agreed, but I'd vote for fixing this in v10 as proposed; I agree 
that just
ripping the CheckValidResultRel checks out entirely is not a good 
idea,

but
that seems OK to me at least as a fix just for v10.


I'm still not on-board with having this be the one case where we don't
do CheckValidResultRel.  If we want to still call it but pass down
some additional information that can selectively skip certain checks,
I could probably live with that.


Another idea would be to not do CheckValidResultRel for partitions in
ExecSetupPartitionTupleRouting; instead, do that the first time the
partition is chosen by ExecFindPartition, and if successfully checked,
initialize the partition's ResultRelInfo and other stuff.  (We could 
skip

this after the first time by checking whether we already have a valid
ResultRelInfo for the chosen partition.)  That could allow us to not 
only

call CheckValidResultRel the way it is, but avoid initializing useless
partitions for which tuples aren't routed at all.


I too have thought about the idea of lazy initialization of the partition
ResultRelInfos.  I think it would be a good idea, but definitely 
something

for PG 11.


Agreed.  Here is a patch to skip the CheckValidResultRel checks for a 
target table if it's a foreign partition to perform tuple-routing for, 
as proposed by Robert.


I'll add this to the open items list for v10.

Best regards,
Etsuro Fujita



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Protect syscache from bloating with negative cache entries

2017-09-05 Thread Kyotaro HORIGUCHI
Thank you for the comment.

At Mon, 28 Aug 2017 21:31:58 -0400, Robert Haas  wrote 
in 
> On Mon, Aug 28, 2017 at 5:24 AM, Kyotaro HORIGUCHI
>  wrote:
> > This patch have had interferences from several commits after the
> > last submission. I amended this patch to follow them (up to
> > f97c55c), removed an unnecessary branch and edited some comments.
> 
> I think the core problem for this patch is that there's no consensus
> on what approach to take.  Until that somehow gets sorted out, I think
> this isn't going to make any progress.  Unfortunately, I don't have a
> clear idea what sort of solution everybody could tolerate.
> 
> I still think that some kind of slow-expire behavior -- like a clock
> hand that hits each backend every 10 minutes and expires entries not
> used since the last hit -- is actually pretty sensible.  It ensures
> that idle or long-running backends don't accumulate infinite bloat
> while still allowing the cache to grow large enough for good
> performance when all entries are being regularly used.  But Tom
> doesn't like it.  Other approaches were also discussed; none of them
> seem like an obvious slam-dunk.

I suppose that it slows intermittent lookup of non-existent
objects. I have tried a slight different thing. Removing entries
by 'age', preserving specified number (or ratio to live entries)
of younger negative entries. The problem of that approach was I
didn't find how to determine the number of entries to preserve,
or I didn't want to offer additional knobs for them. Finally I
proposed the patch upthread since it doesn't need any assumption
on usage.

Though I can make another patch that does the same thing based on
LRU, the same how-many-to-preserve problem ought to be resolved
in order to avoid slowing the intermittent lookup.

> Turning to the patch itself, I don't know how we decide whether the
> patch is worth it.  Scanning the whole (potentially large) cache to
> remove negative entries has a cost, mostly in CPU cycles; keeping
> those negative entries around for a long time also has a cost, mostly
> in memory.  I don't know how to decide whether these patches will help
> more people than it hurts, or the other way around -- and it's not
> clear that anyone else has a good idea about that either.

Scanning a hash on invalidation of several catalogs (hopefully
slightly) slows a certain percentage of invalidations on maybe most
of workloads. Holding no-longer-lookedup entries surely kills a
backend under certain workloads sooner or later.  This doesn't
save the pg_proc cases, but saves pg_statistic and pg_class
cases. I'm not sure what other catalogs can bloat.

I could reduce the complexity of this. Inval mechanism conveys
only a hash value so this scans the whole of a cache for the
target OIDs (with possible spurious targets). This will be
resolved by letting inval mechanism convey an OID. (but this may
need additional members in an inval entry.)

Still, the full scan perfomed in CleanupCatCacheNegEntries
doesn't seem easily avoidable. Separating the hash by OID of key
or provide special dlist that points tuples in buckets will
introduce another complexity.


> Typos: funciton, paritial.

Thanks. ispell told me of additional typos corresnpond, belive
and undistinguisable.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Protect syscache from bloating with negative cache entries

2017-09-05 Thread Kyotaro HORIGUCHI
Thank you for reviewing this.

At Sat, 2 Sep 2017 12:12:47 +1200, Thomas Munro  
wrote in 

Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Michael Paquier
On Mon, Sep 4, 2017 at 10:38 PM, Alvaro Herrera  wrote:
> I wonder how portable fnmatch() is in practice (which we don't currently
> use anywhere).  A shell glob seems a more natural interface to me for
> this than a regular expression.

On Windows you could use roughly PathMatchSpecEx, but it does not seem
that all the wildcards of fnmatch are available there.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Michael Paquier
On Tue, Sep 5, 2017 at 7:54 PM, Vladimir Borodin  wrote:
> 5 сент. 2017 г., в 12:31, Chris Travers 
> написал(а):
>
> I think the simplest solution for now is to skip any files ending in .conf,
> .log, and serverlog.

This is not a portable solution. Users can include configuration files
with the names they want. So the current patch as proposed is
definitely not something worth it.

> For example, in archive_command we put WALs for archiving from
> pg_xlog/pg_wal into another directory inside PGDATA and than another cron
> task makes real archiving. This directory ideally should be skipped by
> pg_rewind, but it would not be handled by proposed change.

I would be curious to follow the reasoning for such a two-phase
archiving (You basically want to push it in two places, no? But why
not just use pg_receivexlog then?). This is complicated to handle from
the point of view of availability and backup reliability + durability.

> While it is definitely an awful idea the user can easily put something
> strange (i.e. logs) to any important directory in PGDATA (i.e. into base or
> pg_wal). Or how for example pg_replslot should be handled (I asked about it
> a couple of years ago [1])? It seems that a glob/regexp for things to skip
> is a more universal solution.
>
> [1]
> https://www.postgresql.org/message-id/flat/8DDCCC9D-450D-4CA2-8CF6-40B382F1F699%40simply.name

Well, keeping the code simple is not always a bad thing. Logs are an
example that can be easily countered, as well as archives in your
case.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] JIT compiling expressions/deform + inlining prototype v2.0

2017-09-05 Thread Konstantin Knizhnik



On 04.09.2017 23:52, Andres Freund wrote:


Yea, I've changed that already, although it's currently added earlier,
because the alignment is needed before, to access the column correctly.
I've also made number of efficiency improvements, primarily to access
columns with an absolute offset if all preceding ones are fixed width
not null columns - that is quite noticeable performancewise.


Unfortunately, in most of real table columns are nullable.
I wonder if we can perform some optimization in this case (assuming that 
in typical cases column either contains mostly non-null values, either 
mostly null values).


--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Proposal: pg_rewind to skip config files

2017-09-05 Thread Vladimir Borodin

> 5 сент. 2017 г., в 12:31, Chris Travers  написал(а):
> 
> I think the simplest solution for now is to skip any files ending in .conf, 
> .log, and serverlog.

Why don’t you want to solve the problem once? It is a bit harder to get 
consensus on a way how to do it, but it seems that there are no reasons to make 
temporary solution here.

For example, in archive_command we put WALs for archiving from pg_xlog/pg_wal 
into another directory inside PGDATA and than another cron task makes real 
archiving. This directory ideally should be skipped by pg_rewind, but it would 
not be handled by proposed change.

> 
> Long run, it would be nice to change pg_rewind from an opt-out approach to an 
> approach of processing the subdirectories we know are important.

While it is definitely an awful idea the user can easily put something strange 
(i.e. logs) to any important directory in PGDATA (i.e. into base or pg_wal). Or 
how for example pg_replslot should be handled (I asked about it a couple of 
years ago [1])? It seems that a glob/regexp for things to skip is a more 
universal solution.

[1] 
https://www.postgresql.org/message-id/flat/8DDCCC9D-450D-4CA2-8CF6-40B382F1F699%40simply.name


--
May the force be with you…
https://simply.name



Re: [HACKERS] Replacing lfirst() with lfirst_node() appropriately in planner.c

2017-09-05 Thread Jeevan Chalke
Hi,

lfirst_node() changes are missing for List node type and I was thinking
about adding those. But it looks like we cannot do so as List can be a
T_List, T_IntList, or T_OidList. So I am OK with that.

Apart from this, changes look good to me. Everything works fine.

As we are now doing it for lfirst(), can we also do this for linitial()?
I did not find any usage for lsecond(), lthird(), lfourth() and llast()
though.

Attached patch for replacing linitial() with linital_node() appropriately in
planner.c

Thanks

On Fri, Jul 14, 2017 at 2:25 PM, Ashutosh Bapat <
ashutosh.ba...@enterprisedb.com> wrote:

> On Thu, Jul 13, 2017 at 9:15 PM, Alvaro Herrera
>  wrote:
> > Ashutosh Bapat wrote:
> >
> >> Happened to stumble across some instances of lfirst() which could use
> >> lfirst_node() in planner.c. Here's patch which replaces calls to
> >> lfirst() extracting node pointers by lfirst_node() in planner.c.
> >
> > Sounds good.
> >
> >> Are we carrying out such replacements in master or this will be
> >> considered for v11?
> >
> > This is for pg11 definitely; please add to the open commitfest.
>
> Thanks. Added. https://commitfest.postgresql.org/14/1195/
> --
> Best Wishes,
> Ashutosh Bapat
> EnterpriseDB Corporation
> The Postgres Database Company
>
>
> --
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
>



-- 
Jeevan Chalke
Principal Software Engineer, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a1dd157..9115655 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1556,8 +1556,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			/*--
 			  translator: %s is a SQL row locking clause such as FOR UPDATE */
 	 errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
-			LCS_asString(((RowMarkClause *)
-		  linitial(parse->rowMarks))->strength;
+			LCS_asString(linitial_node(RowMarkClause,
+	   parse->rowMarks)->strength;
 
 		/*
 		 * Calculate pathkeys that represent result ordering requirements
@@ -1687,7 +1687,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		qp_extra.tlist = tlist;
 		qp_extra.activeWindows = activeWindows;
 		qp_extra.groupClause = (gset_data
-? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
 : parse->groupClause);
 
 		/*
@@ -1757,25 +1757,25 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			split_pathtarget_at_srfs(root, final_target, sort_input_target,
 	 _targets,
 	 _targets_contain_srfs);
-			final_target = (PathTarget *) linitial(final_targets);
+			final_target = linitial_node(PathTarget, final_targets);
 			Assert(!linitial_int(final_targets_contain_srfs));
 			/* likewise for sort_input_target vs. grouping_target */
 			split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
 	 _input_targets,
 	 _input_targets_contain_srfs);
-			sort_input_target = (PathTarget *) linitial(sort_input_targets);
+			sort_input_target = linitial_node(PathTarget, sort_input_targets);
 			Assert(!linitial_int(sort_input_targets_contain_srfs));
 			/* likewise for grouping_target vs. scanjoin_target */
 			split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
 	 _targets,
 	 _targets_contain_srfs);
-			grouping_target = (PathTarget *) linitial(grouping_targets);
+			grouping_target = linitial_node(PathTarget, grouping_targets);
 			Assert(!linitial_int(grouping_targets_contain_srfs));
 			/* scanjoin_target will not have any SRFs precomputed for it */
 			split_pathtarget_at_srfs(root, scanjoin_target, NULL,
 	 _targets,
 	 _targets_contain_srfs);
-			scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
+			scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
 			Assert(!linitial_int(scanjoin_targets_contain_srfs));
 		}
 		else
@@ -2194,7 +2194,7 @@ preprocess_grouping_sets(PlannerInfo *root)
 		/*
 		 * Get the initial (and therefore largest) grouping set.
 		 */
-		gs = linitial(current_sets);
+		gs = linitial_node(GroupingSetData, current_sets);
 
 		/*
 		 * Order the groupClause appropriately.  If the first grouping set is
@@ -2344,8 +2344,8 @@ preprocess_rowmarks(PlannerInfo *root)
 		 * CTIDs invalid.  This is also checked at parse time, but that's
 		 * insufficient because of rule substitution, query pullup, etc.
 		 */
-		CheckSelectLocking(parse, ((RowMarkClause *)
-   linitial(parse->rowMarks))->strength);
+		CheckSelectLocking(parse, linitial_node(RowMarkClause,
+parse->rowMarks)->strength);
 	}
 	else
 	{
@@ 

Re: [HACKERS] [PROPOSAL] Use SnapshotAny in get_actual_variable_range

2017-09-05 Thread Daniel Gustafsson
> On 18 Aug 2017, at 08:50, Andrey Borodin  wrote:
> 
> The following review has been posted through the commitfest application:
> make installcheck-world:  tested, failed
> Implements feature:   tested, failed
> Spec compliant:   tested, failed
> Documentation:tested, failed
> 
> Hi! I've looked into the patch.
> 
> There is not so much of the code. The code itself is pretty clear and well 
> documented. I've found just one small typo "transactionn" in 
> HeapTupleSatisfiesNonVacuumable() comments.
> 
> Chosen Snapshot type is not a solution to the author's problem at hand. 
> Though the implemented SnapshotNonVacuumable is closer to a rational choice than 
> currently used SnapshotDirty for range sketching. As far as I can see this is 
> what get_actual_variable_range() is used for, despite the word "actual" in 
> its name. 
> The only place where get_actual_variable_range() is used for actual range is 
> preprocessed-out in get_variable_range():
>   /*
>* XXX It's very tempting to try to use the actual column min and max, 
> if
>* we can get them relatively-cheaply with an index probe.  However, 
> since
>* this function is called many times during join planning, that could
>* have unpleasant effects on planning speed.  Need more investigation
>* before enabling this.
>*/
> #ifdef NOT_USED
>   if (get_actual_variable_range(root, vardata, sortop, min, max))
>   return true;
> #endif
> 
> I think it would be good if we could have something like "give me actual 
> values, but only if it is on first index page", like conditional locks. But I 
> think this patch is a step to right direction.

Thanks for your review!  Based on your review and conclusions, I’ve bumped the
status to “Ready for Committer”. If you believe another status would be more
appropriate, please go in and update it.

cheers ./daniel



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] WAL logging problem in 9.4.3?

2017-09-05 Thread Daniel Gustafsson
> On 13 Apr 2017, at 11:42, Kyotaro HORIGUCHI  
> wrote:
> 
> At Thu, 13 Apr 2017 13:52:40 +0900, Michael Paquier 
>  wrote in 
> 
>> On Tue, Apr 11, 2017 at 5:38 PM, Kyotaro HORIGUCHI
>>  wrote:
>>> Sorry, what I have just sent was broken.
>> 
>> You can use PROVE_TESTS when running make check to select a subset of
>> tests you want to run. I use that all the time when working on patches
>> dedicated to certain code paths.
> 
> Thank you for the information. Removing unwanted test scripts
> from t/ directories was annoyance. This makes me happy.
> 
 - Relation has new members no_pending_sync and pending_sync that
  works as instant cache of an entry in pendingSync hash.
 - Commit-time synchronizing is restored as Michael's patch.
 - If relfilenode is replaced, pending_sync for the old node is
  removed. Anyway this is ignored on abort and meaningless on
  commit.
 - TAP test is renamed to 012 since some new files have been added.
 
 Accessing pending sync hash occurred on every calling of
 HeapNeedsWAL() (per insertion/update/freeze of a tuple) if any of
 accessing relations has pending sync.  Almost all of them are
 eliminated as a result.
>> 
>> Did you actually test this patch? One of the logs added makes the
>> tests a long time to run:
> 
> Maybe this patch requires make clean since it extends the
> structure RelationData. (Perhaps I saw the same trouble.)
> 
>> 2017-04-13 12:11:27.065 JST [85441] t/102_vacuumdb_stages.pl
>> STATEMENT:  ANALYZE;
>> 2017-04-13 12:12:25.766 JST [85492] LOG:  BufferNeedsWAL: pendingSyncs
>> = 0x0, no_pending_sync = 0
>> 
>> -   lsn = XLogInsert(RM_SMGR_ID,
>> -XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
>> +   rel->no_pending_sync= false;
>> +   rel->pending_sync = pending;
>> +   }
>> 
>> It seems to me that those flags and the pending_sync data should be
>> kept in the context of backend process and not be part of the Relation
>> data...
> 
> I understand that the context of "backend process" means
> storage.c local. I don't mind the context on which the data is,
> but I found only there that can get rid of frequent hash
> searching. For pending deletions, just appending to a list is
> enough and costs almost nothing, on the other hand pending syncs 
> are required to be referenced, sometimes very frequently.
> 
>> +void
>> +RecordPendingSync(Relation rel)
>> I don't think that I agree that this should be part of relcache.c. The
>> syncs are tracked should be tracked out of the relation context.
> 
> Yeah.. It's in storage.c in the latest patch. (Sorry for the
> duplicate name). I think it is a kind of bond between smgr and
> relation.
> 
>> Seeing how invasive this change is, I would also advocate for this
>> patch as only being a HEAD-only change, not many people are
>> complaining about this optimization of TRUNCATE missing when wal_level
>> = minimal, and this needs a very careful review.
> 
> Agreed.
> 
>> Should I code something? Or Horiguchi-san, would you take care of it?
>> The previous crash I saw has been taken care of, but it's been really
>> some time since I looked at this patch...
> 
> My point is hash-search on every tuple insertion should be avoided 
> even if it happens rarely. Once it was a bit apart from your 
> original patch, but in the latest patch the significant part
> (pending-sync hash) is revived from the original one.

This patch has followed along since CF 2016-03, do we think we can reach a
conclusion in this CF?  It was marked as "Waiting on Author”, based on
developments since in this thread, I’ve changed it back to “Needs Review”
again.

cheers ./daniel




-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Challenges preventing us moving to 64 bit transaction id (XID)?

2017-09-05 Thread Ildar Musin

Hi Alexander,

On 22.06.2017 18:36, Alexander Korotkov wrote:

On Wed, Jun 7, 2017 at 11:33 AM, Alexander Korotkov
> wrote:



0002-heap-page-special-1.patch
Putting xid and multixact bases into PageHeaderData would take extra 16
bytes on index pages too.  That would be waste of space for indexes.
This is why I decided to put bases into special area of heap pages.
This patch adds special area for heap pages contaning prune xid and
magic number.  Magic number is different for regular heap page and
sequence page.


We've discussed it earlier but it worth mentioning here too. You have 
pd_prune_xid of type TransactionId which is treated elsewhere as 
ShortTransactionId (see HeapPageGetPruneXid() and HeapPageSetPruneXid()) 
and hence introduces redundant 4 bytes. Could you please fix it?


--
Ildar Musin
i.mu...@postgrespro.ru


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] Move all am-related reloption code into src/backend/access/[am-name] and get rid of relopt_kind for custom AM

2017-09-05 Thread Nikolay Shaplov
В письме от 3 сентября 2017 11:45:43 пользователь Alvaro Herrera написал:

> > I've just made sure that patch is still applyable to the current master.
> > 
> > And I am still waiting for reviews :-)
> 
> I see that this patch adds a few tests for reloptions, for instance in
> bloom.  I think we should split this in at least two commits, one that
> adds those tests before the functionality change, so that they can be
> committed in advance, verify that the buildfarm likes it with the
> current code, and verify the coverage.
This sounds as a good idea. I created such patch and started a new thread for 
it https://www.postgresql.org/message-id/2615372.orqtEn8VGB%40x200m (I will 
add that thread to commitfest as soon as I understand what test should be left 
there)

> I also noticed that your patch adds an error message that didn't exist
> previously,
> 
> +   ereport(ERROR,
> +   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
> +  errmsg("col%i should not be grater than length",
> i)));
> 
> this seems a bit troublesome, because of the "col%i" thing 
What is the problem with col%i?

> and also because of the typo.
Oh... I always had bad marks for all natural languages in school :-( ;-)

> I wonder if this means you've changed the way sanity checks work here.
This should not be like this (I hope). I will attentively look at this part of 
code again, and explain what exactly I've done. (I did it a year ago and now 
need some time to recall)

> The new checks around toast table creation look like they belong to a
> completely independent patch also ... 
This sounds reasonable too. Will do it, it is possible, as far as I remember.

> the error message there goes against message style guidelines also.
Oh... these style guidelines... will pay attention to it too...

-- 
Do code for fun. Can do it for money (Perl & C/C++ ~10h/week)


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] WIP: long transactions on hot standby feedback replica / proof of concept

2017-09-05 Thread Amit Kapila
On Tue, Sep 5, 2017 at 3:03 AM, Alexander Korotkov
 wrote:
> On Mon, Sep 4, 2017 at 2:04 PM,  wrote:
>>
>> Our clients complain about this issue and therefore I want to raise the
>> discussion and suggest several solutions to this problem:
>>
>> I. Why does PG use Fatal when Error is enough to release lock that rises
>> lock conflict?
>> "If (RecoveryConflictPending && DoingCommandRead)"
>>
>> II. Do we really need to truncate the table on hot standby exactly at the
>> same time when truncate on master occurs?
>>
>> In my case conflict happens when the autovacuum truncates table tbl1 on
>> master while backend on replica is performing a long transaction involving
>> the same table tbl1. This happens because truncate takes an
>> AccessExclusiveLock. To tackle this issue we have several options:
>>
>> 1. We can postpone the truncate on the master until all the replicas have
>> finished their transactions (in this case, feedback requests to the master
>> should be sent frequently)
>> Patch 1
>> vacuum_lazy_truncate.patch
>
>
> I've following comments on this patch:
> 1) You shouldn't use ">=" to compare xids.  You should use
> TransactionIdFollowsOrEquals() function which handles transaction id
> wraparound correctly.
> 2) As I understood, this patch makes heap truncate only when no concurrent
> transactions are running on both master and replicas with
> hot_standby_feedback enabled.  For busy system, it would be literally "never
> do heap truncate after vacuum".
>
>> 2. Maybe there is an option somehow not to send AccessExclusiveLock and
>> not to truncate table on the replica right away. We could try to wait a
>> little and truncate tbl1 on replica again.
>>
>> Here is a patch that makes replica skip truncate WAL record if some
>> transaction using the same table is already running on replica. And it also
>> forces master to send truncate_wal to replica with actual table length every
>> time autovacuum starts.
>> Patch 2
>> standby_truncate_skip_v1.patch
>>
>> In this case the transaction which is running for several hours won’t be
>> interrupted because of truncate. Even if for some reason we haven’t
>> truncated this table tbl1 right away, nothing terrible will happen. The next
>> truncate wal record will reduce the file size by the actual length (if some
>> inserts or updates have been performed on master).
>
>
> Since you wrote this patch under on my request, let me clarify its idea
> little more.
>
> Currently, lazy_truncate_heap() is very careful on getting
> AccessExclusiveLock to truncate heap.  It doesn't want either block other
> transaction or wait for this lock too long.  If lock isn't acquired after
> some number of tries lazy_truncate_heap() gives up.  However, once
> lazy_truncate_heap() acquires AccessExclusiveLock is acquired on master, it
> would be replayed on replicas where it will conflict with read-only
> transactions if any.  That leads to unexpected behaviour when
> hot_standby_feedback is on.
>
> Idea of fixing that is to apply same logic of getting AccessExclusiveLock on
> standby as on master: give up if somebody is holding conflicting lock for
> long enough.  That allows standby to have more free pages at the end of heap
> than master have.  That shouldn't lead to errors since those pages are
> empty, but allows standby to waste some extra space.  In order to mitigate
> this deficiency, we're generating XLOG_SMGR_TRUNCATE records more frequent:
> on finish of every vacuum.  Therefore, if even standby gets some extra space
> of empty pages, it would be corrected during further vacuum cycles.
>

I think one deficiency of this solution is that it will keep on
generating extra WAL even if standby doesn't need it (as standby has
successfully truncated the relation).  I don't know if we can just get
away by saying that an additional WAL record per vacuum cycle is
harmless especially when that doesn't serve any purpose (like for the
cases when standby is always able to truncate the heap on first WAL
record).  Am I missing some part of solution which avoids it?

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] [PATCH] Tests for reloptions

2017-09-05 Thread Nikolay Shaplov

This thread continues discussion at 
https://www.postgresql.org/message-id/20170903094543.kkqdbdjuxwa5z6ji@alvherre.pgsql
(Shortly: I refactored reloptions code, Alvaro offered to commit tests before 
the full patch)

> I see that this patch adds a few tests for reloptions, for instance in
> bloom.  I think we should split this in at least two commits, one that
> adds those tests before the functionality change, so that they can be
> committed in advance, verify that the buildfarm likes it with the
> current code, and verify the coverage.

This sounds as a really good idea.

Though I have several questions. These tests also cover some functionality 
that was introduced only in my patch:

1. Forbid SET and RESET options where they should not be changed
2. Forbid creating tables with toasts options when no toast table is created
3. Split StdRdOptions into HeapOptions and ToastOptions and forbid uising Heap 
specific options for toast.

In the patch I've attached I've commented out this functionality. But I am not 
quite sure that it is good idea to commit it this way in master.

Maybe it would be good to make 1-3 separate patches and bring their tests 
with them, as a separate commit. But...

2nd can be easily ported to master. It does not depend much on my reloptions 
patch as far as I remember.

It would be insane to port the 1st feature to master. It is highly integrated with 
reloptions mechanism, It would require complete reimplementation of this 
feature using old reloptions tools. I totally do not want to do it 

The 3rd functionality came from philosophy one relation-type -- one options 
catalog, that was implemented in my reloptions patch. It is not really needed 
in master without the full patch. With some efforts I think it can be made as a 
separate patch, thought I would also try to avoid it if possible.


So the question still is: should we commit the non-existent functionality tests 
commented out, uncommented but with no proper error response in the expected output, 
or just remove these tests from this patch?

-- 
Do code for fun. Can do it for money (Perl & C/C++ ~10h/week)diff --git a/contrib/bloom/expected/bloom.out b/contrib/bloom/expected/bloom.out
index cbc50f7..37fc7f9 100644
--- a/contrib/bloom/expected/bloom.out
+++ b/contrib/bloom/expected/bloom.out
@@ -210,3 +210,33 @@ ORDER BY 1;
  text_ops | t
 (2 rows)
 
+-- reloptions test
+DROP INDEX bloomidx;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (length=7, col1 = 4);
+SELECT reloptions FROM pg_class WHERE oid = 'bloomidx'::regclass;
+reloptions 
+---
+ {length=7,col1=4}
+(1 row)
+
+-- check for min and max values
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (length=0);
+ERROR:  value 0 out of bounds for option "length"
+DETAIL:  Valid values are between "1" and "4096".
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (length=4097);
+ERROR:  value 4097 out of bounds for option "length"
+DETAIL:  Valid values are between "1" and "4096".
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (col1=0);
+ERROR:  value 0 out of bounds for option "col1"
+DETAIL:  Valid values are between "1" and "4095".
+CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (col1=4096);
+ERROR:  value 4096 out of bounds for option "col1"
+DETAIL:  Valid values are between "1" and "4095".
+ check post_validate for colN 0) not valid;
 alter table parted_validate_test validate constraint parted_validate_test_chka;
 drop table parted_validate_test;
+-- test alter column options
+CREATE TABLE tmp(i integer);
+INSERT INTO tmp VALUES (1);
+ALTER TABLE tmp ALTER COLUMN i SET (n_distinct = 1, n_distinct_inherited = 2);
+ALTER TABLE tmp ALTER COLUMN i RESET (n_distinct_inherited);
+ANALYZE tmp;
+DROP TABLE tmp;
diff --git a/src/test/regress/expected/brin.out b/src/test/regress/expected/brin.out
index ca80f00..6568b6b 100644
--- a/src/test/regress/expected/brin.out
+++ b/src/test/regress/expected/brin.out
@@ -507,3 +507,6 @@ EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
Filter: (b = 1)
 (2 rows)
 
+ Check that changing reloptions for brin index is not allowed
+--ALTER INDEX brinidx SET (pages_per_range = 10);
+--ALTER INDEX brinidx RESET (pages_per_range);
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 064adb4..16a3b8b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2337,7 +2337,7 @@ Options: fastupdate=on, gin_pending_list_limit=128
 CREATE INDEX hash_i4_index ON hash_i4_heap USING hash (random int4_ops);
 CREATE INDEX hash_name_index ON hash_name_heap USING hash (random name_ops);
 CREATE INDEX hash_txt_index ON hash_txt_heap USING hash (random text_ops);
-CREATE INDEX hash_f8_index ON hash_f8_heap USING hash (random float8_ops);
+CREATE INDEX hash_f8_index ON hash_f8_heap USING hash (random float8_ops) WITH (fillfactor=60);
 CREATE UNLOGGED TABLE unlogged_hash_table (id int4);
 CREATE INDEX 

  1   2   >