Re: [HACKERS] extended statistics: n-distinct

2017-03-20 Thread Alvaro Herrera
Alvaro Herrera wrote:

> * I'm not terribly happy with the header organization.  I think
> VacAttrStats should be in its own (new) src/include/statistics/analyze.h
> for example (which cleans up a bunch of existing stuff a bit)

I tried this and it doesn't actually do any good.  Patch attached, which
I intend to throw away.  The main problem is that most files interested
in analyze stuff also want to do vacuum_delay_point, or access
default_statistics_target, both of which are declared in vacuum.h, so
these files have to include vacuum.h anyway.

In order to achieve anything worthwhile we'd have to do a three-way
split, I think, and that's more trouble than it is worth.
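
To make the dependency concrete, a hedged sketch of the shape of a typical
typanalyze routine: even with VacAttrStats moved to a new statistics/analyze.h,
the file still needs commands/vacuum.h for vacuum_delay_point() and
default_statistics_target (the new header and the function below are
illustrative only, not code from the patch):

#include "postgres.h"

#include "commands/vacuum.h"        /* vacuum_delay_point(), default_statistics_target */
#include "statistics/analyze.h"     /* VacAttrStats -- the hypothetical new home */

/* Illustrative only; not a real typanalyze implementation. */
static void
compute_stats_sketch(VacAttrStats *stats, int numrows)
{
    int         targrows = 300 * default_statistics_target;    /* sample-size rule of thumb */
    int         i;

    for (i = 0; i < numrows && i < targrows; i++)
    {
        vacuum_delay_point();   /* still declared in commands/vacuum.h */
        /* ... accumulate statistics for this sample row into *stats ... */
    }
}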

-- 
Álvaro Herrera    https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
commit db618933b50f5f2b29b3a79325d43883cb1af521
Author: Alvaro Herrera 
AuthorDate: Mon Mar 20 18:51:11 2017 -0300
CommitDate: Mon Mar 20 19:11:36 2017 -0300

Header reorganization: move stuff to statistics/analyze.h

diff --git a/contrib/postgres_fdw/postgres_fdw.c 
b/contrib/postgres_fdw/postgres_fdw.c
index e8cb2d0..9101ed1 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -17,6 +17,7 @@
 #include "access/htup_details.h"
 #include "access/sysattr.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
 #include "commands/defrem.h"
 #include "commands/explain.h"
 #include "commands/vacuum.h"
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index cfcec34..9475591 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -22,6 +22,7 @@
 #include "access/hash_xlog.h"
 #include "access/relscan.h"
 #include "catalog/index.h"
+#include "catalog/pg_type.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "optimizer/plancat.h"
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index b91df98..ca8235e 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -39,6 +39,7 @@
 #include "parser/parse_relation.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "statistics/analyze.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/proc.h"
diff --git a/src/backend/postmaster/autovacuum.c 
b/src/backend/postmaster/autovacuum.c
index 33ca749..5b8d072 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -74,6 +74,7 @@
 #include "catalog/dependency.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_database.h"
+#include "catalog/pg_statistic.h"
 #include "commands/dbcommands.h"
 #include "commands/vacuum.h"
 #include "lib/ilist.h"
diff --git a/src/backend/tsearch/ts_typanalyze.c 
b/src/backend/tsearch/ts_typanalyze.c
index 017435c..30f7f26 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -16,6 +16,7 @@
 #include "access/hash.h"
 #include "catalog/pg_operator.h"
 #include "commands/vacuum.h"
+#include "statistics/analyze.h"
 #include "tsearch/ts_type.h"
 #include "utils/builtins.h"
 
diff --git a/src/backend/utils/adt/array_typanalyze.c 
b/src/backend/utils/adt/array_typanalyze.c
index 85b7a43..1a7a5ef 100644
--- a/src/backend/utils/adt/array_typanalyze.c
+++ b/src/backend/utils/adt/array_typanalyze.c
@@ -22,6 +22,7 @@
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
 #include "utils/typcache.h"
+#include "statistics/analyze.h"
 
 
 /*
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c 
b/src/backend/utils/adt/rangetypes_typanalyze.c
index a8d585ce..5f7a160 100644
--- a/src/backend/utils/adt/rangetypes_typanalyze.c
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -29,6 +29,7 @@
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/rangetypes.h"
+#include "statistics/analyze.h"
 
 static int float8_qsort_cmp(const void *a1, const void *a2);
 static int range_bound_qsort_cmp(const void *a1, const void *a2, void 
*arg);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4feb26a..c61753d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -34,6 +34,7 @@
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_type.h"
 #include "commands/async.h"
 #include "commands/prepare.h"
 #include "commands/user.h"
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 541c2fa..26fa238 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -14,122 +14,13 @@
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "access/htup.h"
-#include "catalog/pg_statistic.h"
-#include "catalog/pg_type.h"
 #include "nodes/parsenodes.h"
+#include "storage/block.h"
 #include "storage/buf.h"
 #include "storage/lock.h"
 #include "utils/relcache.h"
 
 
-/*--
- * ANALYZE builds one of these structs for 

Re: [HACKERS] Removing binaries (was: createlang/droplang deprecated)

2017-03-20 Thread Robert Haas
On Sat, Mar 18, 2017 at 4:12 PM, Magnus Hagander  wrote:
> createdb, dropdb - also not clear they're about postgres, more likely to be
> used by mistake but not that bad. That said, do they add any *value* beyond
> what you can do with psql -c "CREATE DATABASE"? I don't really see one, so
> I'd suggest dropping these too.

That would annoy me, because I use these constantly.  I also think
that they solve a problem for users, which is this:

[rhaas ~]$ psql
psql: FATAL:  database "rhaas" does not exist
[rhaas ~]$ psql -c 'create database rhaas;'
psql: FATAL:  database "rhaas" does not exist
[rhaas ~]$ gosh, i know i need to connect to a database in order to
create the database to which psql tries to connect by default, so
there must be an existing database with some name, but what exactly is
that name, anyway?
-bash: gosh,: command not found

There was an occasion when this exact problem almost caused me to give
up on using PostgreSQL.  Everybody here presumably knows that
template1 and postgres are the magic words you can add to the end of
that command line to make it work, but that is NOT self-evident to
newcomers.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Andrew Dunstan


On 03/20/2017 10:25 AM, Craig Ringer wrote:
>
>
> I'd like to enable Carp's features to use confess for traces, and
> switch all use of die to that. We could learn a lot about
> unplanned-for test failures where a test script dies rather than
> failing a test if we used carp effectively.
>
>
>


Good idea. But there is no obvious call to die() or BAIL_OUT() that's
causing the error I saw.

cheers

andrew




-- 
Andrew Dunstan    https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services





Re: [HACKERS] Removing binaries

2017-03-20 Thread David Steele

On 3/20/17 3:40 PM, Jan de Visser wrote:

> On Monday, March 20, 2017 3:30:49 PM EDT Robert Haas wrote:
>> On Sat, Mar 18, 2017 at 4:12 PM, Magnus Hagander wrote:
>>> createdb, dropdb - also not clear they're about postgres, more likely to
>>> be used by mistake but not that bad. That said, do they add any *value*
>>> beyond what you can do with psql -c "CREATE DATABASE"? I don't really
>>> see one, so I'd suggest dropping these too.
>>
>> That would annoy me, because I use these constantly.  I also think
>> that they solve a problem for users, which is this:
>>
>> [rhaas ~]$ psql
>> psql: FATAL:  database "rhaas" does not exist
>> [rhaas ~]$ psql -c 'create database rhaas;'
>> psql: FATAL:  database "rhaas" does not exist
>> [rhaas ~]$ gosh, i know i need to connect to a database in order to
>> create the database to which psql tries to connect by default, so
>> there must be an existing database with some name, but what exactly is
>> that name, anyway?
>> -bash: gosh,: command not found
>>
>> There was an occasion when this exact problem almost caused me to give
>> up on using PostgreSQL.  Everybody here presumably knows that
>> template1 and postgres are the magic words you can add to the end of
>> that command line to make it work, but that is NOT self-evident to
>> newcomers.
>
> Same here. I worked on a system with a shrink-wrap installer which installed
> pgsql as well and initialized it for use by the system user of our software.
> If a tester or sales engineer wanted to play with the DB, it would be about 30
> minutes before they would end up at my desk, in tears.


How about adding a hint?

--
-David
da...@pgmasters.net




Re: [HACKERS] extended statistics: n-distinct

2017-03-20 Thread David Rowley
On 21 March 2017 at 08:02, Alvaro Herrera  wrote:
> Here is a closer to final version of the multivariate statistics series,
> last posted at
> https://www.postgresql.org/message-id/20170316222033.ncdi7nidah2gdzjx%40alvherre.pgsql

I've made another pass over the patch.

+  A notice is issued in this case.  Note that there is no guarantee that
+  the existing statistics is anything like the one that would have been
+  created.

I think the final sentence would be better read as "Note that only the
name of the statistics object is considered here. The definition of
the statistics is not considered"

+ * This is not handled automatically by DROP TABLE because statistics are
+ * on their own schema.

"in their own schema" ?

+ /* now that we know the number of attributes, allocate the attribute */
+ item->attrs = (AttrNumber *) palloc0(item->nattrs * sizeof(AttrNumber));

there's still a number of palloc0() calls in mvdist.c that could be
simple palloc() calls.

+ int nmultiple,
+ summultiple;

these seem not initialised to 0 before they're used.
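
To make both review points concrete, a hedged sketch of what the adjusted
code might look like (the surrounding context in mvdist.c is assumed, not
quoted from the patch):

/* palloc0() is only needed when the zeroed contents are actually relied on;
 * here every element is assigned immediately afterwards, so palloc() is enough. */
item->attrs = (AttrNumber *) palloc(item->nattrs * sizeof(AttrNumber));

/* counters that are only ever incremented must start from a known value */
nmultiple = 0;
summultiple = 0;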

+int
+compare_scalars_simple(const void *a, const void *b, void *arg)

I don't see where this is used.

+int
+compare_scalars_partition(const void *a, const void *b, void *arg)

or this

+int
+multi_sort_compare_dims(int start, int end,

should have a comment

+bool
+stats_are_enabled(HeapTuple htup, char type)

missing comment too

+ appendStringInfo(, "CREATE STATISTICS %s ON (",
+ quote_identifier(NameStr(statextrec->staname)));

I know I wrote this bit, but I did it like that because I didn't know
what stat types would be included. Do we need a WITH(ndistinct) or
something in there?

+ attname = get_relid_attribute_name(statextrec->starelid, attnum);

This one is my fault. It needs a quote_identifier() around it:

postgres=# create table mytable ("select" int, "insert" int);
CREATE TABLE
postgres=# create statistics mytable_stats on ("select","insert") from mytable;
CREATE STATISTICS
postgres=# select pg_get_statisticsextdef(oid) from pg_statistic_ext
where staname = 'mytable_stats';
 pg_get_statisticsextdef
--
 CREATE STATISTICS mytable_stats ON (select, insert) FROM mytable
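
Presumably the fix is to run the looked-up name through quote_identifier()
before appending it, roughly like this (a sketch; the buffer and separator
handling are assumptions, not the committed code):

attname = get_relid_attribute_name(statextrec->starelid, attnum);
if (colno > 0)
	appendStringInfoString(&buf, ", ");
appendStringInfoString(&buf, quote_identifier(attname));

With that, the example above would come out as
CREATE STATISTICS mytable_stats ON ("select", "insert") FROM mytable.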

+ while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+ /* TODO maybe include only already built statistics? */
+ result = insert_ordered_oid(result, HeapTupleGetOid(htup));


+ /* XXX ensure this is true. It was broken in v25 0002 */

can now remove this comment. I see you added a check to only allow
stats on tables and MVs.

+ printfPQExpBuffer(,
+  "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
+  "  (SELECT string_agg(attname::text,', ') \n"

Might need to do something with pg_catalog.quote_ident(attname) here:

postgres=# \d mytable
  Table "public.mytable"
 Column |  Type   | Collation | Nullable | Default
+-+---+--+-
 select | integer |   |  |
 insert | integer |   |  |
Indexes:
"mytable_select_insert_idx" btree ("select", insert)
Statistics:
"public.mytable_stats" WITH (ndistinct) ON (select, insert)

notice lack of quoting on 'select'.
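
For psql's describe.c, the quoting can be done on the server side inside the
existing query; a hedged sketch of the changed fragment (the buffer name and
the elided remainder of the query are assumptions):

printfPQExpBuffer(&buf,
				  "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
				  "  (SELECT string_agg(pg_catalog.quote_ident(attname), ', ')\n"
				  /* ... rest of the query as before ... */);

That way reserved words such as select and insert would come back
double-quoted in the Statistics footer.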

List   *keys; /* String nodes naming referenced columns */

Are these really keys? Sounds more like something you might call it if
it were an index. Maybe it should just be "columns"? Although we need
to consider that one day they might be expressions too.

-- 
 David Rowley   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] increasing the default WAL segment size

2017-03-20 Thread David Steele

Hi Beena,

On 3/20/17 2:07 PM, Beena Emerson wrote:

> Added check for the version, the SHOW command will be run only in v10
> and above. Previous versions do not need this.


I've just had the chance to have a look at this patch.  This is not a 
complete review, just a test of something I've been curious about.


With 16MB WAL segments the filename neatly aligns with the LSN.  For 
example:


WAL FILE 0001000100FE = LSN 1/FE00

This no longer holds true with this patch.  I created a cluster with 1GB 
segments and the sequence looked like:


00010001
00010002
00010003
00010001

Whereas I had expected something like:

00010040
00010080
000100CO
00010001

I scanned the thread but couldn't find any mention of this so I'm 
curious to know if it was considered? Was the prior correspondence 
merely serendipitous?
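
For reference, the relationship in question: the last two fields of a WAL
file name are the segment number (LSN divided by the segment size) split on
4 GB xlogid boundaries, so the last field mirrors the LSN's high bytes only
when segments are 16 MB.  A standalone sketch of that arithmetic (it mirrors,
rather than quotes, the backend's XLogFileName logic):

#include <stdint.h>
#include <stdio.h>

/* Sketch only: the segment-number/file-name arithmetic, not the real macro. */
static void
wal_file_name(char *buf, unsigned tli, uint64_t lsn, uint64_t seg_size)
{
	uint64_t	segno = lsn / seg_size;		/* which segment the LSN falls in */
	uint64_t	segs_per_xlogid = UINT64_C(0x100000000) / seg_size;	/* segments per 4 GB */

	sprintf(buf, "%08X%08X%08X", tli,
			(unsigned) (segno / segs_per_xlogid),
			(unsigned) (segno % segs_per_xlogid));
}

int
main(void)
{
	char		name[32];

	/* 16 MB segments: LSN 1/FE000000 -> 0000000100000001000000FE, echoing the LSN */
	wal_file_name(name, 1, UINT64_C(0x1FE000000), UINT64_C(16) << 20);
	printf("%s\n", name);

	/* 1 GB segments: the same LSN is simply segment 7 -> 000000010000000100000003 */
	wal_file_name(name, 1, UINT64_C(0x1FE000000), UINT64_C(1) << 30);
	printf("%s\n", name);

	return 0;
}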


I'm honestly not sure which way I think is better, but I know either way 
it represents a pretty big behavioral change for any tools looking at 
pg_wal or using the various helper functions.


It's probably a good thing to do at the same time as the rename; I just 
want to make sure we are all aware of the changes.


--
-David
da...@pgmasters.net




[HACKERS] Thanks for the TAP framework

2017-03-20 Thread Craig Ringer
Hi

It just occurred to me that much of what I've been doing recently
would've been exceedingly difficult to write and even harder to debug
without the TAP framework. I would've spent a LOT of time writing test
scripts and wondering whether the bug was in my scripts or my Pg code.

I still spend a while swearing at Perl, but I can't really imagine
doing nontrivial development without them anymore.

So ... thanks.

-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] Logical decoding on standby

2017-03-20 Thread Simon Riggs
On 19 March 2017 at 21:12, Craig Ringer  wrote:

> Rebased attached.

Patch1 looks good to go. I'll correct a spelling mistake in the tap
test when I commit that later today.

Patch2 has a couple of points

2.1 Why does call to ReplicationSlotAcquire() move earlier in
pg_logical_slot_get_changes_guts()?

2.2 sendTimeLineIsHistoric looks incorrect, and at least isn't really
documented well.
The setting
  sendTimeLineIsHistoric = state->currTLI == ThisTimeLineID;
should be
  sendTimeLineIsHistoric = (state->currTLI != ThisTimeLineID);

but that doesn't cause failure because in read_local_xlog_page() we
say that we are reading from history when
state->currTLI != ThisTimeLineID explicitly rather than use
sendTimeLineIsHistoric

So it looks like we could do with a few extra comments
If you correct these I'll commit it tomorrow.

-- 
Simon Riggs    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] [PATCH] Incremental sort (was: PoC: Partial sort)

2017-03-20 Thread Mithun Cy
On Mon, Feb 27, 2017 at 8:29 PM, Alexander Korotkov
 wrote:

This patch needs to be rebased.

1. It fails while applying as below

patching file src/test/regress/expected/sysviews.out
Hunk #1 FAILED at 70.
1 out of 1 hunk FAILED -- saving rejects to file
src/test/regress/expected/sysviews.out.rej
patching file src/test/regress/sql/inherit.sql

2. Also, there are compilation errors due to new commits.

-fwrapv -fexcess-precision=standard -O2 -I../../../../src/include
-D_GNU_SOURCE   -c -o createplan.o createplan.c
createplan.c: In function ‘create_gather_merge_plan’:
createplan.c:1510:11: warning: passing argument 3 of ‘make_sort’ makes
integer from pointer without a cast [enabled by default]
   gm_plan->nullsFirst);
   ^
createplan.c:235:14: note: expected ‘int’ but argument is of type ‘AttrNumber *’
 static Sort *make_sort(Plan *lefttree, int numCols, int skipCols,
  ^
createplan.c:1510:11: warning: passing argument 4 of ‘make_sort’ from
incompatible pointer type [enabled by default]
   gm_plan->nullsFirst);

-- 
Thanks and Regards
Mithun C Y
EnterpriseDB: http://www.enterprisedb.com




Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Craig Ringer
On 17 March 2017 at 23:59, Robert Haas  wrote:

> But that lock could need to be held for an unbounded period of time -
> as long as decoding takes to complete - which seems pretty
> undesirable.

Yeah. We could use a recovery-conflict like mechanism to signal the
decoding session that someone wants to abort the xact, but it gets
messy.

>  Worse still, the same problem will arise if you
> eventually want to start decoding ordinary, non-2PC transactions that
> haven't committed yet, which I think is something we definitely want
> to do eventually; the current handling of bulk loads or bulk updates
> leads to significant latency.

Yeah. If it weren't for that, I'd probably still just pursue locking.
But you're right that we'll have to solve this sooner or later. I'll
admit I hoped for later.

-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




[HACKERS] Re: [COMMITTERS] pgsql: Add TAP tests for password-based authentication methods.

2017-03-20 Thread Heikki Linnakangas

On 03/18/2017 02:55 PM, Michael Paquier wrote:

> On Fri, Mar 17, 2017 at 6:37 PM, Heikki Linnakangas wrote:
>> Add TAP tests for password-based authentication methods.
>>
>> Tests all combinations of users with MD5, plaintext and SCRAM verifiers
>> stored in pg_authid, with plain 'password', 'md5' and 'scram'
>> authentication methods.
>
> This patch has forgotten two things:
> 1) src/test/authentication/.gitignore.
> 2) A refresh of vcregress.pl to run this test suite on Windows.
> Please find attached a patch to address both issues.

The tests don't actually work on Windows, and will all be skipped if you 
try.  So "vcregress authcheck" as in this patch is pretty useless.


With some effort, we could make it work on Windows. It currently assumes 
unix domain sockets, when it rewrites pg_hba.conf - that would need to 
use TCP instead. And to make it secure, use a long-enough random 
password instead of a hard-coded constant. I'm not volunteering to do 
that, though.


- Heikki





Re: [HACKERS] Removing binaries (was: createlang/droplang deprecated)

2017-03-20 Thread Jan de Visser
On Monday, March 20, 2017 3:30:49 PM EDT Robert Haas wrote:
> On Sat, Mar 18, 2017 at 4:12 PM, Magnus Hagander  
wrote:
> > createdb, dropdb - also not clear they're about postgres, more likely to
> > be
> > used by mistake but not that bad. That said, do they add any *value*
> > beyond
> > what you can do with psql -c "CREATE DATABASE"? I don't really see one, so
> > I'd suggest dropping these too.
> 
> That would annoy me, because I use these constantly.  I also think
> that they solve a problem for users, which is this:
> 
> [rhaas ~]$ psql
> psql: FATAL:  database "rhaas" does not exist
> [rhaas ~]$ psql -c 'create database rhaas;'
> psql: FATAL:  database "rhaas" does not exist
> [rhaas ~]$ gosh, i know i need to connect to a database in order to
> create the database to which psql tries to connect by default, so
> there must be an existing database with some name, but what exactly is
> that name, anyway?
> -bash: gosh,: command not found
> 
> There was an occasion when this exact problem almost caused me to give
> up on using PostgreSQL.  Everybody here presumably knows that
> template1 and postgres are the magic words you can add to the end of
> that command line to make it work, but that is NOT self-evident to
> newcomers.

Same here. I worked on a system with a shrink-wrap installer which installed 
pgsql as well and initialized it for use by the system user of our software. 
If a tester or sales engineer wanted to play with the DB, it would be about 30 
minutes before they would end up at my desk, in tears.




Re: [HACKERS] WIP: Faster Expression Processing v4

2017-03-20 Thread Tom Lane
... is there a reason why resultnum for EEOP_ASSIGN_* steps is declared
size_t and not just int?  Since it's an array index, and one that
certainly can't be bigger than AttrNumber, that seems rather confusing.

regards, tom lane




Re: [HACKERS] Logical replication existing data copy

2017-03-20 Thread Stephen Frost
Peter,

* Peter Eisentraut (peter.eisentr...@2ndquadrant.com) wrote:
> The current patch causes a failure in the pg_dump tests, because the
> generated CREATE SUBSCRIPTION commands make connection attempts that
> don't work.  We have the pg_dump option --no-create-subscription-slots
> for this, but I suppose we should expand that to
> --no-subscription-connect and use the new NOCONNECT option instead.

I'll admit that I've not followed this very closely, so perhaps I'm
misunderstanding, but I thought our prior discussion led to the idea
that pg_dump would always dump out subscriptions with 'NOCONNECT', so
that a restore wouldn't automatically start trying to connect to another
system.

In which case, I don't quite understand why we need a
"--no-subscription-connect" option.

Thanks!

Stephen




Re: [HACKERS] Microvacuum support for Hash Index

2017-03-20 Thread Robert Haas
On Sat, Mar 18, 2017 at 4:35 AM, Amit Kapila  wrote:
> This version looks good to me.

Committed.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Stephen Frost
Andrew,

* Andrew Dunstan (andrew.duns...@2ndquadrant.com) wrote:
> On 03/20/2017 10:25 AM, Craig Ringer wrote:
> > I'd like to enable Carp's features to use confess for traces, and
> > switch all use of die to that. We could learn a lot about
> > unplanned-for test failures where a test script dies rather than
> > failing a test if we used carp effectively.
> 
> Good idea. But there is no obvious call to die() or BAIL_OUT() that's
> causing the error I saw.

I'll look at adding that.  I should be able to get the psql output with
the ERROR in it and that's mainly what you'll want for this.

Thanks!

Stephen




Re: [HACKERS] patch proposal

2017-03-20 Thread David Steele

Hi Venkata,

On 2/28/17 11:59 PM, Venkata B Nagothi wrote:

> On Wed, Mar 1, 2017 at 1:14 AM, Venkata B Nagothi wrote:
>> On Tue, Jan 31, 2017 at 6:49 AM, David Steele wrote:
>>> Do you know when those will be ready?
>
> Attached are both the patches with tests included.


Thanks for adding the tests.  They bring clarity to the patch.

Unfortunately, I don't think the first patch (recoveryStartPoint) will 
work as currently implemented.  The problem I see is that the new 
function recoveryStartsHere() depends on pg_control containing a 
checkpoint right at the end of the backup.  There's no guarantee that 
this is true, even if pg_control is copied last.  That means a time, 
lsn, or xid that occurs somewhere in the middle of the backup can be 
selected without complaint from this code depending on timing.


The tests pass (or rather fail as expected) because they are written 
using values before the start of the backup.  It's actually the end of 
the backup (where the database becomes consistent on recovery) that 
defines where PITR can start and this distinction definitely matters for 
very long backups.  A better test would be to start the backup, get a 
time/lsn/xid after writing some data, and then make sure that 
time/lsn/xid is flagged as invalid on recovery.


It is also problematic to assume that transaction IDs commit in order. 
If checkPoint.latestCompletedXid contains 5 then a recovery to xid 4 may 
or may not be successful depending on the commit order.  However, it 
appears in this case the patch would disallow recovery to 4.


I think this logic would need to be baked into recoveryStopsAfter() 
and/or recoveryStopsBefore() in order to work.  It's not clear to me 
what that would look like, however, or if the xid check is even practical.


Regards,
--
-David
da...@pgmasters.net




Re: [HACKERS] Proposal: GetOldestXminExtend for ignoring arbitrary vacuum flags

2017-03-20 Thread Seki, Eiji
On 2017-02-24 04:17:20 Haribabu Kommi wrote:
>On Fri, Feb 24, 2017 at 3:17 PM, Seki, Eiji 
>
>wrote:
>
>>
>> Thank you for your comments.
>>
>> I reflected these comments to the attached patch. And I renamed IGNORE_XXX
>> flags to PROCARRAY_XXX flags.
>
>
>I checked the latest patch and I have some comments.
>
>+static int
>+ConvertProcarrayFlagToProcFlag(int flags)
>
>I feel this function is not needed, if we try to maintain same flag values
>for both PROC_XXX and PROCARRAY_XXX by writing some comments
>in the both the declarations place to make sure that the person modifying
>the flag values needs to update them in both the places. I feel it is
>usually
>rare that the flag values gets changed.
>
>+ * Now, flags is used only for ignoring backends with some flags.
>
>How about writing something like below.
>
>The flags are used to ignore the backends in calculation when any of the
>corresponding flags is set.
>
>+#define PROCARRAY_A_FLAG_VACUUM
>
>How about changing the flag name to PROCARRAY_VACUUM_FLAG
>(similar changes to other declarations)
>
>
>+/* Use these flags in GetOldestXmin as "flags" */
>
>Add some comments here to explain how to use these flags.
>
>+#define PROCARRAY_FLAGS_DEFAULT
>
>same here also as PROCARRAY_DEFAULT_FLAGS
>(similar changes to other declarations)

Thank you for your review.

I reflected your comment and attach the updated patch.
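
For illustration, the paired-definition approach suggested in the review
might look roughly like this (flag names and values here are illustrative,
not the ones in the posted patch):

/* proc.h: per-backend state flags.
 * NOTE: keep numerically in sync with the PROCARRAY_* flags below so that
 * no conversion function is needed. */
#define PROC_IS_AUTOVACUUM		0x01
#define PROC_IN_VACUUM			0x02
#define PROC_IN_ANALYZE			0x04

/* procarray.h: flags callers pass to GetOldestXmin() to say which backends
 * to ignore.  Each value must equal the corresponding PROC_* flag above. */
#define PROCARRAY_AUTOVACUUM_FLAG	0x01
#define PROCARRAY_VACUUM_FLAG		0x02
#define PROCARRAY_ANALYZE_FLAG		0x04

/* an illustrative default: ignore, say, vacuum and analyze backends */
#define PROCARRAY_FLAGS_DEFAULT		(PROCARRAY_VACUUM_FLAG | PROCARRAY_ANALYZE_FLAG)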

--
Regards,
Eiji Seki
Fujitsu


get_oldest_xmin_with_ignore_flags_v3.patch
Description: get_oldest_xmin_with_ignore_flags_v3.patch



Re: [HACKERS] Allow interrupts on waiting standby

2017-03-20 Thread Michael Paquier
On Sun, Mar 19, 2017 at 5:14 AM, Tom Lane  wrote:
> Couple of thoughts on this patch ---

Thanks!

> 1. Shouldn't WaitExceedsMaxStandbyDelay's CHECK_FOR_INTERRUPTS be moved to
> after the WaitLatch call?  Not much point in being woken immediately by
> an interrupt if you're not going to respond.
>
> 2. Is it OK to ResetLatch here?  If the only possible latch event in this
> process is interrupt requests, then I think WaitLatch, then ResetLatch,
> then CHECK_FOR_INTERRUPTS is OK; but otherwise it seems like you risk
> discarding events that need to be serviced later.

Right, I have switched to WaitLatch(), ResetLatch() and then
CHECK_FOR_INTERRUPTS().

> 3. In the same vein, if we're going to check WL_POSTMASTER_DEATH, should
> there be a test for that and immediate exit(1) here?

OK, if the postmaster has died, there is not much recovery conflict
needed anyway.

> 4. I'd be inclined to increase the sleep interval only if we did time out,
> not if we were awakened by some other event.

OK, that makes sense.

> 5. The comment about maximum sleep length needs some work.  At first
> glance you might think that without the motivation of preventing long
> uninterruptible sleeps, we might as well allow the sleep length to grow
> well past 1s.  I think that'd be bad, because we want to wake up
> reasonably soon after the xact(s) we're waiting for commit.  But neither
> the original text nor the proposed replacement mention this.

OK, I did some work on this comment.

What do you think about the updated version attached?
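
For reference, a sketch of the loop shape that points 1-4 imply (this is not
the attached patch; the wait-event argument, the sleep bookkeeping, and the
exit condition are assumptions):

long	standby_wait_ms = 1;	/* assumed starting sleep length */

for (;;)
{
	int			rc;

	rc = WaitLatch(MyLatch,
				   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
				   standby_wait_ms,
				   PG_WAIT_TIMEOUT);	/* wait-event classification assumed */

	/* point 3: if the postmaster died, just exit(1) -- no conflict to resolve */
	if (rc & WL_POSTMASTER_DEATH)
		exit(1);

	/* points 1 and 2: reset the latch first, then respond to interrupts */
	ResetLatch(MyLatch);
	CHECK_FOR_INTERRUPTS();

	/*
	 * point 4: lengthen the sleep only after an actual timeout, and keep it
	 * capped so we still notice the waited-for transactions finishing soon.
	 */
	if (rc & WL_TIMEOUT)
		standby_wait_ms = Min(standby_wait_ms * 2, 1000);

	if (conflict_resolved())	/* hypothetical loop-exit check */
		break;
}
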
-- 
Michael


standby-delay-latch-v3.patch
Description: Binary data



Re: [HACKERS] ANALYZE command progress checker

2017-03-20 Thread vinayak

Hi Ashutosh,

On 2017/03/19 17:56, Ashutosh Sharma wrote:

> Hi,
>
> I didn't find any major issues with the patch. It works as expected.
> However, I have few minor comments that I would like to share,
>
> + 
> +   Total number of sample rows. The sample it reads is taken randomly.
> +   Its size depends on the default_statistics_target
> parameter value.
> + 
>
> 1) I think it would be better if you could specify reference link to
> the guc variable 'default_statistics_target'. Something like this,
>
> .
>
> If you go through monitoring.sgml, you would find that there is
> reference link to all guc variables being used.

+1. Updated in the attached patch.

> 2) I feel that the 'computing_statistics' phase could have been
> splitted into 'computing_column_stats', 'computing_index_stats'
> Please let me know your thoughts on this.

Initially I had split this phase into 'computing heap stats' and
'computing index stats', but I agreed with Robert's suggestion to merge
the two phases into one as 'computing statistics'.

> +   certain commands during command execution.  Currently, the command
> +   which supports progress reporting is VACUUM and
> ANALYZE.  This may be
>  expanded in the future.
>
> 3) I think above needs to be rephrased. Something like...Currently,
> the supported progress reporting commands are 'VACUUM' and
> 'ANALYZE'.

+1. Updated in the attached patch.

> Moreover, I also ran your patch on Windows platform and didn't find
> any issues. Thanks.

Thank you for testing the patch on Windows platform.

Regards,
Vinayak Pokale
NTT Open Source Software Center


pg_stat_progress_analyze_v4.patch
Description: binary/octet-stream



Re: [HACKERS] ANALYZE command progress checker

2017-03-20 Thread vinayak

Hi Ashutosh,

Thank you for reviewing the patch.

On 2017/03/18 21:00, Ashutosh Sharma wrote:

> Hi Vinayak,
>
> Here are couple of review comments that may need your attention.
>
> 1. Firstly, I am seeing some trailing whitespace errors when trying to
> apply your v3 patch using git apply command.
>
> [ashu@localhost postgresql]$ git apply pg_stat_progress_analyze_v3.patch
> pg_stat_progress_analyze_v3.patch:155: trailing whitespace.
> CREATE VIEW pg_stat_progress_analyze AS
> pg_stat_progress_analyze_v3.patch:156: trailing whitespace.
>  SELECT
> pg_stat_progress_analyze_v3.patch:157: trailing whitespace.
>  S.pid AS pid, S.datid AS datid, D.datname AS datname,
> pg_stat_progress_analyze_v3.patch:158: trailing whitespace.
>  S.relid AS relid,
> pg_stat_progress_analyze_v3.patch:159: trailing whitespace.
>  CASE S.param1 WHEN 0 THEN 'initializing'
> error: patch failed: doc/src/sgml/monitoring.sgml:521

I have tried to apply the patch using "git apply" and it works fine in my
environment.

I use the below command to apply the patch:
patch -p1 < pg_stat_progress_analyze_v3.patch

> 2) The test-case 'rules' is failing.  I think it's failing because in
> rules.sql 'ORDER BY viewname' is used which will put
> 'pg_stat_progress_analyze' ahead of 'pg_stat_progress_vacuum' in the
> list of catalog views. You may need to correct rules.out file
> accordingly. This is what i could see in rules.sql,
>
> SELECT viewname, definition FROM pg_views WHERE schemaname <>
> 'information_schema' ORDER BY viewname;
>
> I am still reviewing your patch and doing some testing. Will update if
> i find any issues. Thanks.

Understood. I have fixed it.

Regards,
Vinayak Pokale
NTT Open Source Software Center




Re: [HACKERS] exposing wait events for non-backends (was: Tracking wait event for latches)

2017-03-20 Thread Michael Paquier
On Fri, Mar 17, 2017 at 6:19 PM, Kuntal Ghosh
 wrote:
> On Thu, Mar 16, 2017 at 1:18 PM, Michael Paquier
>  wrote:
>> +   /* We have userid for client-backends and wal-sender processes */
>> +   if (beentry->st_backendType == B_BACKEND ||
>> beentry->st_backendType == B_WAL_SENDER)
>> +   beentry->st_userid = GetSessionUserId();
>> +   else
>> +   beentry->st_userid = InvalidOid;
>> This can be true as well for bgworkers defining a role OID when
>> connecting with BackgroundWorkerInitializeConnection().
> Fixed.

And so you have the following thing to initialize the statistics:
@@ -5484,6 +5487,9 @@ BackgroundWorkerInitializeConnectionByOid(Oid
dboid, Oid useroid)

InitPostgres(NULL, dboid, NULL, useroid, NULL);

+   /* report the background worker process in the PgBackendStatus array */
+   pgstat_bestart();

And that bit as well:
+   if (beentry->st_backendType == B_BACKEND
+   || beentry->st_backendType == B_WAL_SENDER
+   || beentry->st_backendType == B_BG_WORKER)
+   beentry->st_userid = GetSessionUserId();
Unfortunately this is true only for background workers that connect to
a database. And this would break for bgworkers that do not do that.
The point to fix is here:
+   if (MyBackendId != InvalidBackendId)
+   {
[...]
+   else if (IsBackgroundWorker)
+   {
+   /* bgworker */
+   beentry->st_backendType = B_BG_WORKER;
+   }
Your code is assuming that a bgworker will always be setting
MyBackendId which is not necessarily true, and you would trigger the
assertion down:
Assert(MyAuxProcType != NotAnAuxProcess);
So you need to rely on IsBackgroundWorker in priority I think. I would
suggest as well to give up calling pgstat_bestart() in
BackgroundWorkerInitializeConnection[ByOid] and let the workers do
this work by themselves. This gives more flexibility. You would need
to update the logical replication worker and worker_spi for that as
well.
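
A rough sketch of the ordering being suggested: classify background workers
first and fall back to MyAuxProcType only for genuine auxiliary processes
(the B_* names are the patch's, quoted above; the helper at the end is
hypothetical):

if (IsBackgroundWorker)
{
	/* bgworkers may or may not have a MyBackendId, so test this first */
	beentry->st_backendType = B_BG_WORKER;
}
else if (MyBackendId != InvalidBackendId)
{
	/* regular client backend or walsender */
	beentry->st_backendType = am_walsender ? B_WAL_SENDER : B_BACKEND;
}
else
{
	/* only genuine auxiliary processes are left */
	Assert(MyAuxProcType != NotAnAuxProcess);
	beentry->st_backendType = auxproctype_to_backendtype(MyAuxProcType);	/* hypothetical helper */
}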

If you want to test this configuration, feel free to use this background worker:
https://github.com/michaelpq/pg_plugins/tree/master/hello_world
This just prints an entry to the logs every 10s, and does not connect
to any database. Adding a call to pgstat_bestart() in hello_main
triggers the assertion.

>> @@ -808,6 +836,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
>> nulls[12] = true;
>> nulls[13] = true;
>> nulls[14] = true;
>> +   nulls[23] = true;
>> }
>> That's not possible to have backend_type set as NULL, no?
>
> Yes, backend_type can't be null. But, I'm not sure whether it should
> be visible to a user with insufficient privileges. Anyway, I've made
> it visible to all user for now.
>
> Please find the updated patches in the attachment.

Yeah, hiding it may make sense...

Thanks for the updated versions.

/* The autovacuum launcher is done here */
if (IsAutoVacuumLauncherProcess())
+   {
return;
+   }
Unnecessary noise here.

Except for the two issues pointed out in this email, I am pretty cool
with this patch. Nice work.
-- 
Michael




Re: [HACKERS] logical replication apply to run with sync commit off by default

2017-03-20 Thread Petr Jelinek
On 18/03/17 13:31, Petr Jelinek wrote:
> On 07/03/17 06:23, Petr Jelinek wrote:
>> Hi,
>>
>> there has been discussion at the logical replication initial copy thread
>> [1] about making apply work with sync commit off by default for
>> performance reasons and adding option to change that per subscription.
>>
>> Here I propose patch to implement this - it adds boolean column
>> subssynccommit to pg_subscription catalog which decides if
>> synchronous_commit should be off or local for apply. And it adds
>> SYNCHRONOUS_COMMIT = boolean to the list of WITH options for CREATE and
>> ALTER SUBSCRIPTION. When nothing is specified it will set it to false.
>>
>> The patch is built on top of copy patch currently as there are conflicts
>> between the two and this helps a bit with testing of copy patch.
>>
>> [1]
>> https://www.postgresql.org/message-id/CA+TgmoY7Lk2YKArcp4O=Qu=xoor8j71mad1oteojawmuje3...@mail.gmail.com
>>
> 
> I rebased this patch against recent changes and the latest version of
> copy patch.
> 

And another rebase after pg_dump tests commit.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services
>From 800b56aab4714333c2e240c087518a8d1b5e7dc0 Mon Sep 17 00:00:00 2001
From: Petr Jelinek 
Date: Mon, 6 Mar 2017 13:07:45 +0100
Subject: [PATCH] Add option to modify sync commit per subscription

This also changes default behaviour of subscription workers to
synchronous_commit = off
---
 doc/src/sgml/catalogs.sgml | 11 ++
 doc/src/sgml/ref/alter_subscription.sgml   |  1 +
 doc/src/sgml/ref/create_subscription.sgml  | 15 
 src/backend/catalog/pg_subscription.c  |  1 +
 src/backend/commands/subscriptioncmds.c| 59 +-
 src/backend/replication/logical/launcher.c |  2 +-
 src/backend/replication/logical/worker.c   | 28 +-
 src/bin/pg_dump/pg_dump.c  | 12 +-
 src/bin/pg_dump/pg_dump.h  |  1 +
 src/bin/pg_dump/t/002_pg_dump.pl   |  2 +-
 src/bin/psql/describe.c|  5 ++-
 src/include/catalog/pg_subscription.h  | 11 --
 src/test/regress/expected/subscription.out | 27 +++---
 src/test/regress/sql/subscription.sql  |  3 +-
 14 files changed, 144 insertions(+), 34 deletions(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 228ec78..f71d9c9 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -6395,6 +6395,17 @@
  
 
  
+  subsynccommit
+  bool
+  
+  
+   If true, the apply for the subscription will run with
+   synchronous_commit set to local.
+   Otherwise it will have it set to false.
+  
+ 
+
+ 
   subconninfo
   text
   
diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml
index 6335e17..712de98 100644
--- a/doc/src/sgml/ref/alter_subscription.sgml
+++ b/doc/src/sgml/ref/alter_subscription.sgml
@@ -26,6 +26,7 @@ ALTER SUBSCRIPTION name WITH ( where suboption can be:
 
 SLOT NAME = slot_name
+| SYNCHRONOUS_COMMIT = boolean
 
 ALTER SUBSCRIPTION name SET PUBLICATION publication_name [, ...] { REFRESH WITH ( puboption [, ... ] ) | NOREFRESH }
 ALTER SUBSCRIPTION name REFRESH PUBLICATION WITH ( puboption [, ... ] )
diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml
index 6468470..6baff2f 100644
--- a/doc/src/sgml/ref/create_subscription.sgml
+++ b/doc/src/sgml/ref/create_subscription.sgml
@@ -29,6 +29,7 @@ CREATE SUBSCRIPTION subscription_name
  
@@ -145,6 +146,20 @@ CREATE SUBSCRIPTION subscription_name
 

+SYNCHRONOUS_COMMIT = boolean
+
+ 
+  Modifies the synchronous_commit setting of the
+  subscription workers. When set to true, the
+  synchronous_commit for the worker will be set to
+  local otherwise to false. The
+  default value is false independently of the default
+  synchronous_commit setting for the instance.
+ 
+
+   
+
+   
 NOCONNECT
 
  
diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
index 9b74892..26921aa 100644
--- a/src/backend/catalog/pg_subscription.c
+++ b/src/backend/catalog/pg_subscription.c
@@ -68,6 +68,7 @@ GetSubscription(Oid subid, bool missing_ok)
 	sub->name = pstrdup(NameStr(subform->subname));
 	sub->owner = subform->subowner;
 	sub->enabled = subform->subenabled;
+	sub->synccommit = subform->subsynccommit;
 
 	/* Get conninfo */
 	datum = SysCacheGetAttr(SUBSCRIPTIONOID,
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index cba2d5c..402f682 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -60,12 +60,13 @@ static List *fetch_table_list(WalReceiverConn *wrconn, List *publications);
 static void
 parse_subscription_options(List 

Re: [HACKERS] Logical decoding on standby

2017-03-20 Thread Craig Ringer
On 19 March 2017 at 22:12, Petr Jelinek  wrote:

> I am slightly worried about impact of the readTimeLineHistory() call but
> I think it should be called so little that it should not matter.

Pretty much my thinking too.

> That brings us to the big patch 0003.
>
> I still don't like the "New in 10.0" comments in documentation, for one
> it's 10, not 10.0 and mainly we don't generally write stuff like this to
> documentation, that's what release notes are for.

OK. Personally I think it's worthwhile for protocol docs, which are
more dev-focused. But I agree it's not consistent with the rest of the
docs, so removed.

(Frankly I wish we did this consistently throughout the Pg docs, too,
and it'd be much more user-friendly if we did, but that's just not
going to happen.)

> There is large amounts of whitespace mess (empty lines with only
> whitespace, spaces at the end of the lines), nothing horrible, but
> should be cleaned up.

Fixed.

> One thing I don't understand much is the wal_level change and turning
> off catalogXmin tracking. I don't really see anywhere that the
> catalogXmin would be reset in control file for example. There is TODO in
> UpdateOldestCatalogXmin() that seems related but tbh I don't follow
> what's happening there - comment says about not doing anything, but the
> code inside the if block are only Asserts.

UpdateOldestCatalogXmin(...) with force=true forces a
XactLogCatalogXminUpdate(...) call to write the new
procArray->replication_slot_catalog_xmin .

We call it with force=true from XLogReportParameters(...) when
wal_level has been lowered; see XLogReportParameters. This will write
out a xl_xact_catalog_xmin_advance with
procArray->replication_slot_catalog_xmin's value then update
ShmemVariableCache->oldestCatalogXmin in shmem.
ShmemVariableCache->oldestCatalogXmin will get written out in the next
checkpoint, which gets incorporated in the control file.

There is a problem though - StartupReplicationSlots() and
RestoreSlotFromDisk() don't care if catalog_xmin is set on a slot but
wal_level is < logical and will happily restore a logical slot, or a
physical slot with a catalog_xmin. So we can't actually assume that
procArray->replication_slot_catalog_xmin will be 0 if we're not
writing new logical WAL. This isn't a big deal, it just means we can't
short-circuit UpdateOldestCatalogXmin() calls if
!XLogLogicalInfoActive(). It also means the XLogReportParameters()
stuff can be removed since we don't care about wal_level for tracking
oldestCatalogXmin.

Fixed in updated patch.

I'm now reading over Andres's review.

-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-20 Thread Andrew Dunstan


On 03/10/2017 11:13 AM, Dmitry Dolgov wrote:
> > On 28 February 2017 at 19:21, Oleg Bartunov wrote:
> > 1. add json support
>
> I've added json support for all functions.
>
> >  ts_headline should return the original json with highlighting
>
> Yes, I see now. I don't think it's worth it to add a special option
> for that
> purpose, so I've just changed the implementation to return the
> original json(b).
>


This is a pretty good idea.

However, I think it should probably be broken up into a couple of pieces
- one for the generic json/jsonb transforms infrastructure (which
probably needs some more comments) and one for the FTS functions that
will use it.

cheers

andrew

-- 
Andrew Dunstan    https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services





Re: [HACKERS] PATCH: Make pg_stop_backup() archive wait optional

2017-03-20 Thread Tsunakawa, Takayuki
From: David Steele [mailto:da...@pgmasters.net]
> Well, that's embarrassing.  When I recreated the function to add defaults
> I messed up the AS clause and did not pay attention to the results of the
> regression tests, apparently.
> 
> Attached is a new version rebased on 88e66d1.  Catalog version bump has
> also been omitted.

I was late to reply because yesterday was a national holiday in Japan.  I 
marked this as ready for committer.  The patch applied cleanly and worked as 
expected.

Regards
Takayuki Tsunakawa




Re: [HACKERS] Our feature change policy

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 10:35 AM, Bruce Momjian  wrote:
> We keep re-litigating changes, either with pg_xlog, binaries, or
> pg_stat_activity, and at some point we need to settle on a
> policy.  The usual "change" options are:
>
> 1.  make the change now and mention it in the release notes
> 2.  #1, but also provide backward compatibility for 5+ years
> 3.  mark the feature as deprecated and remove/change it in 5+ years
> 4.  #3, but issue a warning for deprecated usage
>
> What causes us to choose different outcomes?

Well, sometimes backward compatibility is more possible than at other
times.  With standard_conforming_strings, we made it a GUC, but that
doesn't work with renaming a column in pg_stat_activity.  Also,
there's a big difference in my mind between changes that affect DBAs
and changes that affect user queries.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] Logical decoding on standby

2017-03-20 Thread Craig Ringer
On 20 March 2017 at 17:33, Andres Freund wrote:
> Hi,
>
> Have you checked how high the overhead of XLogReadDetermineTimeline is?
> A non-local function call, especially into a different translation-unit
> (no partial inlining), for every single page might end up being
> noticeable.  That's fine in the cases it actually adds functionality,
> but for a master streaming out data, that's not actually adding
> anything.

I don't anticipate any significant effect given the large amount of
indirection via decoding, reorder buffer, snapshot builder, output
plugin, etc that we already do and how much memory allocation gets
done ... but it's worth checking. I could always move the fast path
into a macro or inline function if it does turn out to make a
detectable difference.

One of the things I want to get to is refactoring all the xlog page
reading stuff into a single place, shared between walsender and normal
backends, to get rid of this confusing mess we currently have. The
only necessary difference is how we wait for new WAL, the rest should
be as common as possible allowing for xlogreader's needs. I
particularly want to get rid of the two identically named static
XLogRead functions. But all that probably means making timeline.c
FRONTEND friendly and it's way too intrusive to contemplate at this
stage.

> Did you check whether you changes to read_local_xlog_page could cause
> issues with twophase.c? Because that now also uses it.

Thanks, that's a helpful point. The commit in question is 978b2f65. I
didn't notice that it introduced XLogReader use in twophase.c, though
I should've realised given the discussion about fetching recent 2pc
info from xlog. I don't see any potential for issues at first glance,
but I'll go over it in more detail. The main concern is validity of
ThisTimeLineID, but since it doesn't run in recovery I don't see much
of a problem there. That also means it can afford to use the current
timeline-oblivious read_local_xlog_page safely.

TAP tests for 2pc were added by 3082098. I'll check to make sure they
have appropriate coverage for this.

> Did you check whether ThisTimeLineID is actually always valid in the
> processes logical decoding could run in?  IIRC it's not consistently
> update during recovery in any process but the startup process.

I share your concerns that it may not be well enough maintained.
Thank you for the reminder; that's been on my TODO and got lost when I
had to task-hop to other priorities.

I have some TAP tests to validate promotion that need finishing off.
My main concern is around live promotions, both promotion of standby
to master, and promotion of upstream master when streaming from a
cascaded replica.

[Will cover review of 0003 separately, next]

> 0002 should be doable as a whole this release, I have severe doubts that
> 0003 as a whole has a chance for 10 - the code is in quite a raw shape,
> there's a significant number of open ends.  I'd suggest breaking of bits
> that are independently useful, and work on getting those committed.

That would be my preference too.

I do not actually feel strongly about the need for logical decoding on
standby, and would in many ways prefer to defer it until we have
two-way hot standby feedback and the ability to have the master
confirm the actual catalog_xmin locked in to eliminate the current
race and ugly workaround for it. I'd rather have solid timeline
following in place now and bare-minimum failover capability.

I'm confident that the ability for xlogreader to follow timeline
switches will also be independently useful.

The parts I think are important for Pg10 are:

* Teach xlogreader to follow timeline switches
* Ability to create logical slots on replicas
* Ability to advance (via feedback or via SQL function) - no need to
actually decode and call output plugins at all.
* Ability to drop logical slots on replicas

That would be enough to provide minimal standby promotion without hideous hacks.

Unfortunately, slot creation on standby is probably the ugliest part
of the patch. It can be considerably simplified by imposing the rule
that the application must ensure catalog_xmin on the master is already
held down (with a replication slot) before creating a slot on the
standby, and it's the application's job to send feedback to the master
before any standbys it's keeping slots on. If the app fails to do so,
the slot on the downstream will become unusable and attempts to decode
changes from it will fail with conflict with recovery.

That'd get rid of a lot of the code including some of the ugliest
bits, since we'd no longer make any special effort with catalog_xmin
during slot creation. We're already pushing complexity onto apps for
this, after concluding that the transparent failover slots approach
wasn't the way forward, so I'm OK with that. Let the apps that want
logical decoding to support physical replica promotion pay most of the
cost.

I'd then like to revisit full decoding on standby later, 

Re: [HACKERS] Radix tree for character conversion

2017-03-20 Thread Kyotaro HORIGUCHI
Hello,

At Fri, 17 Mar 2017 13:03:35 +0200, Heikki Linnakangas  wrote 
in <01efd334-b839-0450-1b63-f2dea9326...@iki.fi>
> On 03/17/2017 07:19 AM, Kyotaro HORIGUCHI wrote:
> > I would like to use convert() function. It can be a large
> > PL/PgSQL function or a series of "SELECT convert(...)"s. The
> > latter is doable on-the-fly (by not generating/storing the whole
> > script).
> >
> > | -- Test for SJIS->UTF-8 conversion
> > | ...
> > | SELECT convert('\', 'SJIS', 'UTF-8'); -- results in error
> > | ...
> > | SELECT convert('\897e', 'SJIS', 'UTF-8');
> 
> Makes sense.
> 
> >> You could then run those SQL statements against old and new server
> >> version, and verify that you get the same results.
> >
> > Including the result files in the repository will make this easy
> > but unacceptably bloats. Put mb/Unicode/README.sanity_check?
> 
> Yeah, a README with instructions on how to do sounds good. No need to
> include the results in the repository, you can run the script against
> an older version when you need something to compare with.

Ok, I'll write a small script to generate a set of "conversion
dump" and try to write README.sanity_check describing how to use
it.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center





[HACKERS] comments in hash_alloc_buckets

2017-03-20 Thread Ashutosh Sharma
Hi,

While working on [1], I realised that the following comment in
_hash_alloc_buckets() needs to be corrected.

/*
 * Initialize the freed overflow page.  Just zeroing the page won't work,
 * See _hash_freeovflpage for similar usage.
 */
_hash_pageinit(page, BLCKSZ);

Attached is the patch that corrects above comment. Thanks.

[1] - 
https://www.postgresql.org/message-id/CAMkU%3D1y6NjKmqbJ8wLMhr%3DF74WzcMALYWcVFhEpm7i%3DmV%3DXsOg%40mail.gmail.com

--
With Regards,
Ashutosh Sharma
EnterpriseDB:http://www.enterprisedb.com


corrected_comments_hash_alloc_buckets.patch
Description: application/download



Re: [HACKERS] pageinspect and hash indexes

2017-03-20 Thread Ashutosh Sharma
On Mon, Mar 20, 2017 at 6:53 PM, Ashutosh Sharma  wrote:
> On Mon, Mar 20, 2017 at 9:31 AM, Amit Kapila  wrote:
>> On Sat, Mar 18, 2017 at 5:13 PM, Ashutosh Sharma  
>> wrote:
>>> On Sat, Mar 18, 2017 at 1:34 PM, Amit Kapila  
>>> wrote:
 On Sat, Mar 18, 2017 at 12:12 AM, Ashutosh Sharma  
 wrote:
> On Fri, Mar 17, 2017 at 10:54 PM, Jeff Janes  wrote:
>> While trying to figure out some bloating in the newly logged hash 
>> indexes,
>> I'm looking into the type of each page in the index.  But I get an error:
>>
>> psql -p 9876 -c "select hash_page_type(get_raw_page('foo_index_idx',x)) 
>> from
>> generate_series(1650,1650) f(x)"
>>
>> ERROR:  page is not a hash page
>> DETAIL:  Expected ff80, got .
>>
>> The contents of the page are:
>>
>> \xa400d8f203bf65c91800f01ff01f0420...
>>
>> (where the elided characters at the end are all zero)
>>
>> What kind of page is that actually?
>
> it is basically either a newly allocated bucket page or a freed overflow 
> page.
>

 What makes you think that it can be a newly allocated page?
 Basically, we always initialize the special space of newly allocated
 page, so not sure what makes you deduce that it can be newly allocated
 page.
>>>
>>> I came to know this from the following experiment.
>>>
>>> I  created a hash index and kept on inserting data in it till the split 
>>> happens.
>>>
>>> When split happened, I could see following values for firstblock and
>>> lastblock in _hash_alloc_buckets()
>>>
>>> Breakpoint 1, _hash_alloc_buckets (rel=0x7f6ac951ee30, firstblock=34,
>>> nblocks=32) at hashpage.c:993
>>> (gdb) n
>>> (gdb) p firstblock
>>> $15 = 34
>>> (gdb) p nblocks
>>> $16 = 32
>>> (gdb) n
>>> (gdb) p lastblock
>>> $17 = 65
>>>
>>> AFAIU, this bucket split resulted in creation of new bucket pages from
>>> block number 34 to 65.
>>>
>>> The contents for metap are as follows,
>>>
>>> (gdb) p *metap
>>> $18 = {hashm_magic = 105121344, hashm_version = 2, hashm_ntuples =
>>> 2593, hashm_ffactor = 81, hashm_bsize = 8152, hashm_bmsize = 4096,
>>> hashm_bmshift = 15,
>>>   hashm_maxbucket = 32, hashm_highmask = 63, hashm_lowmask = 31,
>>> hashm_ovflpoint = 6, hashm_firstfree = 0, hashm_nmaps = 1,
>>> hashm_procid = 450,
>>>   hashm_spares = {0, 0, 0, 0, 0, 1, 1, 0 <repeats 25 times>},
>>> hashm_mapp = {33, 0 }}
>>>
>>> Now, if I try to check the page type for block number 65, this is what I
>>> see,
>>>
>>> test=# select * from hash_page_type(get_raw_page('con_hash_index', 65));
>>> ERROR:  page is not a hash page
>>> DETAIL:  Expected ff80, got .
>>> test=#
>>>
>>
>> The contents of such a page should be zero, and Jeff has reported some
>> valid-looking contents of the page.  If you see this page's contents as
>> zero, then we can conclude that what Jeff is seeing was a freed overflow
>> page.
>
> As shown in the mail thread-[1], the contents of metapage are,
>
> (gdb) p *metap
> $18 = {hashm_magic = 105121344, hashm_version = 2, hashm_ntuples
> = 2593, hashm_ffactor = 81, hashm_bsize = 8152, hashm_bmsize = 4096,
> hashm_bmshift = 15, hashm_maxbucket = 32, hashm_highmask = 63,
> hashm_lowmask = 31, hashm_ovflpoint = 6, hashm_firstfree = 0,
> hashm_nmaps = 1,
> hashm_procid = 450, hashm_spares = {0, 0, 0, 0, 0, 1, 1, 0 <repeats 25 times>}, hashm_mapp = {33, 0 }}
>
> postgres=# select spares from
> hash_metapage_info(get_raw_page('con_hash_index', 0));
>   spares
> ---
>  {0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
> (1 row)
>
> Here, if you see, the spare page count is just 1, which corresponds to
> the bitmap page. So that means there is no overflow page in the hash
> index, nor have I run any DELETE statements in my experiment
> that would result in freeing an overflow page.
>
> Also, the page header content for such a page is,
>
> $9 = {pd_lsn = {xlogid = 0, xrecoff = 23638120}, pd_checksum = 0,
> pd_flags = 0, pd_lower = 24, pd_upper = 8176, pd_special = 8176,
>   pd_pagesize_version = 8196, pd_prune_xid = 0, pd_linp = 0x1f3aa88}
>
> From values of pd_lower and pd_upper it is clear that it is an empty page.
>
> The content of this page is,
>
> \xb0308a011800f01ff01f042000.
>
> I think it is not just happening for freed overflow pages but also for newly
> allocated bucket pages. It would be good if we could mark a freed
> overflow page as an UNUSED page rather than just initialising its header
> portion and leaving the page type in the special area as it is. Attached
> is the patch '0001-mark_freed_ovflpage_as_UNUSED_pagetype.patch' that
> marks a freed overflow page as an unused page.
>
> Also, I have now changed pageinspect module to 

Re: [HACKERS] Logical replication existing data copy

2017-03-20 Thread Peter Eisentraut
The current patch causes a failure in the pg_dump tests, because the
generated CREATE SUBSCRIPTION commands make connection attempts that
don't work.  We have the pg_dump option --no-create-subscription-slots
for this, but I suppose we should expand that to
--no-subscription-connect and use the new NOCONNECT option instead.

I'm a bit puzzled at the moment how it worked before, because the
pg_dump test suite does not use that option.  But some change like that
is probably useful anyway.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] increasing the default WAL segment size

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 7:23 PM, David Steele  wrote:
> With 16MB WAL segments the filename neatly aligns with the LSN.  For
> example:
>
> WAL FILE 0000000100000001000000FE = LSN 1/FE000000
>
> This no longer holds true with this patch.

It is already possible to change the WAL segment size using the
configure option --with-wal-segsize, and I think the patch should be
consistent with whatever that existing option does.
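
(For readers who have not internalized the naming scheme: with the default
16MB segments, a WAL file name is just the timeline, the LSN's high word,
and the LSN's low word shifted right by 24 bits, which is why the file name
and the LSN line up digit for digit.  A minimal standalone sketch, not
PostgreSQL source, with the values taken from David's example:)

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t	lsn = ((uint64_t) 0x1 << 32) | 0xFE000000;	/* LSN 1/FE000000 */
	uint64_t	seg_size = 16 * 1024 * 1024;	/* default 16MB segments */
	unsigned	timeline = 1;
	uint64_t	segno = lsn / seg_size;

	/*
	 * With 16MB segments there are 256 segments per 4GB "xlogid", so the
	 * name decomposes into timeline, LSN high word, and low word >> 24.
	 */
	printf("%08X%08X%08X\n", timeline,
		   (unsigned) (segno / 256), (unsigned) (segno % 256));
	/* prints 0000000100000001000000FE, which lines up with LSN 1/FE000000 */
	return 0;
}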

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Craig Ringer
On 20 March 2017 at 22:39, Alvaro Herrera  wrote:
> Stephen Frost wrote:
>
>> Is there any hope of getting a "quiet" mode, where all the "ok" lines
>> aren't printed when things work..?
>
> Well, we currently have --verbose in PROVE_FLAGS.  Maybe you can take it
> out, or even add --quiet or --QUIET (see the prove(1) manpage).

If so, please preserve the current behaviour via something like

$(or $(PROVE_VERBOSITY),'--verbose')

since it's very useful to have more output when running individual
tests interactively.



-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] Create replication slot in pg_basebackup if requested and not yet present

2017-03-20 Thread Michael Paquier
On Tue, Mar 21, 2017 at 1:32 AM, Michael Banck
 wrote:
> On Mon, Mar 20, 2017 at 02:42:32PM +0300, Arthur Zakirov wrote:
>> Also, maybe it would be good if pg_basebackup had a way to drop the created slot.
>> Although "drop slot" is not related to the concept of automatically created
>> slots, it would be good if the user had a way to drop slots.
>
> If you want to drop the slot after basebackup finishes you'd just use a
> temporary slot (i.e. the default), or am I understanding your use-case
> incorrectly?

Yup, agreed.

> I assumed the primary use-case for creating a non-temporary slot is to
> keep it around while the standby is active.

Indeed.

 /*
+ * Try to create a permanent replication slot if one is specified. Do
+ * not error out if the slot already exists, other errors are already
+ * reported by CreateReplicationSlot().
+ */
+if (!stream->temp_slot && stream->replication_slot)
+{
+    if (!CreateReplicationSlot(conn, stream->replication_slot,
+                               NULL, true, true))
+        return false;
+}
This could be part of an else taken from the previous if where
temp_slot is checked.
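
(A rough mock-up of the control flow being suggested; the struct, the field
names and the CreateReplicationSlot() stub below are stand-ins for
illustration only, not the real pg_basebackup code:)

#include <stdbool.h>
#include <stdio.h>

typedef struct StreamCtl
{
	bool		temp_slot;			/* create a temporary slot? */
	const char *replication_slot;	/* slot name, or NULL */
} StreamCtl;

static bool
CreateReplicationSlot(const char *slot_name, bool is_temporary)
{
	/* stand-in for the real call; pretend it always succeeds */
	printf("creating %s slot \"%s\"\n",
		   is_temporary ? "temporary" : "permanent", slot_name);
	return true;
}

static bool
setup_slot(StreamCtl *stream)
{
	if (stream->temp_slot)
	{
		/* existing behaviour: temporary slot, dropped at disconnect */
		if (!CreateReplicationSlot(stream->replication_slot, true))
			return false;
	}
	else if (stream->replication_slot)
	{
		/* suggested "else" branch: create the permanent slot here instead */
		if (!CreateReplicationSlot(stream->replication_slot, false))
			return false;
	}
	return true;
}

int
main(void)
{
	StreamCtl	stream = {.temp_slot = false, .replication_slot = "mybackupslot"};

	return setup_slot(&stream) ? 0 : 1;
}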

There should be some TAP tests included. And +1 for sharing the same
behavior as pg_receivewal.
-- 
Michael




Re: [HACKERS] [POC] A better way to expand hash indexes.

2017-03-20 Thread Amit Kapila
On Mon, Mar 20, 2017 at 8:58 PM, Mithun Cy  wrote:
> Hi Amit, Thanks for the review,
>
> On Mon, Mar 20, 2017 at 5:17 PM, Amit Kapila  wrote:
>> idea could be to make hashm_spares a two-dimensional array
>> hashm_spares[32][4] where the first dimension will indicate the split
>> point and second will indicate the sub-split number.  I am not sure
>> whether it will be simpler or complex than the method used in the
>> proposed patch, but I think we should think a bit more to see if we
>> can come up with some simple technique to solve this problem.
>
> I think making it a 2-dimensional array will not be any more useful; in fact
> we already treat the given array's elements as 2-dimensional now.
>

Sure, I was telling you based on that.  If you are implicitly treating
it as 2-dimensional array, it might be easier to compute the array
offsets.

> The main concern of yours I think is the calculation steps to find the
> phase of the splitpoint group the bucket belongs to.
> + tbuckets = (1 << (splitpoint_group + 2));
> + phases_beyond_bucket =
> + (tbuckets - num_bucket) / (1 << (splitpoint_group - 1));
> + return (((splitpoint_group + 1) << 2) - phases_beyond_bucket) - 1;
>

It is not only about the above calculation, but also what the patch is
doing in the function _hash_get_tbuckets().  By the way, the function name
also seems unclear (mainly *tbuckets* in the name).

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com




Re: [HACKERS] [POC] A better way to expand hash indexes.

2017-03-20 Thread Amit Kapila
On Tue, Mar 21, 2017 at 8:16 AM, Amit Kapila  wrote:
> On Mon, Mar 20, 2017 at 8:58 PM, Mithun Cy  wrote:
>> Hi Amit, Thanks for the review,
>>
>> On Mon, Mar 20, 2017 at 5:17 PM, Amit Kapila  wrote:
>>> idea could be to make hashm_spares a two-dimensional array
>>> hashm_spares[32][4] where the first dimension will indicate the split
>>> point and second will indicate the sub-split number.  I am not sure
>>> whether it will be simpler or complex than the method used in the
>>> proposed patch, but I think we should think a bit more to see if we
>>> can come up with some simple technique to solve this problem.
>>
>> I think making it a 2-dimensional array will not be any more useful; in fact
>> we already treat the given array's elements as 2-dimensional now.
>>
>
> Sure, I was telling you based on that.  If you are implicitly treating
> it as 2-dimensional array, it might be easier to compute the array
> offsets.
>

The above sentence looks incomplete.
If you are implicitly treating it as a 2-dimensional array, it might
be easier to compute the array offsets if you explicitly also treat it
as a 2-dimensional array.

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com




Re: [HACKERS] asynchronous execution

2017-03-20 Thread Kyotaro HORIGUCHI
Hello. This is the final report in this CF period.

At Fri, 17 Mar 2017 17:35:05 +0900 (Tokyo Standard Time), Kyotaro HORIGUCHI 
 wrote in 
<20170317.173505.152063931.horiguchi.kyot...@lab.ntt.co.jp>
> An async-capable plan is generated in the planner. An Append that contains at
> least one async-capable child becomes an async-aware Append. So the
> async feature should be effective also for the UNION ALL case.
> 
> The following will work faster than the unpatched version.
> 
> SELECT sum(a) FROM (SELECT a FROM ft10 UNION ALL SELECT a FROM ft20 UNION ALL 
> SELECT a FROM ft30 UNION ALL SELECT a FROM ft40) as ft;
> 
> I'll measure the performance for the case next week.

I found that the following query works the same as the partitioned
table case.

SELECT sum(a) FROM (SELECT a FROM ft10 UNION ALL SELECT a FROM ft20 UNION ALL 
SELECT a FROM ft30 UNION ALL SELECT a FROM ft40 UNION ALL *SELECT a FROM ONLY 
pf0*) as ft;

So, the difference comes from the additional async-incapable
child (faster if it contains any). In both cases, the Append node runs its
children asynchronously, but behaves slightly differently when all
async-capable children are busy.

I'll continue working on this from this point, aiming for the next
commitfest.

Thank you for valuable feedback.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center





[HACKERS] Freeze on Cygwin w/ concurrency

2017-03-20 Thread Noah Misch
"pgbench -i -s 50; pgbench -S -j2 -c16 -T900 -P5" freezes consistently on
Cygwin 2.2.1 and Cygwin 2.6.0.  (I suspect most other versions are affected.)
I've pinged[1] the Cygwin bug thread with some additional detail.  If a Cygwin
buildfarm member starts using --enable-tap-tests, you may see failures in the
pgbench test suite.  (lorikeet used --enable-tap-tests from 2017-03-18 to
2017-03-20, but it failed before reaching the pgbench test suite.)  Curious
that "make check" has too little concurrency to see more effects from this.


Frozen backends show a stack trace like this:

#0  0x7710139a in ntdll!ZwWriteFile () from 
/cygdrive/c/Windows/SYSTEM32/ntdll.dll
#1  0x07fefd851b2b in WriteFile () from 
/cygdrive/c/Windows/system32/KERNELBASE.dll
#2  0x76fb3576 in WriteFile () from 
/cygdrive/c/Windows/system32/kernel32.dll
#3  0x000180160c6c in transport_layer_pipes::write (this=, 
buf=, len=)
at /usr/src/debug/cygwin-2.6.0-1/winsup/cygserver/transport_pipes.cc:224
#4  0x00018015feb6 in client_request::send (this=0xa930, 
conn=0x6000e8290) at 
/usr/src/debug/cygwin-2.6.0-1/winsup/cygserver/client.cc:134
#5  0x000180160591 in client_request::make_request 
(this=this@entry=0xa930) at 
/usr/src/debug/cygwin-2.6.0-1/winsup/cygserver/client.cc:473
#6  0x000180114f79 in semop (semid=65540, sops=0xaa00, nsops=1) at 
/usr/src/debug/cygwin-2.6.0-1/winsup/cygwin/sem.cc:125
#7  0x000180117a4b in _sigfe () at sigfe.s:35
#8  0x00010063c81a in PGSemaphoreLock (sema=sema@entry=0x6e06a18) at 
pg_sema.c:387
#9  0x0001006a962b in LWLockAcquire (lock=lock@entry=0x6fff6774d80, 
mode=mode@entry=LW_SHARED) at lwlock.c:1286
#10 0x000100687d46 in BufferAlloc (foundPtr=0xab0b , strategy=0x0, blockNum=290, forkNum=MAIN_FORKNUM, 
relpersistence=112 'p', smgr=0x6000ea588) at bufmgr.c:1012

The postmaster, also frozen, shows a stack trace like this:

#0  0x771018ca in ntdll!ZwWaitForMultipleObjects () from 
/cygdrive/c/Windows/SYSTEM32/ntdll.dll
#1  0x07fefd851420 in KERNELBASE!GetCurrentProcess () from 
/cygdrive/c/Windows/system32/KERNELBASE.dll
#2  0x76fa1220 in WaitForMultipleObjects () from 
/cygdrive/c/Windows/system32/kernel32.dll
#3  0x000180120173 in child_info::sync (this=this@entry=0xc008, 
pid=4692, hProcess=@0xc1b0: 0x4b8, howlong=howlong@entry=30)
at /usr/src/debug/cygwin-2.6.0-1/winsup/cygwin/sigproc.cc:1010
#4  0x0001800aa163 in frok::parent (this=0xc000, stack_here=0xbfa0 
"") at /usr/src/debug/cygwin-2.6.0-1/winsup/cygwin/fork.cc:501
#5  0x0001800aaa05 in fork () at 
/usr/src/debug/cygwin-2.6.0-1/winsup/cygwin/fork.cc:607
#6  0x000180117a4b in _sigfe () at sigfe.s:35
#7  0x000100641618 in fork_process () at fork_process.c:61
#8  0x00010063e80a in StartAutoVacWorker () at autovacuum.c:1436

The postmaster log eventually has:

 28 [main] postgres 4408 child_info::sync: wait failed, pid 4692, Win32 
error 183
292 [main] postgres 4408 fork: child 4692 - died waiting for dll loading, 
errno 11


[1] https://cygwin.com/ml/cygwin/2017-03/msg00218.html




Re: [HACKERS] Logical decoding on standby

2017-03-20 Thread Craig Ringer
On 20 March 2017 at 17:33, Andres Freund  wrote:

>> Subject: [PATCH 2/3] Follow timeline switches in logical decoding
>
> FWIW, the title doesn't really seem accurate to me.

Yeah, it's not really at the logical decoding layer at all.

"Teach xlogreader to follow timeline switches" ?

>> Logical slots cannot actually be created on a replica without use of
>> the low-level C slot management APIs so this is mostly foundation work
>> for subsequent changes to enable logical decoding on standbys.
>
> Everytime I read references to anything like this my blood starts to
> boil.  I kind of regret not having plastered RecoveryInProgress() errors
> all over this code.

In fairness, I've been trying for multiple releases to get a "right"
way in. I have no intention of using such hacks, and only ever did so
for testing xlogreader timeline following without full logical
decoding on standby being available.

>> From 8854d44e2227b9d076b0a25a9c8b9df9270b2433 Mon Sep 17 00:00:00 2001
>> From: Craig Ringer 
>> Date: Mon, 5 Sep 2016 15:30:53 +0800
>> Subject: [PATCH 3/3] Logical decoding on standby
>>
>> * Make walsender aware of ProcSignal and recovery conflicts, make walsender
>>   exit with recovery conflict on upstream drop database when it has an active
>>   logical slot on that database.
>> * Allow GetOldestXmin to omit catalog_xmin, be called already locked.
>
> "be called already locked"?

To be called with ProcArrayLock already held. But that's actually
outdated due to changes Petr requested earlier, thanks for noticing.

>> * Send catalog_xmin separately in hot_standby_feedback messages.
>> * Store catalog_xmin separately on a physical slot if received in 
>> hot_standby_feedback
>
> What does separate mean?

Currently, hot standby feedback sends effectively the
min(catalog_xmin, xmin) to the upstream, which in turn records that
either in the PGPROC entry or, if there's a slot in use, in the xmin
field on the slot.

So catalog_xmin on the standby gets promoted to xmin on the master's
physical slot. Lots of unnecessary bloat results.

This splits it up, so we can send catalog_xmin separately on the wire,
and store it on the physical replication slot as catalog_xmin, not
xmin.

>> * Separate the catalog_xmin used by vacuum from ProcArray's 
>> replication_slot_catalog_xmin,
>>   requiring that xlog be emitted before vacuum can remove no longer needed 
>> catalogs, store
>>   it in checkpoints, make vacuum and bgwriter advance it.
>
> I can't parse that sentence.

We now write an xlog record before allowing the catalog_xmin in
ProcArray replication_slot_catalog_xmin to advance and allow catalog
tuples to be removed. This is achieved by making vacuum use a separate
field in ShmemVariableCache, oldestCatalogXmin. When vacuum looks up
the new xmin from GetOldestXmin, it copies
ProcArray.replication_slot_catalog_xmin to
ShmemVariableCache.oldestCatalogXmin, writing an xlog record to ensure
we remember the new value and ensure standbys know about it.

This provides a guarantee to standbys that all catalog tuples >=
ShmemVariableCache.oldestCatalogXmin are protected from vacuum and
lets them discover when that threshold advances.

The reason we cannot use the xid field in existing vacuum xlog records
is that the downstream has no way to know if the xact affected
catalogs and therefore whether it should advance its idea of
catalog_xmin or not. It can't get a Relation for the affected
relfilenode because it can't use the relcache during redo. We'd have
to add a flag to every vacuum record indicating whether it affected
catalogs, which is not fun, and vacuum might not always even know. And
the standby would still need a way to keep track of the oldest valid
catalog_xmin across restart without the ability to write it to
checkpoints.

It's a lot simpler and cheaper to have the master do it.

>> * Add a new recovery conflict type for conflict with catalog_xmin. Abort
>>   in-progress logical decoding sessions with conflict with recovery where 
>> needed
>>   catalog_xmin is too old
>
> Are we retaining WAL for slots broken in that way?

Yes, until the slot is dropped.

If I added a persistent flag on the slot to indicate that the slot is
invalid, then we could ignore it for purposes of WAL retention. It
seemed unnecessary at this stage.

>> * Make extra efforts to reserve master's catalog_xmin during decoding startup
>>   on standby.
>
> What does that mean?

WaitForMasterCatalogXminReservation(...)

I don't like it. At all. I'd rather have hot standby feedback replies
so we can know for sure when the master has locked in our feedback.
It's my most disliked part of this patch.

>> * Remove checks preventing starting logical decoding on standby
>
> To me that's too many different things in one commit.  A bunch of them
> seem like it'd be good if they'd get independent buildfarm cycles too.

I agree with you. I had them all separate before and was told that
there were too many patches. 

Re: [HACKERS] logical replication access control patches

2017-03-20 Thread Petr Jelinek
On 20/03/17 13:32, Peter Eisentraut wrote:
> On 3/18/17 09:31, Petr Jelinek wrote:
>>> 0003 Add USAGE privilege for publications
>>>
>>> a way to control who can subscribe to a publication
>>>
>> Hmm IIUC this removes ability of REPLICATION role to subscribe to
>> publications. I am not quite sure I like that.
> 
> Well, this is kind of the way with all privileges.  They take away
> abilities by default so you can assign them in a more fine-grained manner.
> 
> You can still connect as superuser and do anything you want, if you want
> a "quick start" setup.
> 
> Right now, any replication user connecting can use any publication.
> There is no way to distinguish different table groupings or different
> use cases, such as partial replication of some tables that should go
> over here, or archiving of some other tables that should go over there.
> That's not optimal.
> 

Hmm, but the REPLICATION role can do basebackup/consume WAL, so how does
giving it limited publication access help? Wouldn't we need some
SUBSCRIPTION role/grant used for logical replication connections
instead of REPLICATION for this to make sense?

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] Create replication slot in pg_basebackup if requested and not yet present

2017-03-20 Thread Arthur Zakirov

Hello,

On 19.03.2017 21:45, Michael Banck wrote:
> So the patch I sent earlier creates the slot in ReceiveXlogStream() in
> receivewal.c, as that's where the temp slot gets created as well, but
> now I wonder whether that is maybe not the best place, as pg_receivewal
> also calls that function. The other problem with receivewal.c is that
> `verbose' isn't around in it, so I don't know how I'd print out a message
> there.
>
> So probably it is better to create the slot in pg_basebackup.c's
> StartLogStreamer(), see the attached first patch; that one also adds
> a verbose message.


I think so too. I suppose it is clearer. StartLogStreamer() is a
better place for creating permanent and temporary slots.


Also, maybe it would be good if pg_basebackup had a way to drop the created
slot. Although "drop slot" is not related to the concept of automatically
created slots, it would be good if the user had a way to drop slots.


--
Arthur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company




Re: [HACKERS] Parallel Append implementation

2017-03-20 Thread Amit Khandekar
>> 2. Next, estimate the cost of the non-partial paths.  To do this, make
>> an array of Cost of that length and initialize all the elements to
>> zero, then add the total cost of each non-partial plan in turn to the
>> element of the array with the smallest cost, and then take the maximum
>> of the array elements as the total cost of the non-partial plans.  Add
>> this to the result from step 1 to get the total cost.
>
> So with costs (8, 5, 2), add 8 and 5 to 2 so that it becomes (8, 5,
> 15) , and so the max is 15 ? I surely am misinterpreting this.
>
> Actually, I couldn't come up with a general formula to find the
> non-partial paths total cost, given the per-subplan cost and number of
> workers. I mean, we can manually find out the total cost, but turning
> it into a formula seems quite involved. We can even do a dry-run of
> workers consuming each of the subplan slots and find the total
> time units taken, but finding some approximation seemed ok.
>
> For e.g. we can manually find total time units taken for following :
> costs (8, 2, 2, 2) with 2 workers : 8
> costs (6, 6, 4, 1) with 2 workers : 10.
> costs (6, 6, 4, 1) with 3 workers : 6.
>
> But coming up with an algorithm or a formula didn't look worth it. So I
> just did the total cost and divided it by workers. And besides that,
> took the maximum of the 1st plan cost (since it is the highest) and
> the average of total. I understand it would be too much approximation
> for some cases, but another thing is, we don't know how to take into
> account some of the workers shifting to partial workers. So the shift
> may be quite fuzzy since all workers may not shift to partial plans
> together.


For non-partial paths, I did some comparison between the actual cost
and the cost taken by adding the per-subpath figures and dividing by
number of workers. And in the below cases, they do not differ
significantly. Here are the figures :

Case 1 :
Cost units of subpaths : 20 16 10 8 3 1.
Workers : 3
Actual total time to finish all workers : 20.
total/workers: 16.

Case 2 :
Cost units of subpaths : 20 16 10 8 3 1.
Workers : 2
Actual total time to finish all workers : 34.
total/workers: 32.

Case 3 :
Cost units of subpaths : 5 3 3 3 3
Workers : 3
Actual total time to finish all workers : 6
total/workers: 5.6

One more thing observed is that, in all of the above cases, all the
workers more or less finish at about the same time.

So this method seems to compare well with the actual cost. The average
comes out a little less than the actual. But I think in the patch,
what I need to correct is, calculate separate per-worker costs of
non-partial and partial costs, and add them. This will give us
per-worker total cost, which is what a partial Append cost will be. I
just added all costs together.

There can be some extreme cases such as (5, 1, 1, 1, 1, 1) with 6
workers, where it will take at least 5 units, but average is 2. For
that we can clamp the cost up to the first path's cost, so that, e.g.,
it does not go lower than 5 in this case.

Actually, I have devised an algorithm to calculate the exact time when
all workers finish the non-partial paths. But I think it does not make
sense to apply it, because it may be too much calculation cost for
hundreds of paths.

But anyways, for archival purpose, here is the algorithm :

Per-subpath cost : 20 16 10 8 3 1, with 3 workers.
After 10 units (this is minimum of 20, 16, 10), the times remaining are :
10  6  0 8 3 1
After 6 units (minimum of 10, 06, 08), the times remaining are :
4  0  0 2 3 1
After 2 units (minimum of 4, 2, 3), the times remaining are :
 2  0  0 0 1 1
After 1 units (minimum of 2, 1, 1), the times remaining are :
 1  0  0 0 0 0
After 1 units (minimum of 1, 0 , 0), the times remaining are :
 0  0  0 0 0 0
Now add up above time chunks : 10 + 6 + 2 + 1 + 1 = 20
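
(To make the dry run above concrete, here is a small standalone simulation of
it, illustrative only and not patch code: at every step the first
"number of workers" unfinished subplans advance together by the smallest
remaining cost among them.)

#include <stdio.h>

static double
time_to_finish(double *remaining, int nsubplans, int nworkers)
{
	double		elapsed = 0.0;

	for (;;)
	{
		double		step = -1.0;
		int			active = 0;

		/* smallest remaining cost among the first nworkers unfinished subplans */
		for (int i = 0; i < nsubplans && active < nworkers; i++)
		{
			if (remaining[i] <= 0.0)
				continue;
			active++;
			if (step < 0.0 || remaining[i] < step)
				step = remaining[i];
		}
		if (active == 0)
			return elapsed;		/* everything has finished */

		/* advance those same subplans by that amount */
		active = 0;
		for (int i = 0; i < nsubplans && active < nworkers; i++)
		{
			if (remaining[i] <= 0.0)
				continue;
			remaining[i] -= step;
			active++;
		}
		elapsed += step;
	}
}

int
main(void)
{
	double		costs[] = {20, 16, 10, 8, 3, 1};

	/* reproduces the worked example: 10 + 6 + 2 + 1 + 1 = 20 units */
	printf("%g\n", time_to_finish(costs, 6, 3));
	return 0;
}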

-- 
Thanks,
-Amit Khandekar
EnterpriseDB Corporation
The Postgres Database Company




Re: [HACKERS] [POC] A better way to expand hash indexes.

2017-03-20 Thread Amit Kapila
On Sat, Mar 18, 2017 at 10:59 PM, Mithun Cy  wrote:
> On Thu, Mar 16, 2017 at 10:55 PM, David Steele  wrote:
>> It does apply with fuzz on 2b32ac2, so it looks like c11453c and
>> subsequent commits are the cause.  They represent a fairly substantial
>> change to hash indexes by introducing WAL logging so I think you should
>> reevaluate your patches to be sure they still function as expected.
>
> Thanks, David. Here is the new improved patch; I have also corrected the
> pageinspect test output. Also, I added notes in the README regarding the
> new way of adding bucket pages efficiently in hash indexes. I also did
> some more tests, pgbench read-only and read-write.
>

To make this work, I think the calculations you have introduced are
not so easy to understand.  For example, it took me quite some time to
understand how the below function works to compute the location in
hash spares.

+uint32
+_hash_spareindex(uint32 num_bucket)
+{
..
+ /*
+ * The first 4 bucket belongs to corresponding first 4 splitpoint phases.
+ */
+ if (num_bucket <= 4)
+ return (num_bucket - 1); /* converted to base 0. */
+ splitpoint_group = _hash_log2(num_bucket) - 2; /* The are 4 buckets in
..
+ /*
+ * bucket's global splitpoint phase = total number of split point phases
+ * until its splitpoint group - splitpoint phase within this splitpoint
+ * group but after buckets own splitpoint phase.
+ */
+ tbuckets = (1 << (splitpoint_group + 2));
+ phases_beyond_bucket =
+ (tbuckets - num_bucket) / (1 << (splitpoint_group - 1));
+ return (((splitpoint_group + 1) << 2) - phases_beyond_bucket) - 1;
+}


I am not sure if it is just a matter of better comments to explain it
in a simple way or maybe we can try to find some simpler mechanism to
group the split into four (or more) equal parts.  I think if someone
else can read and share their opinion it could be helpful.  Another
idea could be to make hashm_spares a two-dimensional array
hashm_spares[32][4] where the first dimension will indicate the split
point and second will indicate the sub-split number.  I am not sure
whether it will be simpler or complex than the method used in the
proposed patch, but I think we should think a bit more to see if we
can come up with some simple technique to solve this problem.
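
(For what it's worth, the arithmetic quoted above can be re-expressed as a
standalone program; this is only a sketch, and it assumes _hash_log2() is the
usual helper returning the smallest i such that (1 << i) >= n.  Printing the
mapping for the first 32 buckets makes the intended grouping, four phases per
splitpoint group, easier to see.)

#include <stdio.h>
#include <stdint.h>

static uint32_t
ceil_log2(uint32_t n)				/* stand-in for _hash_log2() */
{
	uint32_t	i = 0;

	while (((uint32_t) 1 << i) < n)
		i++;
	return i;
}

static uint32_t
spare_index(uint32_t num_bucket)	/* mirrors the quoted _hash_spareindex() */
{
	uint32_t	splitpoint_group;
	uint32_t	tbuckets;
	uint32_t	phases_beyond_bucket;

	/* the first 4 buckets map one-to-one to the first 4 phases */
	if (num_bucket <= 4)
		return num_bucket - 1;

	splitpoint_group = ceil_log2(num_bucket) - 2;
	tbuckets = 1 << (splitpoint_group + 2);
	phases_beyond_bucket =
		(tbuckets - num_bucket) / (1 << (splitpoint_group - 1));
	return (((splitpoint_group + 1) << 2) - phases_beyond_bucket) - 1;
}

int
main(void)
{
	for (uint32_t b = 1; b <= 32; b++)
		printf("bucket %2u -> spare index %2u\n", b, spare_index(b));
	return 0;
}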


+ * allocate them at once. Each splitpoint group will have 4 slots, we
+ * distribute the buckets equally among them. So we allocate only one
+ * forth of total buckets in new splitpoint group at time to consume
+ * one phase after another.

spelling.
/forth/fourth
/at time/at a time


-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com




Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Craig Ringer
> I thought about having special field (or reusing one of the existing fields)
> in snapshot struct to force filtering xmax > snap->xmax or xmin = snap->xmin
> as Petr suggested. Then this logic can reside in ReorderBufferCommit().
> However this is not solving problem with catcache, so I'm looking into it 
> right now.

OK, so this is only an issue if we have xacts that change the schema
of tables and also insert/update/delete to their heaps. Right?

So, given that this is CF3 for Pg10, should we take a step back and
impose the limitation that we can decode 2PC with schema changes or
data row changes, but not both?

Applications can record DDL in transactional logical WAL messages for
decoding during 2pc processing. Or apps can do 2pc for DML. They just
can't do both at the same time, in the same xact.

Imperfect, but a lot less invasive. And we can even permit apps to use
the locking-based approach I outlined earlier instead:

All we have to do IMO is add an output plugin callback to filter
whether we want to decode a given 2pc xact at PREPARE TRANSACTION time
or defer until COMMIT PREPARED. It could:

* mark the xact for deferred decoding at commit time (the default if
the callback doesn't exist); or

* Acquire a lock on the 2pc xact and request immediate decoding only
if it gets the lock so concurrent ROLLBACK PREPARED is blocked; or

* inspect the reorder buffer contents for row changes and decide
whether to decode now or later based on that.

It has a few downsides - for example, temp tables will be considered
"catalog changes" for now. But .. eh. We already accept a bunch of
practical limitations for catalog changes and DDL in logical decoding,
most notably regarding practical handling of full table rewrites.

> Just as before, I mark this transaction committed in the snapbuilder, but after
> decoding I delete this transaction from xip (which holds committed transactions
> in case of historic snapshot).

That seems kind of hacky TBH. I didn't much like marking it as
committed then un-committing it.

I think it's mostly an interface issue though. I'd rather say
SnapBuildPushPrepareTransaction and SnapBuildPopPreparedTransaction or
something, to make it clear what we're doing.

-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Simon Riggs
On 17 March 2017 at 23:59, Robert Haas  wrote:
> On Thu, Mar 16, 2017 at 10:34 PM, Craig Ringer  wrote:
>> On 17 March 2017 at 08:10, Stas Kelvich  wrote:
>>> While working on this i’ve spotted quite a nasty corner case with aborted 
>>> prepared
>>> transaction. I have some not that great ideas how to fix it, but maybe i 
>>> blurred my
>>> view and missed something. So want to ask here at first.
>>>
>>> Suppose we created a table, then in 2pc tx we are altering it and after 
>>> that aborting tx.
>>> So pg_class will have something like this:
>>>
>>> xmin | xmax | relname
>>> 100  | 200  | mytable
>>> 200  | 0    | mytable
>>>
>>> After previous abort, tuple (100,200,mytable) becomes visible and if we 
>>> will alter table
>>> again then xmax of first tuple will be set current xid, resulting in 
>>> following table:
>>>
>>> xmin | xmax | relname
>>> 100  | 300  | mytable
>>> 200  | 0    | mytable
>>> 300  | 0    | mytable
>>>
>>> In that moment we’ve lost information that first tuple was deleted by our 
>>> prepared tx.
>>
>> Right. And while the prepared xact has aborted, we don't control when
>> it aborts and when those overwrites can start happening. We can and
>> should check if a 2pc xact is aborted before we start decoding it so
>> we can skip decoding it if it's already aborted, but it could be
>> aborted *while* we're decoding it, then have data needed for its
>> snapshot clobbered.
>>
>> This hasn't mattered in the past because prepared xacts (and
>> especially aborted 2pc xacts) have never needed snapshots, we've never
>> needed to do something from the perspective of a prepared xact.
>>
>> I think we'll probably need to lock the 2PC xact so it cannot be
>> aborted or committed while we're decoding it, until we finish decoding
>> it. So we lock it, then check if it's already aborted/already
>> committed/in progress. If it's aborted, treat it like any normal
>> aborted xact. If it's committed, treat it like any normal committed
>> xact. If it's in progress, keep the lock and decode it.
>
> But that lock could need to be held for an unbounded period of time -
> as long as decoding takes to complete - which seems pretty
> undesirable.

This didn't seem to be too much of a problem when I read it.

Sure, the issue noted by Stas exists, but it requires
Alter-Abort-Alter for it to be a problem. Meaning that normal non-DDL
transactions do not have problems. Neither would a real-time system
that uses the decoded data to decide whether to commit or abort the
transaction; in that case there would never be an abort until after
decoding.

So I suggest we have a pre-prepare callback to ensure that the plugin
can decide whether to decode or not. We can pass information to the
plugin such as whether we have issued DDL in that xact or not. The
plugin can then decide how it wishes to handle it, so if somebody
doesn't like the idea of a lock then don't use one. The plugin is
already responsible for many things, so this is nothing new.

-- 
Simon Riggs   http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] Parallel Append implementation

2017-03-20 Thread Robert Haas
On Fri, Mar 17, 2017 at 1:12 PM, Amit Khandekar  wrote:
>> - The substantive changes in add_paths_to_append_rel don't look right
>> either.  It's not clear why accumulate_partialappend_subpath is
>> getting called even in the non-enable_parallelappend case.  I don't
>> think the logic for the case where we're not generating a parallel
>> append path needs to change at all.
>
> When accumulate_partialappend_subpath() is called for a childrel with
> a partial path, it works just like accumulate_append_subpath() when
> enable_parallelappend is false. That's why, for partial child path,
> the same function is called irrespective of parallel-append or
> non-parallel-append case. May be mentioning this in comments should
> suffice here ?

I don't get it.  If you can get the same effect by changing something
or not changing it, presumably it'd be better to not change it.   We
try not to change things just because we can; the change should be an
improvement in some way.

>> - When parallel append is enabled, I think add_paths_to_append_rel
>> should still consider all the same paths that it does today, plus one
>> extra.  The new path is a parallel append path where each subpath is
>> the cheapest subpath for that childrel, whether partial or
>> non-partial.  If !enable_parallelappend, or if all of the cheapest
>> subpaths are partial, then skip this.  (If all the cheapest subpaths
>> are non-partial, it's still potentially useful.)
>
> In case of all-partial childrels, the paths are *exactly* the same as
> those that would have been created for enable_parallelappend=off. The
> extra path is there for enable_parallelappend=on only when one or more
> of the child rels do not have partial paths. Does this make sense ?

No, I don't think so.  Imagine that we have three children, A, B, and
C.  The cheapest partial paths have costs of 10,000 each.  A, however,
has a non-partial path with a cost of 1,000.  Even though A has a
partial path, we still want to consider a parallel append using the
non-partial path because it figures to be hugely faster.

> The
> Path->total_cost for a partial path is *always* per-worker cost, right
> ? Just want to confirm this assumption of mine.

Yes.

>> Also, it
>> could be smarter about what happens with the costs of non-partial
>> paths. I suggest the following algorithm instead.
>>
>> 1. Add up all the costs of the partial paths.  Those contribute
>> directly to the final cost of the Append.  This ignores the fact that
>> the Append may escalate the parallel degree, but I think we should
>> just ignore that problem for now, because we have no real way of
>> knowing what the impact of that is going to be.
>
> I wanted to take into account per-subpath parallel_workers for total
> cost of Append. Suppose the partial subpaths have per worker total
> costs (3, 3, 3) and their parallel_workers are (2, 8, 4), with 2
> Append workers available. So according to what you say, the total cost
> is 9. With per-subplan parallel_workers taken into account, total cost
> = (3*2 + 3*8 + 3*4)/2 = 21.

But that case never happens, because the number of parallel workers for the
append is always at least as large as the number of workers for any
single child.

> Maybe I didn't follow exactly what you suggested. Is your logic not
> taking into account the number of workers? I am assuming you are
> calculating the per-worker total cost here.
>>
>> 2. Next, estimate the cost of the non-partial paths.  To do this, make
>> an array of Cost of that length and initialize all the elements to
>> zero, then add the total cost of each non-partial plan in turn to the
>> element of the array with the smallest cost, and then take the maximum
>> of the array elements as the total cost of the non-partial plans.  Add
>> this to the result from step 1 to get the total cost.
>
> So with costs (8, 5, 2), add 8 and 5 to 2 so that it becomes (8, 5,
> 15) , and so the max is 15 ? I surely am misinterpreting this.

No.  If you have costs 8, 5, and 2 and only one process, cost is 15.
If you have two processes then for costing purposes you assume worker
1 will execute the first path (cost 8) and worker 2 will execute the
other two (cost 5 + 2 = 7), so the total cost is 8.  If you have three
workers, the cost will still be 8, because there's no way to finish
the cost-8 path in less than 8 units of work.
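
(A tiny standalone sketch of that costing rule, with illustrative names only:
keep an array of per-worker costs, add each non-partial subpath to the
currently smallest slot, and take the busiest slot at the end.)

#include <stdio.h>

static double
nonpartial_append_cost(const double *subpath_costs, int nsubpaths, int nworkers)
{
	double		slot[64] = {0};		/* assumes nworkers <= 64 */
	double		max = 0.0;

	for (int i = 0; i < nsubpaths; i++)
	{
		int			min_slot = 0;

		/* find the currently least loaded worker slot */
		for (int w = 1; w < nworkers; w++)
			if (slot[w] < slot[min_slot])
				min_slot = w;
		slot[min_slot] += subpath_costs[i];
	}
	for (int w = 0; w < nworkers; w++)
		if (slot[w] > max)
			max = slot[w];
	return max;
}

int
main(void)
{
	double		costs[] = {8, 5, 2};

	/* reproduces the numbers above: 15 with one worker, 8 with two or three */
	printf("%g %g %g\n",
		   nonpartial_append_cost(costs, 3, 1),
		   nonpartial_append_cost(costs, 3, 2),
		   nonpartial_append_cost(costs, 3, 3));
	return 0;
}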

>> - In get_append_num_workers, instead of the complicated formula with
>> log() and 0.693, just add the list lengths and call fls() on the
>> result.  Integer arithmetic FTW!
>
> Yeah, fls() could be used. BTW I just found that costsize.c already has
> this defined in the same way I did:
> #define LOG2(x)  (log(x) / 0.693147180559945)
> Maybe we need to move this to some common header file.

LOG2() would make sense if you're working with a value represented as
a double, but if you have an integer input, I think fls() is better.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company



Re: [HACKERS] logical replication access control patches

2017-03-20 Thread Peter Eisentraut
On 3/18/17 09:31, Petr Jelinek wrote:
>> 0003 Add USAGE privilege for publications
>>
>> a way to control who can subscribe to a publication
>>
> Hmm IIUC this removes ability of REPLICATION role to subscribe to
> publications. I am not quite sure I like that.

Well, this is kind of the way with all privileges.  They take away
abilities by default so you can assign them in a more fine-grained manner.

You can still connect as superuser and do anything you want, if you want
a "quick start" setup.

Right now, any replication user connecting can use any publication.
There is no way to distinguish different table groupings or different
use cases, such as partial replication of some tables that should go
over here, or archiving of some other tables that should go over there.
That's not optimal.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Stas Kelvich

> On 20 Mar 2017, at 15:17, Craig Ringer  wrote:
> 
>> I thought about having special field (or reusing one of the existing fields)
>> in snapshot struct to force filtering xmax > snap->xmax or xmin = snap->xmin
>> as Petr suggested. Then this logic can reside in ReorderBufferCommit().
>> However this is not solving problem with catcache, so I'm looking into it 
>> right now.
> 
> OK, so this is only an issue if we have xacts that change the schema
> of tables and also insert/update/delete to their heaps. Right?
> 
> So, given that this is CF3 for Pg10, should we take a step back and
> impose the limitation that we can decode 2PC with schema changes or
> data row changes, but not both?

Yep, time is tight. I’ll try today/tomorrow to proceed with this two-scan
approach.
If I fail to do that in this time then I’ll just update this patch to decode
only non-DDL 2PC transactions as you suggested.

>> Just as before, I mark this transaction committed in the snapbuilder, but after
>> decoding I delete this transaction from xip (which holds committed transactions
>> in case of historic snapshot).
> 
> That seems kind of hacky TBH. I didn't much like marking it as
> committed then un-committing it.
> 
> I think it's mostly an interface issue though. I'd rather say
> SnapBuildPushPrepareTransaction and SnapBuildPopPreparedTransaction or
> something, to make it clear what we're doing.

Yes, that will be less confusing. However, there is no queue of any kind, so
SnapBuildStartPrepare / SnapBuildFinishPrepare should work too.

> -- 
> Craig Ringer   http://www.2ndQuadrant.com/
> PostgreSQL Development, 24x7 Support, Training & Services


Stas Kelvich
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company






Re: [HACKERS] Re: [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Peter Eisentraut
On 3/20/17 08:33, Stephen Frost wrote:
>> So was this 3340 line patch posted or discussed anyplace before it got
>> committed?
> I've mentioned a few times that I'm working on improving pg_dump
> regression tests and code coverage, which is what these were.  I'm a bit
> surprised that it's, apparently, a surprise to anyone or that strictly
> adding regression tests in the existing framework deserves very much
> discussion.

I think we had this discussion about adding a large number of (pg_dump)
tests without discussion or review already about a year ago, so I for
one am surprised that you are still surprised.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 8:33 AM, Stephen Frost  wrote:
> * Robert Haas (robertmh...@gmail.com) wrote:
>> So was this 3340 line patch posted or discussed anyplace before it got
>> committed?
>
> I've mentioned a few times that I'm working on improving pg_dump
> regression tests and code coverage, which is what these were.  I'm a bit
> surprised that it's, apparently, a surprise to anyone or that strictly
> adding regression tests in the existing framework deserves very much
> discussion.

I'm not saying it does.  I'm saying that it's polite, and expected, to
post patches and ask for opinions before committing things.

> What I think would be great would be some additional work on our code
> coverage, which is abysmal.  This, at least, gets us up over 80% for
> src/bin/pg_dump, but there's still quite a bit of work to be done there,
> as noted in the commit message, and lots of opportunity for improvement
> throughout the rest of the code base, as https://coverage.postgresql.org
> shows.

Sure, I think that would be great, too.  Following the community
process and improving code coverage are not opposing goals, such that
to support one is to oppose the other.  We need both things.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] Re: [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Stephen Frost
Peter,

* Peter Eisentraut (peter.eisentr...@2ndquadrant.com) wrote:
> On 3/20/17 08:33, Stephen Frost wrote:
> >> So was this 3340 line patch posted or discussed anyplace before it got
> >> committed?
> > I've mentioned a few times that I'm working on improving pg_dump
> > regression tests and code coverage, which is what these were.  I'm a bit
> > surprised that it's, apparently, a surprise to anyone or that strictly
> > adding regression tests in the existing framework deserves very much
> > discussion.
> 
> I think we had this discussion about adding a large number of (pg_dump)
> tests without discussion or review already about a year ago, so I for
> one am surprised that you are still surprised.

The concern you raised at the time, from my recollection, was that I had
added a new set of TAP tests post feature-freeze for pg_dump and there
was concern that it might cause an issue for packagers.

I don't recall a concern being raised about the tests themselves, and I
intentionally worked to make sure this landed before feature-freeze to
avoid that issue, though I believe we should actually be looking to try
to add tests post feature-freeze too, as we work to test things prior to
the release.

Thanks!

Stephen




[HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Andrew Dunstan

If you look at this failure case

you see:

t/002_pg_dump.1..4449
# Looks like your test died before it could output anything.
dubious
Test returned status 255 (wstat 65280, 0xff00)
DIED. FAILED tests 1-4449
Failed 4449/4449 tests, 0.00% okay


That's really not helpful. We have no idea where things went wrong.

ISTM that the test setup and teardown code, both in individual tests
and in PostgresNode.pm, should be liberally sprinkled with diag() calls
to make it easier to narrow down errors.

Thoughts?

cheers

andrew

-- 
Andrew Dunstan   https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services





Re: [HACKERS] Logical decoding on standby

2017-03-20 Thread Craig Ringer
On 20 March 2017 at 14:57, Simon Riggs  wrote:

> 2.1 Why does call to ReplicationSlotAcquire() move earlier in
> pg_logical_slot_get_changes_guts()?

That appears to be an oversight from an earlier version where it
looped over timelines in pg_logical_slot_get_changes_guts . Reverted.

> 2.2 sendTimeLineIsHistoric looks incorrect, and at least isn't really
> documented well.
> The setting
>   sendTimeLineIsHistoric = state->currTLI == ThisTimeLineID;
> should be
>   sendTimeLineIsHistoric = (state->currTLI != ThisTimeLineID);

Definitely wrong. Fixed.

> but that doesn't cause failure because in read_local_xlog_page() we
> say that we are reading from history when
> state->currTLI != ThisTimeLineID explicitly rather than use
> sendTimeLineIsHistoric

XLogRead(...), as called by logical_read_xlog_page, does test it. It's
part of the walsender-local log read callback. We don't hit
read_local_xlog_page at all when we're doing walsender based logical
decoding.

We have two parallel code paths for reading xlogs, one for walsender,
one for normal backends. The walsender one is glued together with a
bunch of globals that pass state "around" the xlogreader - we set it
up before calling into xlogreader, and then examine it when xlogreader
calls back into walsender.c with logical_read_xlog_page.

I really want to refactor that at some stage, getting rid of the use
of walsender globals for timeline state tracking and sharing more of
the xlog reading logic between walsender and normal backends. But
-ENOTIME, especially to do it as carefully as it must be done.

There are comments on read_local_xlog_page, logical_read_xlog_page
that mention this. Also XLogRead in
src/backend/access/transam/xlogutils.c  (which has the same name as
XLogRead in src/backend/replication/walsender.c). I have a draft for a
timeline following readme that would address some of this but don't
expect to be able to finish it off for this release cycle, and I'd
really rather clean it up instead.

-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




[HACKERS] Re: [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Stephen Frost
Robert,

* Robert Haas (robertmh...@gmail.com) wrote:
> So was this 3340 line patch posted or discussed anyplace before it got
> committed?

I've mentioned a few times that I'm working on improving pg_dump
regression tests and code coverage, which is what these were.  I'm a bit
surprised that it's, apparently, a surprise to anyone or that strictly
adding regression tests in the existing framework deserves very much
discussion.

What I think would be great would be some additional work on our code
coverage, which is abysmal.  This, at least, gets us up over 80% for
src/bin/pg_dump, but there's still quite a bit of work to be done there,
as noted in the commit message, and lots of opportunity for improvement
throughout the rest of the code base, as https://coverage.postgresql.org
shows.

Thanks!

Stephen




Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Stephen Frost
Andrew,

* Andrew Dunstan (andrew.duns...@2ndquadrant.com) wrote:
> If you look at this failure case
> 
> you see:
> 
> t/002_pg_dump.1..4449
> # Looks like your test died before it could output anything.
> dubious
>   Test returned status 255 (wstat 65280, 0xff00)
> DIED. FAILED tests 1-4449
>   Failed 4449/4449 tests, 0.00% okay
> 
> That's really not helpful. We have no idea where things went wrong.

The detail is in the logs, which is where I discovered the issue with
collations not being supported on all platforms and added a check to
skip the collation tests on those platforms.

> ISTM that the test setup and breakdown code, both in individual tests
> and in PostgresNode.pm  should be liberally sprinkled with diag() calls
> to make it easier to narrow down errors..

While I'm generally in favor of adding diag() info into the testing for
when things go wrong, what I don't want to do is increase the amount of
output that these tests produce without good cause.  I really wish there
was a "quiet" mode for the TAP tests which didn't report anything when
things are 'ok'.

Thanks!

Stephen




[HACKERS] [Proposal] Make the optimiser aware of partitions ordering

2017-03-20 Thread Ronan Dunklau
Hello,

With native partitioning landing in Postgres 10, we (Julien Rouhaud and 
myself) had the idea for the 
following simple optimisation. This simple optimisation comes from a real use 
case.

= Proposal =

With range partitioning, we guarantee that each partition contains non-
overlapping values. Since we know the range allowed for each partition, it is 
possible to sort them according to the partition key (as is done already for 
looking up partitions).

Thus, we can generate sorted Append plans instead of MergeAppend when sorting 
occurs on the partition key.

For example, consider the following model and query:


CREATE TABLE parent (id int) PARTITION BY RANGE (id);
CREATE TABLE child2 PARTITION OF parent FOR VALUES FROM (10) TO (20);
CREATE TABLE child1 PARTITION OF parent FOR VALUES FROM (1) TO (10);
CREATE INDEX ON child1(id);
CREATE INDEX ON child2(id);



EXPLAIN (COSTS OFF) SELECT * FROM parent ORDER BY id desc;

  QUERY PLAN  
--
 Merge Append
   Sort Key: parent.id DESC
   ->  Sort
 Sort Key: parent.id DESC
 ->  Seq Scan on parent
   ->  Index Only Scan Backward using child2_id_idx on child2
   ->  Index Only Scan Backward using child1_id_idx on child1

We can guarantee that every value found in child2 will have a greater id than 
any value found in child1. Thus, we could generate a plan like this:

  QUERY PLAN   
---
 Append
   Sort Key: child2.id DESC
   ->  Index Only Scan Backward using child2_id_idx1 on child2
   ->  Index Only Scan Backward using child1_id_idx1 on child1

Skipping the merge step altogether.

This has the added benefit of providing an "early stop": if we add a limit to 
the query, we will only scan the indexes that are necessary for fetching the 
required amount of tuples. This is especially useful if we add a WHERE clause 
not covered by the index with the limit. Consider the following example, with 
a table "webvisits" partitioned by a ts colum. If we want to retrieve the 
latest 100 hits from a specific ip:

EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM webvisits WHERE ipaddr = 
'93.184.216.34' ORDER BY ts DESC LIMIT 10;


--
 Limit  (cost=0.43..32.32 rows=10 width=104) (actual time=0.080..0.109 rows=10 
loops=1)
   Buffers: shared hit=7
   ->  Append  (cost=0.43..401009.65 rows=125740 width=104) (actual 
time=0.078..0.105 rows=10 loops=1)
 Sort Key: webvisits_201712.ts DESC
 Buffers: shared hit=7
 ->  Index Scan Backward using webvisits_201712_ipaddr_ts_idx on 
webvisits_201712  (cost=0.43..133617.70 rows=41901 width=104) (actual 
time=0.077..0.101 rows=10 loops=1)
   Index Cond: (ipaddr = '93.184.216.34'::inet)
   Buffers: shared hit=7
 ->  Index Scan Backward using webvisits_201711_ipaddr_ts_idx on 
webvisits_201711  (cost=0.43..131514.18 rows=41243 width=104) (never executed)
   Index Cond: (ipaddr = '93.184.216.34'::inet)
 ->  Index Scan Backward using webvisits_201710_ipaddr_ts_idx on 
webvisits_201710  (cost=0.43..135801.71 rows=42587 width=104) (never executed)
 

We have developed a proof-of-concept implementation for this optimisation.

For sub partitioning, intermediate Append nodes can be generated.

Consider the following examples:


CREATE TABLE parentparent (c1 int, c2 int) PARTITION BY range (c1);

-- Subpartition only on second column
CREATE TABLE parent1 PARTITION OF parentparent FOR VALUES FROM (1) TO (10)
PARTITION BY range (c2);

-- Subpartition on both columns
CREATE TABLE parent2 PARTITION OF parentparent FOR VALUES FROM (10) TO (20)
PARTITION BY range (c1, c2);

CREATE TABLE child11 PARTITION OF parent1 FOR VALUES FROM (1) TO (10);
CREATE TABLE child12 PARTITION OF parent1 FOR VALUES FROM (10) TO (20);
CREATE TABLE child21 PARTITION OF parent2 FOR VALUES FROM (10, 1) TO (15, 10);
CREATE TABLE child22 PARTITION OF parent2 FOR VALUES FROM (15, 10) TO (20, 
20);

EXPLAIN (COSTS OFF) SELECT * FROM parentparent ORDER BY c1;
  QUERY PLAN   
---
 Append
   Sort Key: parent1.c1
   ->  Sort
 Sort Key: parent1.c1
 ->  Append
   ->  Seq Scan on parent1
   ->  Seq Scan on child11
   ->  Seq Scan on child12
   ->  Append
 Sort Key: child21.c1
 ->  Sort
   Sort Key: child21.c1
   ->  Seq Scan on child21
 ->  Sort
   Sort Key: child22.c1
   ->  Seq Scan on child22


EXPLAIN (COSTS OFF) SELECT * FROM parentparent ORDER BY 

[HACKERS] Re: [COMMITTERS] pgsql: Add TAP tests for password-based authentication methods.

2017-03-20 Thread Heikki Linnakangas

On 03/20/2017 02:32 AM, Peter Eisentraut wrote:

On 3/17/17 05:37, Heikki Linnakangas wrote:

Add TAP tests for password-based authentication methods.

Tests all combinations of users with MD5, plaintext and SCRAM verifiers
stored in pg_authid, with plain 'password', 'md5' and 'scram'
authentication methods.


This is missing an entry for tmp_check/ in .gitignore.  But maybe we can
do that globally instead of repeating it in every directory?


Hmm. That would be nice. However, many of the test suites have other 
files and directories in their .gitignores, so handling tmp_check 
globally wouldn't move the needle much. And handling some generated 
subdirectories globally and others locally seems confusing. For example, 
src/test/regress/.gitignore currently looks like this:



# Local binaries
/pg_regress

# Generated subdirectories
/tmp_check/
/results/
/log/

# Note: regression.* are only left behind on a failure; that's why they're not 
ignored
#/regression.diffs
#/regression.out


Not having /tmp_check/ in the above would look like an omission, when 
/results/ and /log/ are still listed.


If we could also handle results and log globally, that would be nice. 
But IMHO those names are too generic to put to a global .gitignore. We 
could rename them, but this starts to feel like more trouble than it's 
worth. I'll just go and add a .gitignore with /tmp_check/ in 
src/test/authentication.


- Heikki



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Stas Kelvich

> On 20 Mar 2017, at 11:32, Craig Ringer  wrote:
> 
> On 19 March 2017 at 21:26, Petr Jelinek  wrote:
> 
>> I think only genam would need changes to do two-phase scan for this as
>> the catalog scans should ultimately go there. It's going to slow down
>> things but we could limit the impact by doing the two-phase scan only
>> when historical snapshot is in use and the tx being decoded changed
>> catalogs (we already have global knowledge of the first one, and it
>> would be trivial to add the second one as we have local knowledge of
>> that as well).
> 
> 
> TBH, I have no idea how to approach the genam changes for the proposed
> double-scan method. It sounds like Stas has some idea how to proceed
> though (right?)
> 

I thought about having a special field (or reusing one of the existing fields)
in the snapshot struct to force filtering xmax > snap->xmax or xmin = snap->xmin,
as Petr suggested. Then this logic can reside in ReorderBufferCommit().
However, this does not solve the problem with the catcache, so I'm looking into
it right now.


> On 17 Mar 2017, at 05:38, Craig Ringer  wrote:
> 
> On 16 March 2017 at 19:52, Stas Kelvich  wrote:
> 
>> 
>> I’m working right now on issue with building snapshots for decoding prepared 
>> tx.
>> I hope I'll send updated patch later today.
> 
> 
> Great.
> 
> What approach are you taking?


Just as before, I mark this transaction as committed in the snapbuilder, but
after decoding I delete it from xip (which holds committed transactions in the
case of a historic snapshot).


> -- 
> Craig Ringer   http://www.2ndQuadrant.com/
> PostgreSQL Development, 24x7 Support, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PinBuffer() no longer makes use of strategy

2017-03-20 Thread Alexander Korotkov
On Sun, Mar 19, 2017 at 3:51 AM, Jim Nasby  wrote:

> On 3/16/17 12:48 PM, David Steele wrote:
>
>> This patch looks pretty straight forward and applies cleanly and
>> compiles at cccbdde.
>>
>> It's not a straight revert, though, so still seems to need review.
>>
>> Jim, do you know when you'll have a chance to look at that?
>>
>
> Yes. Compiles and passes for me as well.
>
> One minor point: previously the code did
>
>   if (buf->usage_count < BM_MAX_USAGE_COUNT)
>
> but now it does
>
>   if (BUF_STATE_GET_USAGECOUNT(buf_state) != BM_MAX_USAGE_COUNT)
>
> being prone to paranoia, I prefer the first, but I've seen both styles in
> the code so I don't know if it's worth futzing with.
>

OK, let's be paranoid and do it the same way as before.  Revised patch is
attached.

--
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company


put-buffer-usagecount-logic-back-2.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch to improve performance of replay of AccessExclusiveLock

2017-03-20 Thread David Rowley
On 18 March 2017 at 21:59, Simon Riggs  wrote:
> As Amit says, I don't see the gain from adding that to each xact state.
>
> I'd suggest refactoring my patch so that the existign
> MyXactAccessedTempRel becomes MyXactFlags and we can just set a flag
> in the two cases (temp rels and has-aels). That way we still have one
> Global but it doesn't get any uglier than it already is.

OK, now that I understand things a bit better, I've gone over your
patch and changed things around so that it combines the two variables
into a flags variable, which will be better suited for future
expansion.

I also fixed up the missing code from the Abort transaction. This is patch 0001

0002:

I was thinking it might be a good idea to back-patch the patch which
removes the bogus code from StandbyReleaseLockTree(). Any users of
sub-transactions are currently paying a price that they've no reason
to, especially when there are AELs held by other concurrent
transactions. We already know this code is slow; having this mistake
in there is not helping any.

0003:

Is intended to be patched atop of 0002 (for master only) and revises
this code further to remove the StandbyReleaseLockTree() function. To
me it seems better to do this to clear up any confusion about what's
going on here. This patch does close the door a little on tracking
AELs on sub-xacts. I really think if we're going to do that, then it
won't be that hard to put it back again. Seems better to be consistent
here and not leave any code around that causes people to be confused
about the same thing as I was confused about.  This patch does get rid
of StandbyReleaseLockTree() which isn't static. Are we likely to break
any 3rd party code by doing that?

0004:

Again master only, this is a further small optimization of
StandbyReleaseLocks() to further tighten the slow loop over each held
lock. I also think that it's better to be more explicit about the case
where the function would release all locks, although I'm not all
that sure in which case the function will actually be called with an
invalid xid.

Patch series is attached.

-- 
 David Rowley   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services


0001-Speed-up-replay-of-Access-Exclusive-Locks-on-hot-sta.patch
Description: Binary data


0002-Remove-bogus-code-to-release-locks-of-subxacts.patch
Description: Binary data


0003-Get-rid-of-StandbyReleaseLockTree.patch
Description: Binary data


0004-Small-optimization-for-StandbyReleaseLocks.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Thanks for the TAP framework

2017-03-20 Thread Yuriy Zhuravlev
Hello.

For me, Testgres (https://github.com/postgrespro/testgres) is much better
because I am allergic to Perl.
Unfortunately, it's not part of Postgres...

2017-03-20 10:21 GMT+03:00 Craig Ringer :

> Hi
>
> It just occurred to me that much of what I've been doing recently
> would've been exceedingly difficult to write and even harder to debug
> without the TAP framework. I would've spent a LOT of time writing test
> scripts and wondering whether the bug was in my scripts or my Pg code.
>
> I still spend a while swearing at Perl, but I can't really imagine
> doing nontrivial development without them anymore.
>
> So ... thanks.
>
> --
>  Craig Ringer   http://www.2ndQuadrant.com/
>  PostgreSQL Development, 24x7 Support, Training & Services
>
>
> --
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
>


Re: [HACKERS] Logical decoding on standby

2017-03-20 Thread Andres Freund
Hi,

Have you checked how high the overhead of XLogReadDetermineTimeline is?
A non-local function call, especially into a different translation-unit
(no partial inlining), for every single page might end up being
noticeable.  That's fine in the cases it actually adds functionality,
but for a master streaming out data, that's not actually adding
anything.

Did you check whether your changes to read_local_xlog_page could cause
issues with twophase.c? Because that now also uses it.

Did you check whether ThisTimeLineID is actually always valid in the
processes logical decoding could run in?  IIRC it's not consistently
updated during recovery in any process but the startup process.



On 2017-03-19 21:12:23 +0800, Craig Ringer wrote:
> From 2fa891a555ea4fb200d75b8c906c6b932699b463 Mon Sep 17 00:00:00 2001
> From: Craig Ringer 
> Date: Thu, 1 Sep 2016 10:16:55 +0800
> Subject: [PATCH 2/3] Follow timeline switches in logical decoding

FWIW, the title doesn't really seem accurate to me.


> Logical slots cannot actually be created on a replica without use of
> the low-level C slot management APIs so this is mostly foundation work
> for subsequent changes to enable logical decoding on standbys.

Every time I read references to anything like this my blood starts to
boil.  I kind of regret not having plastered RecoveryInProgress() errors
all over this code.


> From 8854d44e2227b9d076b0a25a9c8b9df9270b2433 Mon Sep 17 00:00:00 2001
> From: Craig Ringer 
> Date: Mon, 5 Sep 2016 15:30:53 +0800
> Subject: [PATCH 3/3] Logical decoding on standby
>
> * Make walsender aware of ProcSignal and recovery conflicts, make walsender
>   exit with recovery conflict on upstream drop database when it has an active
>   logical slot on that database.
> * Allow GetOldestXmin to omit catalog_xmin, be called already locked.

"be called already locked"?


> * Send catalog_xmin separately in hot_standby_feedback messages.
> * Store catalog_xmin separately on a physical slot if received in 
> hot_standby_feedback

What does separate mean?


> * Separate the catalog_xmin used by vacuum from ProcArray's 
> replication_slot_catalog_xmin,
>   requiring that xlog be emitted before vacuum can remove no longer needed 
> catalogs, store
>   it in checkpoints, make vacuum and bgwriter advance it.

I can't parse that sentence.


> * Add a new recovery conflict type for conflict with catalog_xmin. Abort
>   in-progress logical decoding sessions with conflict with recovery where 
> needed
>   catalog_xmin is too old

Are we retaining WAL for slots broken in that way?


> * Make extra efforts to reserve master's catalog_xmin during decoding startup
>   on standby.

What does that mean?


> * Remove checks preventing starting logical decoding on standby

To me that's too many different things in one commit.  A bunch of them
seem like it'd be good if they'd get independent buildfarm cycles too.




> diff --git a/src/backend/access/heap/rewriteheap.c 
> b/src/backend/access/heap/rewriteheap.c
> index d7f65a5..36bbb98 100644
> --- a/src/backend/access/heap/rewriteheap.c
> +++ b/src/backend/access/heap/rewriteheap.c
> @@ -812,7 +812,8 @@ logical_begin_heap_rewrite(RewriteState state)
>   if (!state->rs_logical_rewrite)
>   return;
>
> - ProcArrayGetReplicationSlotXmin(NULL, _xmin);
> + /* Use the catalog_xmin being retained by vacuum */
> + ProcArrayGetReplicationSlotXmin(NULL, _xmin, NULL);

What does that comment mean? Vacuum isn't the only thing that prunes old
records.


> +/*
> + * Set the global oldest catalog_xmin used to determine when tuples
> + * may be removed from catalogs and user-catalogs accessible from logical
> + * decoding.
> + *
> + * Only to be called from the startup process or by 
> UpdateOldestCatalogXmin(),
> + * which ensures the update is properly written to xlog first.
> + */
> +void
> +SetOldestCatalogXmin(TransactionId oldestCatalogXmin)
> +{
> + Assert(InRecovery || !IsUnderPostmaster || AmStartupProcess() || 
> LWLockHeldByMe(ProcArrayLock));

Uh, that's long-ish.  And doesn't agree with the comment above
(s/startup process/process performing recovery/?).

This is a long enough list that I'd consider just dropping the assert.


> + else if (info == XLOG_XACT_CATALOG_XMIN_ADV)
> + {
> + xl_xact_catalog_xmin_advance *xlrec = 
> (xl_xact_catalog_xmin_advance *) XLogRecGetData(record);
> +
> + /*
> +  * Unless logical decoding is possible on this node, we don't 
> care about
> +  * this record.
> +  */
> + if (!XLogLogicalInfoActive() || max_replication_slots == 0)
> + return;

Too many negatives for my taste, but whatever.


> + /*
> +  * Apply the new catalog_xmin limit immediately. New decoding 
> sessions
> +  * will refuse to start if their slot is past it, and old ones 
> will
> +  * notice when we signal 

Re: [HACKERS] Size vs size_t

2017-03-20 Thread Daniel Gustafsson
> On 16 Mar 2017, at 23:20, Tom Lane  wrote:
> 
> Thomas Munro  writes:
>> Naive replacement in new files (present in master but not in 9.6) with
>> the attached script, followed by a couple of manual corrections where
>> Size was really an English word in a comment, gets the attached diff.
> 
> In the case of mmgr/slab.c, a lot of those uses of Size probably
> correspond to instantiations of the MemoryContext APIs; so blindly
> changing them to "size_t" seems like a bit of a type violation
> (and might indeed draw warnings from pickier compilers).  Don't
> know if any of the other things you've identified here have similar
> entanglements.

While it might not be an issue that hits many developers, Size is also defined
on macOS in the MacTypes.h header so using CoreFoundation when hacking on macOS
port code will cause typedef redefinition errors.

Not really a case for or against, but another datapoint.

cheers ./daniel

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] PoC plpgsql - possibility to force custom or generic plan

2017-03-20 Thread Pavel Stehule
2017-03-19 14:30 GMT+01:00 Petr Jelinek :

> On 19/03/17 12:32, Pavel Stehule wrote:
> >
> >
> > 2017-03-18 19:30 GMT+01:00 Petr Jelinek  > >:
> >
> > On 16/03/17 17:15, David Steele wrote:
> > > On 2/1/17 3:59 PM, Pavel Stehule wrote:
> > >> Hi
> > >>
> > >> 2017-01-24 21:33 GMT+01:00 Pavel Stehule  
> > >>  >>>:
> > >>
> > >> Perhaps that's as simple as renaming all the existing
> _ns_*
> > >> functions to _block_ and then adding support for
> pragmas...
> > >>
> > >> Since you're adding cursor_options to PLpgSQL_expr it
> should
> > >> probably be removed as an option to exec_*.
> > >>
> > >> I have to recheck it. Some cursor options going from
> dynamic
> > >> cursor variables and are related to dynamic query - not
> query
> > >> that creates query string.
> > >>
> > >> hmm .. so current state is better due using options like
> > >> CURSOR_OPT_PARALLEL_OK
> > >>
> > >>  if (expr->plan == NULL)
> > >> exec_prepare_plan(estate, expr, (parallelOK ?
> > >>   CURSOR_OPT_PARALLEL_OK : 0) |
> > >> expr->cursor_options);
> > >>
> > >> This options is not permanent feature of expression - and
> then I
> > >> cannot to remove cursor_option argument from exec_*
> > >>
> > >> I did minor cleaning - remove cursor_options from plpgsql_var
> > >>
> > >> + basic doc
> > >
> > > This patch still applies cleanly and compiles at cccbdde.
> > >
> > > Any reviewers want to have a look?
> > >
> >
> > I'll bite.
> >
> > I agree with Jim that it's not very nice to add yet another
> > block/ns-like layer. I don't see why pragma could not be added to
> either
> > PLpgSQL_stmt_block (yes pragma can be for whole function but function
> > body is represented by PLpgSQL_stmt_block as well so no issue
> there), or
> > to namespace code. In namespace since they are used for other thing
> > there would be bit of unnecessary propagation but it's 8bytes per
> > namespace, does not seem all that much.
> >
> > My preference would be to add it to PLpgSQL_stmt_block (unless we
> plan
> > to add posibility to add pragmas for other loops and other things)
> but I
> > am not sure if current block is easily (and in a fast way) accessible
> > from all places where it's needed. Maybe the needed info could be
> pushed
> > to estate from PLpgSQL_stmt_block during the execution.
> >
> >
> > There is maybe partial misunderstand of pragma - it is set of nested
> > configurations used in compile time only. It can be used in execution
> > time too - it change nothing.
> >
> > The pragma doesn't build a persistent tree. It is stack of
> > configurations that allows fast access to current configuration, and
> > fast leaving of configuration when the change is out of scope.
> >
> > I don't see any any advantage to integrate pragma to ns or to
> > stmt_block. But maybe I don't understand to your idea.
> >
> > I see a another possibility in code - nesting init_block_directives() to
> > plpgsql_ns_push and free_block_directives() to plpgsql_ns_pop()
> >
>
> That's more or less what I mean by "integrating" to ns :)
>
> The main idea is to not add 3rd layer of block push/pop that's sprinkled
> in "random" places.
>

OK, fixed.

I reworked how the settings are maintained - now it uses a lazy copy: the copy
of the settings is created only when a pragma is used in a namespace. This
removes any impact on existing code that does not use pragmas.
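
To illustrate the intended block scoping, here is a sketch based on the PRAGMA
syntax in the patch below (the function and table names are made up, and it
assumes the pragma applies only inside the block whose DECLARE section
contains it):

CREATE FUNCTION check_id(_id text) RETURNS boolean AS $$
BEGIN
  DECLARE
    PRAGMA PLAN_CACHE(force_custom_plan);
  BEGIN
    -- queries inside this inner block use one-shot (custom) plans
    PERFORM 1 FROM tab WHERE id = _id;
  END;
  -- outside the inner block the default plan cache behaviour applies again
  RETURN EXISTS(SELECT 1 FROM tab WHERE id = _id);
END;
$$ LANGUAGE plpgsql;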

Regards

Pavel


>
> --
>   Petr Jelinek  http://www.2ndQuadrant.com/
>   PostgreSQL Development, 24x7 Support, Training & Services
>
diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml
index d356deb9f5..56da4d6163 100644
--- a/doc/src/sgml/plpgsql.sgml
+++ b/doc/src/sgml/plpgsql.sgml
@@ -802,6 +802,32 @@ $$ LANGUAGE plpgsql;
 happen in a plain SQL command.

   
+
+  
+   Block level PRAGMA
+
+   
+PRAGMA
+in PL/pgSQL
+   
+
+   
+The block level PRAGMA allows to change some
+PL/pgSQL compiler behave. Currently
+only PRAGMA PLAN_CACHE is supported.
+
+
+CREATE FUNCTION enforce_fresh_plan(_id text) RETURNS boolean AS $$
+DECLARE
+  PRAGMA PLAN_CACHE(force_custom_plan);
+BEGIN
+  -- in this block every embedded query uses one shot plan
+  RETURN EXISTS(SELECT * FROM tab WHERE id = _id);
+END;
+$$ LANGUAGE plpgsql;
+
+   
+  
   
 
   
@@ -4649,6 +4675,43 @@ $$ LANGUAGE plpgsql;
  use of the now() function would still be a better idea.
 
 
+
+
+ PRAGMA PLAN_CACHE
+
+ 
+  The plan cache behave can be controlled with PRAGMA 

Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Craig Ringer
On 19 March 2017 at 21:26, Petr Jelinek  wrote:

> I think only genam would need changes to do two-phase scan for this as
> the catalog scans should ultimately go there. It's going to slow down
> things but we could limit the impact by doing the two-phase scan only
> when historical snapshot is in use and the tx being decoded changed
> catalogs (we already have global knowledge of the first one, and it
> would be trivial to add the second one as we have local knowledge of
> that as well).

We'll also have to clobber caches after we finish decoding a 2pc xact,
since we don't know those changes are visible to other xacts and can't
guarantee they'll ever be (if it aborts).

That's going to be "interesting" when trying to decode interleaved
transaction streams since we can't afford to clobber caches whenever
we see an xlog record from a different xact. We'll probably have to
switch to linear decoding with reordering when someone makes catalog
changes.

TBH, I have no idea how to approach the genam changes for the proposed
double-scan method. It sounds like Stas has some idea how to proceed
though (right?)



-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Petr Jelinek
On 20/03/17 09:32, Craig Ringer wrote:
> On 19 March 2017 at 21:26, Petr Jelinek  wrote:
> 
>> I think only genam would need changes to do two-phase scan for this as
>> the catalog scans should ultimately go there. It's going to slow down
>> things but we could limit the impact by doing the two-phase scan only
>> when historical snapshot is in use and the tx being decoded changed
>> catalogs (we already have global knowledge of the first one, and it
>> would be trivial to add the second one as we have local knowledge of
>> that as well).
> 
> We'll also have to clobber caches after we finish decoding a 2pc xact,
> since we don't know those changes are visible to other xacts and can't
> guarantee they'll ever be (if it aborts).
> 

AFAIK reorder buffer already does that.

> That's going to be "interesting" when trying to decode interleaved
> transaction streams since we can't afford to clobber caches whenever
> we see an xlog record from a different xact. We'll probably have to
> switch to linear decoding with reordering when someone makes catalog
> changes.

We may need something that allows representing multiple parallel
transactions in a single process and a cheap way of switching between them
(i.e., similar to what we need for autonomous transactions). But that's not
something the current patch has to deal with.

-- 
  Petr Jelinek  http://www.2ndQuadrant.com/
  PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] identity columns

2017-03-20 Thread Vitaly Burovoy
On 2/28/17, Peter Eisentraut  wrote:
> New patch that fixes everything.  ;-)

Great work!

> On 1/4/17 19:34, Vitaly Burovoy wrote:
>> 1. The fact COPY ignores GENERATED ALWAYS constraint (treats as
>> GENERATED BY DEFAULT) should be mentioned as well as rules.
>
> fixed (documentation added)
>
> What do you mean for rules?

I meant the phrase "However, it will not invoke rules." mentioned there.
For rewrite rules this behavior is mentioned on the COPY page, not on
the "CREATE RULE" page.
I think it would be better if that behavior is placed on both CREATE
TABLE and COPY pages.
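
As a concrete illustration of the COPY behaviour mentioned above (this is only
a sketch of the described behaviour; the table name is made up):

CREATE TABLE items (id int GENERATED ALWAYS AS IDENTITY, label text);

-- An explicit value is rejected for a GENERATED ALWAYS column in INSERT ...
INSERT INTO items (id, label) VALUES (42, 'forced');   -- error

-- ... but COPY takes the value from the input, as if the column were
-- GENERATED BY DEFAULT:
COPY items (id, label) FROM STDIN;   -- accepted, including explicit id values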


>> 2. Usually error message for identical columns (with LIKE clause)
>> looks like this:
>> test=# CREATE TABLE def(i serial, n1 int NOT NULL, n2 int);
>> CREATE TABLE
>> test=# CREATE TABLE test(i serial, LIKE def INCLUDING ALL);
>> ERROR:  column "i" specified more than once
>>
>> but for generated columns it is disorienting enough:
>> test=# CREATE TABLE idnt(i int GENERATED BY DEFAULT AS IDENTITY);
>> CREATE TABLE
>> test=# CREATE TABLE test(i int GENERATED BY DEFAULT AS IDENTITY, LIKE
>> idnt INCLUDING ALL);
>> ERROR:  relation "test_i_seq" already exists
>
> Yeah, this is kind of hard to fix though, because the sequence names
> are decided during parse analysis, when we don't see that the same
> sequence name is also used by another new column.  We could maybe
> patch around it in an ugly way, but it doesn't seem worth it for this
> marginal case.

OK

>> 3. Strange error (not about absence of a column; but see pp.5 and 8):
>> test=# ALTER TABLE idnt ALTER COLUMN o ADD GENERATED ALWAYS AS IDENTITY;
>> ERROR:  identity column type must be smallint, integer, or bigint
>
> What's wrong with that?

The column mentioned there does not exist. Therefore the error should
be about that, not about the type of an absent column:
test=# CREATE TABLE idnt(i int GENERATED BY DEFAULT AS IDENTITY, t TEXT);
CREATE TABLE
test=# ALTER TABLE idnt ALTER COLUMN o TYPE int;  -- expected error message
ERROR:  42703: column "o" of relation "idnt" does not exist
test=# -- compare with:
test=# ALTER TABLE idnt ALTER COLUMN o ADD GENERATED ALWAYS AS
IDENTITY;  -- strange error message
ERROR:  identity column type must be smallint, integer, or bigint


>> 5. Attempt to make a column be identity fails:
>> test=# ALTER TABLE idnt ALTER COLUMN n1 SET GENERATED BY DEFAULT;
>> ERROR:  column "n1" of relation "idnt" is not an identity column
>>
>> As I understand from the Spec, chapter 11.20 General Rule 2)b) says
>> about the final state of a column without mentioning of the initial
>> state.
>> Therefore even if the column has the initial state "not generated",
>> after the command it changes the state to either "GENERATED ALWAYS" or
>> "GENERATED BY DEFAULT".
>
> 11.12  states that "If  specification> or  is specified, then C
> shall be an identity column."  So this clause is only to alter an
> existing identity column, not make a new one.

Ouch. Right. The rule was at the upper level.

Nevertheless, even now we don't follow the rules to the letter.
For example, we allow "SET NOT NULL" and "TYPE" for identity columns,
which the spec restricts.
Since we extend the spec with "ADD GENERATED...", "SEQUENCE NAME" and by
allowing more than one identity column, why can't we extend it by
allowing "SET GENERATED" for non-identity columns and dropping "ADD
GENERATED..."?


>> 6. The docs mention a syntax:
>> ALTER [ COLUMN ] > class="PARAMETER">column_name { SET GENERATED { ALWAYS |
>> BY DEFAULT } | SET sequence_option | RESET
>> } [...]
>>
>> but the command fails:
>> test=# ALTER TABLE idnt ALTER COLUMN i RESET;
>> ERROR:  syntax error at or near ";"
>> LINE 1: ALTER TABLE idnt ALTER COLUMN i RESET;
>
> This works for me.  Check again please.

I'm sorry, it still does not work for me (perhaps you mixed it up with "RESTART"?):
test=# ALTER TABLE idnt ALTER COLUMN i RESET;
ERROR:  syntax error at or near ";"
LINE 1: ALTER TABLE idnt ALTER COLUMN i RESET;


>> 7. (the same line of the doc)
>> usually ellipsis appears in inside parenthesis with clauses can be
>> repeated, in other words it should be written as:
>> ALTER  column_name ( { SET GENERATED { ALWAYS |
>> BY DEFAULT } |  } [...] )
>
> But there are no parentheses in that syntax.  I think the syntax
> synopsis as written is correct.

I was wrong. Your version is correct.


>> 9. The command fails:
>> test=# ALTER TABLE idnt ALTER n2 ADD GENERATED ALWAYS AS IDENTITY;
>> ERROR:  column "n2" of relation "idnt" must be declared NOT NULL
>> before identity can be added
>>
>> whereas the Spec does not contains a requirement for a column to be a
>> NOT NULLABLE.
>> You can implicitly set a column as NOT NULL (as the "serial" macros
>> does), but not require it later.
>
> The spec requires that an identity column is NOT NULL.

OK. "11.4 SR 16)d" really says "The  NOT
NULL NOT DEFERRABLE is implicit."


>> 11. The last CF added partitioned tables which are similar to
>> inherited, but slightly different.
>
> fixed


Re: [HACKERS] Partition-wise join for join between (declaratively) partitioned tables

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 1:19 PM, Ashutosh Bapat
 wrote:
>> That seems different than what I suggested and I'm not sure what the
>> reason is for the difference?
>
> The patch adding macros IS_JOIN_REL() and IS_OTHER_REL() and changing
> the code to use it will look quite odd by itself. We are not changing
> all the instances of RELOPT_JOINREL or RELOPT_OTHER_MEMBER_REL to use
> those. There is code which needs to check those kinds, instead of "all
> join rels" or "all other rels" resp. So the patch will add those
> macros, change only few places to use those macros, which are intended
> to be changed while applying partition-wise join support for single
> level partitioned table.

Hmm.  You might be right, but I'm not convinced.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] increasing the default WAL segment size

2017-03-20 Thread Beena Emerson
Hello,

PFA the updated patch.

On Fri, Mar 17, 2017 at 6:40 AM, Robert Haas  wrote:

> On Tue, Mar 14, 2017 at 1:44 AM, Beena Emerson 
> wrote:
> > Attached is the updated patch. It fixes the issues and also updates few
> code
> > comments.
>
> I did an initial readthrough of this patch tonight just to get a
> feeling for what's going on.  Based on that, here are a few review
> comments:
>
> The changes to pg_standby seem to completely break the logic to wait
> until the file has attained the correct size.  I don't know how to
> salvage that logic off-hand, but just breaking it isn't acceptable.
>

Using XLogLongPageHeader->xlp_seg_size, all the original checks have
been retained. This method is even used in pg_waldump.



>
> + Note that changing this value requires an initdb.
>
> Instead, maybe say something like "Note that this value is fixed for
> the lifetime of the database cluster."
>

Corrected.


>
> -intmax_wal_size = 64;/* 1 GB */
> -intmin_wal_size = 5;/* 80 MB */
> +intwal_segment_size = 2048;/* 16 MB */
> +intmax_wal_size = 1024 * 1024;/* 1 GB */
> +intmin_wal_size = 80 * 1024;/* 80 MB */
>
> If wal_segment_size is now measured in multiple of XLOG_BLCKSZ, then
> it's not the case that 2048 is always 16MB.  If the other values are
> now measured in kB, perhaps rename the variables to add _kb, to avoid
> confusion with the way it used to work (and in general).  The problem
> with leaving this as-is is that any existing references to
> max_wal_size in core or extension code will silently break; you want

it to break in a noticeable way so that it gets fixed.
>
>
wal_segment_size is now DEFAULT_XLOG_SEG_SIZE / XLOG_BLCKSZ;
min_wal_size and max_wal_size now have the _kb postfix.


> + * UsableBytesInSegment: It is set in assign_wal_segment_size and stores
> the
> + * number of bytes in a WAL segment usable for WAL data.
>
> The comment doesn't need to say where it gets set, and it doesn't need
> to repeat the variable name.  Just say "The number of bytes in a..."
>

Done.


>
> +assign_wal_segment_size(int newval, void *extra)
>
> Why does a PGC_INTERNAL GUC need an assign hook?  I think the GUC
> should only be there to expose the value; it shouldn't have
> calculation logic associated with it.
>

Removed the function and called the functions in ReadControlFile.


>
>  /*
> + * initdb passes the WAL segment size in an environment variable. We
> don't
> + * bother doing any sanity checking, we already check in initdb that
> the
> + * user gives a sane value.
> + */
> +XLogSegSize = pg_atoi(getenv("XLOG_SEG_SIZE"), sizeof(uint32), 0);
>
> I think we should bother.  I don't like the idea of the postmaster
> crashing in flames without so much as a reasonable error message if
> this parameter-passing mechanism goes wrong.
>

I have rechecked the XLogSegSize.


>
> +{"wal-segsize", required_argument, NULL, 'Z'},
>
> When adding an option with no documented short form, generally one
> picks a number that isn't a character for the value at the end.  See
> pg_regress.c or initdb.c for examples.
>

Done.


>
> +   wal_segment_size = atoi(str_wal_segment_size);
>
> So, you're comfortable interpreting --wal-segsize=1TB or
> --wal-segsize=1GB as 1?  Implicitly, 1MB?
>

Imitating the current behaviour of the configure option --with-wal-segsize, I
have used strtol to throw an error if the value is not a plain integer.


>
> + * ControlFile is not accessible here so use SHOW wal_segment_size command
> + * to set the XLogSegSize
>
> Breaks compatibility with pre-9.6 servers.
>

Added a check for the version; the SHOW command will be run only against v10
and above. Previous versions do not need this.
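
For reference, the same information is visible from an ordinary SQL session; a
sketch only, since the patch itself issues the replication-protocol SHOW
command, and the version cutoff mirrors the check described above:

-- wal_segment_size is a preset GUC, so any client can read it
SHOW wal_segment_size;

-- a client that must also talk to older servers can gate this on the version
SELECT current_setting('server_version_num')::int >= 100000 AS can_ask_segsize;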


>
> --
Thank you,

Beena Emerson

EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


02-initdb-walsegsize-v5.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Partition-wise join for join between (declaratively) partitioned tables

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 9:44 AM, Ashutosh Bapat
 wrote:
>> I believe it would also be best to include 0011's changes to
>> adjust_appendrel_attrs_multilevel in 0001.
>
> The function needs to repeat the "adjustment" process for every
> "other" relation (join or base) that it encounters, by testing using
> OTHER_BASE_REL or OTHER_JOINREL in short IS_OTHER_REL(). The last
> macros are added by the partition-wise join implementation patch 0005.
> It doesn't make sense to add that macro in 0001 OR modify that
> function twice, once in 0001 and then after 0005. So, I will leave it
> to be part of 0011, where the changes are actually needed.

Hmm.  I would kind of like to move the IS_JOIN_REL() and
IS_OTHER_REL() stuff to the front of the series.  In other words, I
propose that we add those macros first, each testing for only the one
kind of RelOptInfo that exists today, and change all the code to use
them.  Then, when we add child joinrels, we can modify the macros at
the same time.  The problem with doing it the way you have it is that
those changes will have to be squashed into the main partitionwise
join commit, because otherwise stuff will be broken.  Doing it the
other way around lets us commit that bit separately.

> Done. Now SQL file has 325 lines and output has 1697 lines as against
> 515 and 4085 lines resp. earlier.

Sounds reasonable.

> Now that that purpose has been served, I have reordered the
> patches so that the test patch comes after the implementation and
> follow-on fixes.

Sounds good.

> There are two ways to fix it,
>
> 1. when we create a reparameterized path add it to the list of paths,
> thus the parameterization bubbles up the join tree. But then we will
> be changing the path list after set_cheapest() has been called OR may
> be throwing out paths which other paths refer to. That's not
> desirable. May be we can save this path in another list and create
> join paths using this path instead of reparameterizing existing join
> paths.
> 2. Add code to reparameterize_path() to handle join paths, and I think
> all kinds of paths since we might have to trickle the parameterization
> down the joining paths which could be almost anything including
> sort_paths, unique_paths etc. That looks like a significant effort. I
> think, we should attack it separately after the stock partition-wise
> join has been committed.

I don't understand #1.  #2 sounds like what I was expecting.  I agree
it can be postponed.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] free space map and visibility map

2017-03-20 Thread Robert Haas
On Sat, Mar 18, 2017 at 5:42 PM, Jeff Janes  wrote:
> Isn't HEAP2_CLEAN only issued before an intended HOT update?  (Which then
> can't leave the block as all visible or all frozen).  I think the issue is
> here is HEAP2_VISIBLE or HEAP2_FREEZE_PAGE.  Am I reading this correctly,
> that neither of those ever update the FSM, regardless of FPI?

Yes, updates to the FSM are never logged.  Forcing replay of
HEAP2_FREEZE_PAGE to update the FSM might be a good idea.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Stephen Frost
Alvaro,

* Alvaro Herrera (alvhe...@2ndquadrant.com) wrote:
> Stephen Frost wrote:
> > * Andrew Dunstan (andrew.duns...@2ndquadrant.com) wrote:
> > > ISTM that the test setup and breakdown code, both in individual tests
> > > and in PostgresNode.pm  should be liberally sprinkled with diag() calls
> > > to make it easier to narrow down errors..
> > 
> > While I'm generally in favor of adding diag() info into the testing for
> > when things go wrong, what I don't want to do is increase the amount of
> > output that these tests produce without good cause.  I really wish there
> > was a "quiet" mode for the TAP tests which didn't report anything when
> > things are 'ok'.
> 
> That's diag's idea; you use it like
> "ok() or diag('failed because of snow')".
> so nothing is printed unless there is a problem.  You're not supposed to
> call it unconditionally.

That seems reasonable then.

> Something that would probably be helpful would be to put the server log
> lines corresponding to the failure in diag(); for example we could keep
> the log file open, do a seek(SEEK_END) just before running the test, and
> reading from that point onwards; probably stop reading after 5 lines or
> so.  They wouldn't be output unless there is a failure.  (Of course,
> this'd have to be done in the harness, not the test itself, to avoid
> cluttering already ugly individual test files.)

Agreed, this should be in the harness.

Is there any hope of getting a "quiet" mode, where all the "ok" lines
aren't printed when things work..?

Thanks!

Stephen


signature.asc
Description: Digital signature


Re: [HACKERS] Our feature change policy

2017-03-20 Thread Stephen Frost
Tom,

* Tom Lane (t...@sss.pgh.pa.us) wrote:
> Stephen Frost  writes:
> > * Bruce Momjian (br...@momjian.us) wrote:
> >> 1.  make the change now and mention it in the release notes
> >> 2.  #1, but also provide backward compatibility for 5+ years
> >> 3.  mark the feature as deprecated and remove/change it in 5+ years
> >> 4.  #3, but issue a warning for deprecated usage
> 
> > I don't generally feel like #1 is so rarely used (nor do I think it
> > should be rare that we use it).  With regard to #2, if we're going to do
> > that, I'd really like to see us decide ahead of time on a point in time
> > when we will remove the backwards-compatibility, otherwise it seems to
> > live on forever.  For my 2c, #3 should be reserved for things we are
> > explicitly removing, not for things we're changing and we should do #4
> > whenever possible in those cases because we're going to be removing it.
> 
> > Otherwise, #3 ends up being a case where we're holding up progress for
> > years because we have to announce that we're going to deprecate
> > something and then wait before we actually make whatever the change is.
> 
> Well, to what extent are we "holding up progress" in this particular
> case?  There is no other development work that's stalled by not renaming
> these binaries.  I think there should be some explicit accounting for
> the impact of delay if we're going to try to formalize these decisions
> better.

The rename of the binaries is case #2 above, at least as I was thinking
of it.  I don't believe we are holding up progress in that case.

I was imagining the changes to pg_stat_activity as possibly falling
under #3/change, meaning that we'd wait for 5 years before actually
renaming those columns, which is part of why I was objecting to that
idea.

If we're able to make the change and provide backwards compatibility
then I think the main question is if it's worth it to do so and, if so,
when are we going to remove that backwards compatibility.  If we don't
expect to ever be able to remove it, then that's an issue.

Thanks!

Stephen


signature.asc
Description: Digital signature


Re: [HACKERS] Our feature change policy

2017-03-20 Thread Bruce Momjian
On Mon, Mar 20, 2017 at 11:57:13AM -0400, Stephen Frost wrote:
> Tom,
> 
> * Tom Lane (t...@sss.pgh.pa.us) wrote:
> > Stephen Frost  writes:
> > > * Bruce Momjian (br...@momjian.us) wrote:
> > >> 1.  make the change now and mention it in the release notes
> > >> 2.  #1, but also provide backward compatibility for 5+ years
> > >> 3.  mark the feature as deprecated and remove/change it in 5+ years
> > >> 4.  #3, but issue a warning for deprecated usage
...
> > Well, to what extent are we "holding up progress" in this particular
> > case?  There is no other development work that's stalled by not renaming
> > these binaries.  I think there should be some explicit accounting for
> > the impact of delay if we're going to try to formalize these decisions
> > better.
> 
> The rename of the binaries is case #2 above, at least as I was thinking
> of it.  I don't believe we are holding up progress in that case.

Agreed.

> I was imagining the changes to pg_stat_activity as possibly falling
> under #3/change, meaning that we'd wait for 5 years before actually
> renaming those columns, which is part of why I was objecting to that
> idea.

Yes, that is a good point.  Removing contrib/tsearch2 or contrib/xml2 is
really not holding up progress on anything, but not delaying the
addition of wait event reporting to pg_stat_activity is certainly
holding up progress.  #3 and #4 would need to be weighted depending on
whether choosing them would delay progress, e.g. it did delay progress
on standard-conforming strings, but the delay was determined to be
reasonable.

> If we're able to make the change and provide backwards compatibility
> then I think the main question is if it's worth it to do so and, if so,
> when are we going to remove that backwards compatibility.  If we don't
> expect to ever be able to remove it, then that's an issue.

Agreed.

-- 
  Bruce Momjian  http://momjian.us
  EnterpriseDB http://enterprisedb.com

+ As you are, so once was I.  As I am, so you will be. +
+  Ancient Roman grave inscription +


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Partition-wise join for join between (declaratively) partitioned tables

2017-03-20 Thread Rafia Sabih
On Mon, Mar 20, 2017 at 8:21 AM, Robert Haas  wrote:
> On Fri, Mar 17, 2017 at 8:10 PM, Robert Haas  wrote:
>> While I was studying what you did with reparameterize_path_by_child(),
>> I started to wonder whether reparameterize_path() doesn't need to
>> start handling join paths.  I think it only handles scan paths right
>> now because that's the only thing that can appear under an appendrel
>> created by inheritance expansion, but you're changing that.  Maybe
>> it's not critical -- I think the worst consequences of missing some
>> handling there is that we won't consider a parameterized path in some
>> case where it would be advantageous to do so.  Still, you might want
>> to investigate a bit.
>
> I spent a fair amount of time this weekend musing over
> reparameterize_path_by_child().  I think a key question for this patch
> - as you already pointed out - is whether we're happy with that
> approach.  When we discover that we want to perform a partitionwise
> parameterized nestloop, and therefore that we need the paths for each
> inner appendrel to get their input values from the corresponding outer
> appendrel members rather than from the outer parent, we've got two
> choices.  The first is to do what the patch actually does, which is to
> build a new path tree for the nestloop inner path parameterized by the
> appropriate childrel.  The second is to use the existing paths, which
> are parameterized by the parent rel, and then somehow allow make that
> work.  For example, you can imagine that create_plan_recurse() could
> pass down a list of parameterized nestloops above the current point in
> the path tree, and a parent-child mapping for each, and then we could
> try to substitute everything while actually generating the plan
> instead of creating paths sooner.  Which is better?
>
> It would be nice to hear opinions from anyone else who cares, but
> after some thought I think the approach you've picked is probably
> better, because it's more like what we do already.  We have existing
> precedent for reparameterizing a path, but none for allowing a Var for
> one relation (the parent) to in effect refer to another relation (the
> child).
>
> That having been said, having try_nestloop_path() perform the
> reparameterization at the very top of the function seems quite
> undesirable.  You're creating a new path there before you know whether
> it's going to be rejected by the invalid-parameterization test and
> also before you know whether initial_cost_nestloop is going to reject
> it.  It would be much better if you could find a way to postpone the
> reparameterization until after those steps, and only do it if you're
> going to try add_path().

On further testing of this patch I found another case where it shows a
regression: the time taken with the patch is around 160 secs and 125 secs
without it.
Another minor thing to note is that planning time almost doubles with
this patch. I understand that this is for scenarios with really big 'big
data', so it may not be a serious issue in such cases, but it would be
good to keep an eye on it so that it doesn't exceed reasonable
computational bounds for a really large number of tables.
Please find the attached .out file to check the output I witnessed, and
let me know if any more information is required.
The schema and data were similar to the previously shared schema, with more
data added for this case; the parameter settings used were:
work_mem = 1GB
random_page_cost = seq_page_cost = 0.1
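
In session form, these are just the settings listed above applied before
running the query:

SET work_mem = '1GB';
SET random_page_cost = 0.1;
SET seq_page_cost = 0.1;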

-- 
Regards,
Rafia Sabih
EnterpriseDB: http://www.enterprisedb.com/


pwj_regress_2.out
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Patch: Write Amplification Reduction Method (WARM)

2017-03-20 Thread Alvaro Herrera
Pavan Deolasee wrote:
> On Tue, Mar 14, 2017 at 7:17 AM, Alvaro Herrera 
> wrote:

> > I didn't like this comment very much.  But it's not necessary: you have
> > already given relcache responsibility for setting rd_supportswarm.  The
> > only problem seems to be that you set it in RelationGetIndexAttrBitmap
> > instead of RelationGetIndexList, but it's not clear to me why.  I think
> > if the latter function is in charge, then we can trust the flag more
> > than the current situation.
> 
> I looked at this today.  AFAICS we don't have access to rd_amroutine in
> RelationGetIndexList since we don't actually call index_open() in that
> function. Would it be safe to do that? I'll give it a shot, but thought of
> asking here first.

Ah, you're right, we only have the pg_index tuple for the index, not the
pg_am one.  I think one pg_am cache lookup isn't really all that
terrible (though we should ensure that there's no circularity problem in
doing that), but I doubt that going to the trouble of invoking the
amhandler just to figure out if it supports WARM is acceptable.

-- 
Álvaro Herrerahttps://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Partitioned tables and relfilenode

2017-03-20 Thread Robert Haas
On Fri, Mar 17, 2017 at 4:57 AM, Amit Langote
 wrote:
>> Yes, but on the flip side, you're having to add code in a lot of
>> places -- I think I counted 7 -- where you turn around and ignore
>> those AppendRelInfos.
>
> Perhaps you were looking at the previous version with "minimal" appinfos
> containing the child_is_partitioned field?

Yes, I think I was.  I think this version looks a lot better.

 /*
+ * Close the root partitioned rel if we opened it above, but keep the
+ * lock.
+ */
+if (rel != mtstate->resultRelInfo->ri_RelationDesc)
+heap_close(rel, NoLock);

We didn't take a lock above, though, so drop everything in the comment
from "but" onward.

-add_paths_to_append_rel(root, rel, live_childrels);
+add_paths_to_append_rel(root, rel, live_childrels, partitioned_rels);

I think it would make more sense to put the new logic into
add_paths_to_append_rel, instead of passing this down as an additional
parameter.

+ * do not appear anywhere else in the plan.  Situation is exactly the

The situation is...

+if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
+{
+foreach(lc, root->pcinfo_list)
+{
+PartitionedChildRelInfo *pc = lfirst(lc);
+
+if (pc->parent_relid == parentRTindex)
+{
+partitioned_rels = pc->child_rels;
+break;
+}
+}
+}

You seem to have a few copies of this logic.  I think it would be
worth factoring it out into a separate function.

+root->glob->nonleafResultRelations =
+list_concat(root->glob->nonleafResultRelations,
+list_copy(splan->partitioned_rels));

Please add a brief comment.  One line is fine.

+newrc->isParent = childrte->relkind == RELKIND_PARTITIONED_TABLE;

I'm not sure what project style is, but I personally find these kinds
of assignments easier to read with an extra set of parentheses:

newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE);

+if (partitioned_rels == NIL)
+return;
+
+foreach(lc, partitioned_rels)

I think the if-test is pointless; the foreach loop is going to start
by comparing the initial value with NIL.

Why doesn't ExecSerializePlan() need to transfer a proper value for
nonleafResultRelations to workers?  Seems like it should.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Our feature change policy

2017-03-20 Thread David G. Johnston
On Mon, Mar 20, 2017 at 8:57 AM, Stephen Frost  wrote:

> Tom,
>
> * Tom Lane (t...@sss.pgh.pa.us) wrote:
> > Stephen Frost  writes:
> > > * Bruce Momjian (br...@momjian.us) wrote:
> > >> 1.  make the change now and mention it in the release notes
> > >> 2.  #1, but also provide backward compatibility for 5+ years
> > >> 3.  mark the feature as deprecated and remove/change it in 5+ years
> > >> 4.  #3, but issue a warning for deprecated usage
> >



> I was imagining the changes to pg_stat_activity as possibly falling
> under #3/change, meaning that we'd wait for 5 years before actually
> renaming those columns, which is part of why I was objecting to that
> idea.
>

Which ends up boiling down to:

A. Make a change and document it in the release notes

B. If applicable, and desired, provide a 5-year backward compatibility
deprecation period (i.e., 3 implies 2). Generally 2 implies 3, but the
deprecation period is undefined.

C. Optionally, if deprecating, provide explicit warnings when the
deprecated feature is used

Guidelines for when to desire the 5 year period would be helpful.  And also
acknowledge that we may wish to choose a shorter period of time, or
institute immediate removal, at our discretion.

Nothing says we cannot go longer than 5 years, but given our support policy
I would say we'd rarely desire to do so intentionally - the burden of
proof falling to those who would want to keep something longer.

David J.


Re: [HACKERS] Partition-wise join for join between (declaratively) partitioned tables

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 9:44 AM, Ashutosh Bapat
 wrote:
> Right. If we could use parent Vars to indicate parent Var or child Var
> depending upon the context, a lot of memory issues would be solved; we
> wouldn't need to translate a single expression. But I think that's not
> straight forward. I have been thinking about some kind of polymorphic
> Var node, but it seems a lot more invasive change. Although, if we
> could get something like that, we would save a huge memory. :)

Yes, that's why I'm interested in exploring that approach once the
basic framework is in place here.

> I am wondering whether we need to change
> calc_non_nestloop_required_outer() similar to
> calc_nestloop_required_outer() just to keep their signatures in sync.

I haven't looked at the patch, but I don't think you need to worry about that.

> Should I work on completing reparamterized_path_by_child() to support
> all kinds of paths?

Yes, or at the very least all scans, like reparameterize_path() already does.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] WIP: Faster Expression Processing v4

2017-03-20 Thread Tom Lane
Andres Freund  writes:
> Additionally I added a regression test for the nearly entirely untested
> nodeTidscan.c, after I'd broken it previously without noticing (thanks
> Andreas).

I went ahead and pushed this part, since it seemed pretty uncontroversial.
I added a bit more stuff to get the LOC measurement up on the planner
side too.

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Create replication slot in pg_basebackup if requested and not yet present

2017-03-20 Thread Michael Banck
Hi,

On Mon, Mar 20, 2017 at 02:42:32PM +0300, Arthur Zakirov wrote:
> Also maybe it would be good if pg_basebackup had a way to drop created slot.
> Although "drop slot" is not related with concept of automatically created
> slots, it will good if user will have a way to drop slots.

If you want to drop the slot after basebackup finishes you'd just use a
temporary slot (i.e. the default), or am I understanding your use-case
incorrectly?

I assumed the primary use-case for creating a non-temporary slot is to
keep it around while the standby is active.
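
If a non-temporary slot created for a base backup is ever left behind once the
standby has been retired, it can always be inspected and removed by hand (the
slot name here is made up):

-- see which slots exist and whether anything is still streaming from them
SELECT slot_name, active FROM pg_replication_slots;

-- drop the one created for the base backup once it is no longer needed
SELECT pg_drop_replication_slot('basebackup_slot');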


Michael


-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Our feature change policy

2017-03-20 Thread David G. Johnston
On Mon, Mar 20, 2017 at 9:18 AM, Bruce Momjian  wrote:

> .  #3 and #4 would need to be weighted depending on
> whether choosing them would delay progress, e.g. it did delay progress
> on standard-conforming strings, but the delay was determined to be
> reasonable.
>

With regard to standard-conforming strings, to this day we are in indefinite
deprecation of the backward compatibility mechanics.  We simply made the
choice of whether to use the backward compatibility mode explicit when we
changed the GUC default.  For features with that possibility we could add "D.
Optionally, when applicable, maintain current behavior during the release and
later switch the default behavior to the new mode after N years."  Odds are
that if we are considering instituting item D we'd be content with discussing
the specifics of the case and not rely on any kind of rule or guideline to
define N.  As evidenced, defaults and deprecation are orthogonal concerns.

David J.


[HACKERS] Re: [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Stephen Frost
Robert,

* Robert Haas (robertmh...@gmail.com) wrote:
> On Mon, Mar 20, 2017 at 8:33 AM, Stephen Frost  wrote:
> > * Robert Haas (robertmh...@gmail.com) wrote:
> >> So was this 3340 line patch posted or discussed anyplace before it got
> >> committed?
> >
> > I've mentioned a few times that I'm working on improving pg_dump
> > regression tests and code coverage, which is what these were.  I'm a bit
> > surprised that it's, apparently, a surprise to anyone or that strictly
> > adding regression tests in the existing framework deserves very much
> > discussion.
> 
> I'm not saying it does.  I'm saying that it's polite, and expected, to
> post patches and ask for opinions before committing things.

While I certainly agree with that when it comes to new features, changes
in work-flow, bug fixes and other things, I'm really not sure that
requiring posting to the list and waiting for responses every time
someone wants to add some regression tests is a useful way to spend
time.  While the patch looked big, a lot of that is just that the
current structure requires listing out every 'like' and 'unlike' set for
each test, which adds up.

In this particular case, I've been discussing these pg_dump regression
tests for months, as I've been going through fixing bugs found by them
and back-patching them.  I had time over this weekend to watch the
buildfarm and make sure that it didn't explode (in which case, I would
have reverted the patch immediately, of course).  I would have preferred
to commit these new tests in a more fine-grained fashion, but I kept
finding issues, which meant that committing them earlier would have just
turned the buildfarm red, which wouldn't have been beneficial to anyone.

I'm quite pleased to see that, for the most part, the tests have been
successful on the buildfarm.

Thanks!

Stephen




Re: [HACKERS] Partition-wise join for join between (declaratively) partitioned tables

2017-03-20 Thread Ashutosh Bapat
On Sat, Mar 18, 2017 at 5:40 AM, Robert Haas  wrote:
> On Fri, Mar 17, 2017 at 9:15 AM, Ashutosh Bapat
>  wrote:
>> This set of patches fixes both of those things.
>
> 0001 changes the purpose of a function and then 0007 renames it.  It
> would be better to include the renaming in 0001 so that you're not
> taking multiple whacks at the same function in the same patch series.

adjust_relid_set was renamed to adjust_child_relids() after being
"extern"alized.  I think this comment is about that function.  Done.

> I believe it would also be best to include 0011's changes to
> adjust_appendrel_attrs_multilevel in 0001.
>

The function needs to repeat the "adjustment" process for every
"other" relation (join or base) that it encounters, by testing using
OTHER_BASE_REL or OTHER_JOINREL, in short IS_OTHER_REL().  The latter
macros are added by the partition-wise join implementation patch 0005.
It doesn't make sense to add that macro in 0001 or to modify that
function twice, once in 0001 and then again after 0005.  So I will
leave it to be part of 0011, where the changes are actually needed.

> 0002 should either add find_param_path_info() to the relevant header
> file as extern from the beginning, or it should declare and define it
> as static and then 0007 can remove those markings.  It makes no sense
> to declare it as extern but put the prototype in the .c file.

Done, added find_param_path_info() as an extern definition to start
with. I have also squashed 0001 and 0002 together, since they are both
refactoring patches and from your next mail about
reparameterize_path_by_child(), it seems that we are going to accept
the approach in that patch.

>
> 0004 still needs to be pared down.  If you want to get something
> committed this release cycle, you have to get these details taken care
> of, uh, more or less immediately.  Actually, preferably, several weeks
> ago.  You're welcome to maintain your own test suite locally but what
> you submit should be what you are proposing for commit -- or if not,
> then you should separate the part proposed for commit and the part
> included for dev testing into two different patches.
>

Done. Now SQL file has 325 lines and output has 1697 lines as against
515 and 4085 lines resp. earlier.

> In 0005's README, the part about planning partition-wise joins in two
> phases needs to be removed.

Done.

> This patch also contains a small change
> to partition_join.sql that belongs in 0004.

The reason I added the test patch prior to the implementation was
1. to make sure that the queries run without the optimization and to
check the results they produce, so as to catch any issues with the
partitioning implementation.  That would help someone looking at those
patches as well.  2. Once the partition-wise join implementation patch
was applied, one could see the purpose of the changes in the two
follow-on patches.  Now that that purpose has been served, I have
reordered the patches so that the test patch comes after the
implementation and follow-on fixes.  If you still want to run the
tests before or after any of those patches, you can apply the test
patch separately.

>
> 0008 removes direct tests against RELOPT_JOINREL almost everywhere,
> but it overlooks the new ones added to postgres_fdw.c by
> b30fb56b07a885f3476fe05920249f4832ca8da5.  It should be updated to
> cover those as well, I suspect.

Done.

deparseSubqueryTargetList() and some other functions exclude "other"
base relations from their assertions.  I guess that's a problem; I
will submit a separate patch to fix it.

> The commit message claims that it
> will "Similarly replace RELOPT_OTHER_MEMBER_REL test with
> IS_OTHER_REL() where we want to test for child relations of all kinds",
> but in fact it makes exactly zero such substitutions.

The relevant changes have been covered by other commits. Removed this
line from the commit message.

>
> While I was studying what you did with reparameterize_path_by_child(),
> I started to wonder whether reparameterize_path() doesn't need to
> start handling join paths.  I think it only handles scan paths right
> now because that's the only thing that can appear under an appendrel
> created by inheritance expansion, but you're changing that.  Maybe
> it's not critical -- I think the worst consequences of missing some
> handling there is that we won't consider a parameterized path in some
> case where it would be advantageous to do so.  Still, you might want
> to investigate a bit.

Yes, we need to update reparameterize_path() for child-joins.  A path
for a child base relation gets reparameterized if there exists a path
with that parameterization in at least one other child.  The
parameterization bubbles up the join tree from the base relations.
So, if a child needs to be reparameterized, probably all its joins
require reparameterization too, since that parameterization would
bubble up the child-join tree in which some other child participates.
said, it's an optimization and not a correctness issue.

Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Andrew Dunstan


On 03/20/2017 10:08 AM, Tom Lane wrote:
> I am *absolutely* not in favor of adding anything to the scripts' routine
> output, because it will just make this problem worse by bloating the
> buildfarm logs even more.  What I'd like to see is for the scripts to
> always report something along the lines of "this is what I got, this is
> what I expected to get" --- but only when there is a failure.  The less
> output there is from a successful test, the better, IMO.
>
>   



The problem in the current instance is that the error occurred before it
ever tried to run a test. It died in the setup code for the test set.
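
Just to illustrate the kind of thing that could help there (a sketch only;
setup_test_cluster() is a made-up placeholder, not a real helper): a script
can wrap its setup in an eval and BAIL_OUT with the error, so a failure in
the setup code at least names itself instead of dying with a bare message:

use Test::More;

# Sketch: surface setup failures explicitly rather than dying mid-setup.
eval {
    setup_test_cluster();    # hypothetical setup helper for this test set
    1;
} or BAIL_OUT("test setup failed: $@");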

cheers

andrew



-- 

Andrew Dunstan   https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services





Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Craig Ringer
On 20 March 2017 at 20:57, Stas Kelvich  wrote:
>
>> On 20 Mar 2017, at 15:17, Craig Ringer  wrote:
>>
>>> I thought about having special field (or reusing one of the existing fields)
>>> in snapshot struct to force filtering xmax > snap->xmax or xmin = snap->xmin
>>> as Petr suggested. Then this logic can reside in ReorderBufferCommit().
>>> However this is not solving problem with catcache, so I'm looking into it 
>>> right now.
>>
>> OK, so this is only an issue if we have xacts that change the schema
>> of tables and also insert/update/delete to their heaps. Right?
>>
>> So, given that this is CF3 for Pg10, should we take a step back and
>> impose the limitation that we can decode 2PC with schema changes or
>> data row changes, but not both?
>
> Yep, time is tight. I’ll try today/tomorrow to proceed with this two scan 
> approach.
> If I’ll fail to do that during this time then I’ll just update this patch to 
> decode
> only non-ddl 2pc transactions as you suggested.

I wasn't suggesting not decoding them, but giving the plugin the
option of whether to proceed with decoding or not.

As Simon said, have a pre-decode-prepared callback that lets the
plugin get a lock on the 2pc xact if it wants, or say it doesn't want
to decode it until it commits.

That'd be useful anyway, so we can filter and only do decoding at
prepare transaction time of xacts the downstream wants to know about
before they commit.

>>> Just as before I am marking this transaction committed in snapbuilder, but 
>>> after
>>> decoding I delete this transaction from xip (which holds committed 
>>> transactions
>>> in case of historic snapshot).
>>
>> That seems kind of hacky TBH. I didn't much like marking it as
>> committed then un-committing it.
>>
>> I think it's mostly an interface issue though. I'd rather say
>> SnapBuildPushPrepareTransaction and SnapBuildPopPreparedTransaction or
>> something, to make it clear what we're doing.
>
> Yes, that will be less confusing.  However, there is no queue of any kind, so
> SnapBuildStartPrepare / SnapBuildFinishPrepare should work too.

Yeah, that's better.


-- 
 Craig Ringer   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Tom Lane
Stephen Frost  writes:
> * Andrew Dunstan (andrew.duns...@2ndquadrant.com) wrote:
>> ISTM that the test setup and breakdown code, both in individual tests
>> and in PostgresNode.pm  should be liberally sprinkled with diag() calls
>> to make it easier to narrow down errors..

> While I'm generally in favor of adding diag() info into the testing for
> when things go wrong, what I don't want to do is increase the amount of
> output that these tests produce without good cause.  I really wish there
> was a "quiet" mode for the TAP tests which didn't report anything when
> things are 'ok'.

FWIW, the problem I've got with the TAP tests is that when one fails
in the buildfarm, you've got to dig through megabytes of all-alike-looking
output just to try to determine which one failed; and once you do, you
still know nothing because the script output never really says why it
thinks there was a problem.  If you're lucky, you can identify the
postmaster log file(s) corresponding to the failed test script, and then
you can try to guess from the log entries what went wrong.

I am *absolutely* not in favor of adding anything to the scripts' routine
output, because it will just make this problem worse by bloating the
buildfarm logs even more.  What I'd like to see is for the scripts to
always report something along the lines of "this is what I got, this is
what I expected to get" --- but only when there is a failure.  The less
output there is from a successful test, the better, IMO.

regards, tom lane




Re: [HACKERS] Re: [COMMITTERS] pgsql: Add TAP tests for password-based authentication methods.

2017-03-20 Thread Tom Lane
Heikki Linnakangas  writes:
> On 03/20/2017 02:32 AM, Peter Eisentraut wrote:
>> This is missing an entry for tmp_check/ in .gitignore.  But maybe we can
>> do that globally instead of repeating it in every directory?

> If we could also handle results and log globally, that would be nice. 
> But IMHO those names are too generic to put to a global .gitignore. We 
> could rename them, but this starts to feel like more trouble than it's 
> worth. I'll just go and create a .gitignore for /tmp_check/ to 
> src/test/authentication.

FWIW, that was my thought about it as well.  Handling tmp_check
differently from those other two just seems confusing.

regards, tom lane




Re: [HACKERS] [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 9:30 AM, Stephen Frost  wrote:
> While I certainly agree with that when it comes to new features, changes
> in work-flow, bug fixes and other things, I'm really not sure that
> requiring posting to the list and waiting for responses every time
> someone wants to add some regression tests is a useful way to spend
> time.

I'm not sure that arguing about whether patches are supposed to have
review and discussion before they're committed is a useful way to
spend time either.  I think most people here accept that as a
requirement.  If you really don't understand that committing a
never-before-posted 3000+-line patch out of the blue three weeks after
the patch submission deadline is out of process, maybe you shouldn't
be committing things at all.  I'm glad that you are working on fixing
pg_dump bugs and improving test coverage, but my gladness about that
does not extend to thinking that the processes which other people
follow for their work should be waived for yours.  Sorry.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] Re: [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Andres Freund
On 2017-03-20 10:35:15 -0400, Stephen Frost wrote:
> Robert,
> 
> * Robert Haas (robertmh...@gmail.com) wrote:
> > I'm glad that you are working on fixing
> > pg_dump bugs and improving test coverage, but my gladness about that
> > does not extend to thinking that the processes which other people
> > follow for their work should be waived for yours.  Sorry.
> 
> To be clear, I am not asking for any kind of special exception for
> myself.
> 
> I continue to be of the opinion that this entire discussion is quite
> flipped from how we really should be running things- adding regression
> tests to improve code coverage, particularly when they're simply adding
> to the existing structure for those tests, should be strongly encouraged 
> both before and after feature-freeze.

I don't think posting a preliminary patch, while continuing to polish,
with a note that you're working on that and plan to commit soon, would
slow you down that much.  There's pretty obviously a difference between
an added 10 line test, taking 30ms, and what you did here - and that
doesn't mean what you added is wrong or shouldn't be added.

And I don't think that expectation has anything to do with being
anti-test.

Greetings,

Andres Freund




Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Stephen Frost
Alvaro,

* Alvaro Herrera (alvhe...@2ndquadrant.com) wrote:
> Stephen Frost wrote:
> > Is there any hope of getting a "quiet" mode, where all the "ok" lines
> > aren't printed when things work..?
> 
> Well, we currently have --verbose in PROVE_FLAGS.  Maybe you can take it
> out, or even add --quiet or --QUIET (see the prove(1) manpage).

Ok, yes, removing --verbose got rid of the per-test 'ok' lines, making
the output quite a bit nicer, at least to me.

Thanks!

Stephen




Re: [HACKERS] Patch: Write Amplification Reduction Method (WARM)

2017-03-20 Thread Robert Haas
On Sun, Mar 19, 2017 at 3:05 AM, Pavan Deolasee
 wrote:
> On Thu, Mar 16, 2017 at 12:53 PM, Robert Haas  wrote:
>> On Wed, Mar 15, 2017 at 3:44 PM, Pavan Deolasee
>>  wrote:
>> > I couldn't find a better way without a lot of complex infrastructure.
>> > Even
>> > though we now have ability to mark index pointers and we know that a
>> > given
>> > pointer either points to the pre-WARM chain or post-WARM chain, this
>> > does
>> > not solve the case when an index does not receive a new entry. In that
>> > case,
>> > both pre-WARM and post-WARM tuples are reachable via the same old index
>> > pointer. The only way we could deal with this is to mark index pointers
>> > as
>> > "common", "pre-warm" and "post-warm". But that would require us to
>> > update
>> > the old pointer's state from "common" to "pre-warm" for the index whose
>> > keys
>> > are being updated. May be it's doable, but might be more complex than
>> > the
>> > current approach.
>>
>> /me scratches head.
>>
>> Aren't pre-warm and post-warm just (better) names for blue and red?
>>
>
> Yeah, sounds better.

My point here wasn't really about renaming, although I do think
renaming is something that should get done.  My point was that you
were saying we need to mark index pointers as common, pre-warm, and
post-warm.  But you're pretty much already doing that, I think.  I
guess you don't have "common", but you do have "pre-warm" and
"post-warm".

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] logical decoding of two-phase transactions

2017-03-20 Thread Stas Kelvich

> On 20 Mar 2017, at 16:39, Craig Ringer  wrote:
> 
> On 20 March 2017 at 20:57, Stas Kelvich  wrote:
>> 
>>> On 20 Mar 2017, at 15:17, Craig Ringer  wrote:
>>> 
 I thought about having special field (or reusing one of the existing 
 fields)
 in snapshot struct to force filtering xmax > snap->xmax or xmin = 
 snap->xmin
 as Petr suggested. Then this logic can reside in ReorderBufferCommit().
 However this is not solving problem with catcache, so I'm looking into it 
 right now.
>>> 
>>> OK, so this is only an issue if we have xacts that change the schema
>>> of tables and also insert/update/delete to their heaps. Right?
>>> 
>>> So, given that this is CF3 for Pg10, should we take a step back and
>>> impose the limitation that we can decode 2PC with schema changes or
>>> data row changes, but not both?
>> 
>> Yep, time is tight. I’ll try today/tomorrow to proceed with this two scan 
>> approach.
>> If I’ll fail to do that during this time then I’ll just update this patch to 
>> decode
>> only non-ddl 2pc transactions as you suggested.
> 
> I wasn't suggesting not decoding them, but giving the plugin the
> option of whether to proceed with decoding or not.
> 
> As Simon said, have a pre-decode-prepared callback that lets the
> plugin get a lock on the 2pc xact if it wants, or say it doesn't want
> to decode it until it commits.
> 
> That'd be useful anyway, so we can filter and only do decoding at
> prepare transaction time of xacts the downstream wants to know about
> before they commit.

Ah, got that. Okay.

> -- 
> Craig Ringer   http://www.2ndQuadrant.com/
> PostgreSQL Development, 24x7 Support, Training & Services





Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Craig Ringer
On 20 Mar. 2017 22:10, "Tom Lane"  wrote:


> FWIW, the problem I've got with the TAP tests is that when one fails
> in the buildfarm, you've got to dig through megabytes of all-alike-looking
> output just to try to determine which one failed; and once you do, you
> still know nothing because the script output never really says why it
> thinks there was a problem.


Yeah, it's not super helpful.

I'd like to enable Carp's features to use confess for traces, and switch
all use of die to that. We could learn a lot about unplanned-for test
failures where a test script dies rather than failing a test if we used
carp effectively.
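
For instance, something along these lines (just a sketch of the idea, not an
actual proposal for PostgresNode.pm):

use strict;
use warnings;
use Carp;

# Sketch: promote every uncaught die() to a full stack trace.
# Note this handler also fires inside eval blocks, so it is a debugging
# aid rather than something to enable unconditionally.
$SIG{__DIE__} = sub { Carp::confess(@_) };

# An unexpected failure deep in setup code ...
open my $fh, '<', '/no/such/postgresql.conf'
    or die "could not open config: $!";
# ... now reports the whole call chain instead of a bare one-line message.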



>  If you're lucky, you can identify the
> postmaster log file(s) corresponding to the failed test script


What's the problem with doing that?

Maybe we need to structure the build farm output better. Send an archive of
tmp_check/logs/ or mime-multipart it or something, so it's all cleanly
split up.


> I am *absolutely* not in favor of adding anything to the scripts' routine
> output, because it will just make this problem worse by bloating the
> buildfarm logs even more.  What I'd like to see is for the scripts to
> always report something along the lines of "this is what I got, this is
> what I expected to get" --- but only when there is a failure.


That's what they -should- do already, with 'ok', 'is', etc. tests. Though
sometimes diagnostic output is useful, we should be using 'note' to dump it
in the script's log, not in its main output. Whenever possible we should be
using TAP facilities to only emit it when there is a failure - most
usefully just by testing the test return code e.g.

if (!is(some_test, 1, 'test description')) {
  diag "useful info: $relevant_variable";
}

TAP has a diag-like function that dumps data structures too, Data::Dumper
style.
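
For example (invented data; I'm assuming the dumper in question is
Test::More's explain()):

use strict;
use warnings;
use Test::More;

my $settings = { port => 5432, max_wal_senders => 5 };   # made-up data

# note() output is a TAP comment hidden by a non-verbose prove run.
note 'node settings follow';
note explain $settings;

# diag() is meant for failure details; here it only runs if the test fails.
my $got = 'logical';
is($got, 'logical', 'wal_level is as expected')
    or diag explain { got => $got };

done_testing();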

Hm. Maybe the TAP test README needs a mini test-writing style guide. Very
mini.

BTW I've used diag in a few and those should be either changed to note or
moved to post-fail. Will post a patch.



> The less output there is from a successful test, the better, IMO.


The trouble there is that we don't always know we're going to fail. Nor
will we always fail in a clean, anticipated way. A test script might die
because some setup it does fails with an unexpected ERROR for example.

That's why I think some diagnostic output is good. But it should definitely
be in the script logs not the main TAP output. And it should be moderate.


Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Alvaro Herrera
Stephen Frost wrote:

> Is there any hope of getting a "quiet" mode, where all the "ok" lines
> aren't printed when things work..?

Well, we currently have --verbose in PROVE_FLAGS.  Maybe you can take it
out, or even add --quiet or --QUIET (see the prove(1) manpage).

-- 
Álvaro Herrera   https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] [COMMITTERS] pgsql: Improve pg_dump regression tests and code coverage

2017-03-20 Thread Robert Haas
On Mon, Mar 20, 2017 at 10:35 AM, Stephen Frost  wrote:
> To be clear, I am not asking for any kind of special exception for
> myself.
>
> I continue to be of the opinion that this entire discussion is quite
> flipped from how we really should be running things- adding regression
> tests to improve code coverage, particularly when they're simply adding
> to the existing structure for those tests, should be strongly encouraged
> both before and after feature-freeze.

Any policy which permits a 3000 line code drop, whether to the
regression tests or otherwise, without prior discussion is, IMHO, a
very bad policy.  It's not as if regression tests never break anything
or cause any problems.  Do they need the same level of review as WARM
or rewriting the executor's expression evaluation?  No.  Does that
mean that they should totally bypass all of the review and discussion
that we do for other patches?  No.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [HACKERS] pageinspect and hash indexes

2017-03-20 Thread Ashutosh Sharma
On Mon, Mar 20, 2017 at 9:31 AM, Amit Kapila  wrote:
> On Sat, Mar 18, 2017 at 5:13 PM, Ashutosh Sharma  
> wrote:
>> On Sat, Mar 18, 2017 at 1:34 PM, Amit Kapila  wrote:
>>> On Sat, Mar 18, 2017 at 12:12 AM, Ashutosh Sharma  
>>> wrote:
 On Fri, Mar 17, 2017 at 10:54 PM, Jeff Janes  wrote:
> While trying to figure out some bloating in the newly logged hash indexes,
> I'm looking into the type of each page in the index.  But I get an error:
>
> psql -p 9876 -c "select hash_page_type(get_raw_page('foo_index_idx',x)) 
> from
> generate_series(1650,1650) f(x)"
>
> ERROR:  page is not a hash page
> DETAIL:  Expected ff80, got .
>
> The contents of the page are:
>
> \xa400d8f203bf65c91800f01ff01f0420...
>
> (where the elided characters at the end are all zero)
>
> What kind of page is that actually?

 it is basically either a newly allocated bucket page or a freed overflow 
 page.

>>>
>>> What makes you think that it can be a newly allocated page?
>>> Basically, we always initialize the special space of newly allocated
>>> page, so not sure what makes you deduce that it can be newly allocated
>>> page.
>>
>> I came to know this from the following experiment.
>>
>> I  created a hash index and kept on inserting data in it till the split 
>> happens.
>>
>> When split happened, I could see following values for firstblock and
>> lastblock in _hash_alloc_buckets()
>>
>> Breakpoint 1, _hash_alloc_buckets (rel=0x7f6ac951ee30, firstblock=34,
>> nblocks=32) at hashpage.c:993
>> (gdb) n
>> (gdb) p firstblock
>> $15 = 34
>> (gdb) p nblocks
>> $16 = 32
>> (gdb) n
>> (gdb) p lastblock
>> $17 = 65
>>
>> AFAIU, this bucket split resulted in creation of new bucket pages from
>> block number 34 to 65.
>>
>> The contents for metap are as follows,
>>
>> (gdb) p *metap
>> $18 = {hashm_magic = 105121344, hashm_version = 2, hashm_ntuples =
>> 2593, hashm_ffactor = 81, hashm_bsize = 8152, hashm_bmsize = 4096,
>> hashm_bmshift = 15,
>>   hashm_maxbucket = 32, hashm_highmask = 63, hashm_lowmask = 31,
>> hashm_ovflpoint = 6, hashm_firstfree = 0, hashm_nmaps = 1,
>> hashm_procid = 450,
>>   hashm_spares = {0, 0, 0, 0, 0, 1, 1, 0 },
>> hashm_mapp = {33, 0 }}
>>
>> Now, if i try to check the page type for block number 65, this is what i see,
>>
>> test=# select * from hash_page_type(get_raw_page('con_hash_index', 65));
>> ERROR:  page is not a hash page
>> DETAIL:  Expected ff80, got .
>> test=#
>>
>
> The contents of such a page should be zero and Jeff has reported some
> valid-looking contents of the page.  If you see this page contents as
> zero, then we can conclude what Jeff is seeing was a freed overflow
> page.

As shown in the mail thread [1], the contents of the metapage are,

(gdb) p *metap
$18 = {hashm_magic = 105121344, hashm_version = 2, hashm_ntuples
= 2593, hashm_ffactor = 81, hashm_bsize = 8152, hashm_bmsize = 4096,
hashm_bmshift = 15, hashm_maxbucket = 32, hashm_highmask = 63,
hashm_lowmask = 31, hashm_ovflpoint = 6, hashm_firstfree = 0,
hashm_nmaps = 1,
hashm_procid = 450, hashm_spares = {0, 0, 0, 0, 0, 1, 1, 0 }, hashm_mapp = {33, 0 }}

postgres=# select spares from
hash_metapage_info(get_raw_page('con_hash_index', 0));
  spares
---
 {0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
(1 row)

Here, as you can see, the spares count is just 1, which corresponds to
the bitmap page.  That means there is no overflow page in the hash
index, and I have not run any DELETE statements in my experiment that
would result in freeing an overflow page.

Also, the page header content for such a page is,

$9 = {pd_lsn = {xlogid = 0, xrecoff = 23638120}, pd_checksum = 0,
pd_flags = 0, pd_lower = 24, pd_upper = 8176, pd_special = 8176,
  pd_pagesize_version = 8196, pd_prune_xid = 0, pd_linp = 0x1f3aa88}

From the values of pd_lower and pd_upper it is clear that it is an empty page.

The content of this page is,

\xb0308a011800f01ff01f042000.

I think this is happening not just for freed overflow pages but also
for newly allocated bucket pages.  It would be good if we could mark a
freed overflow page as an UNUSED page rather than just initialising
its header portion and leaving the page type in the special area as it
is.  Attached is the patch '0001-mark_freed_ovflpage_as_UNUSED_pagetype.patch'
that marks a freed overflow page as an unused page.

Also, I have now changed the pageinspect module to handle unused and
empty hash index pages.  Attached is the patch
(0002-allow_pageinspect_handle_UNUSED_OR_EMPTY_hash_pages.patch) for
the same.

[1] - 
https://www.postgresql.org/message-id/CAE9k0P%3DN%2BJjzqnHqrURE7ZQMgySRpv%3DBkjafbz%3DpeD4cbCgbhA%40mail.gmail.com

--

Re: [HACKERS] Inadequate traces in TAP tests

2017-03-20 Thread Alvaro Herrera
Stephen Frost wrote:
> Andrew,

> * Andrew Dunstan (andrew.duns...@2ndquadrant.com) wrote:

> > ISTM that the test setup and breakdown code, both in individual tests
> > and in PostgresNode.pm  should be liberally sprinkled with diag() calls
> > to make it easier to narrow down errors..
> 
> While I'm generally in favor of adding diag() info into the testing for
> when things go wrong, what I don't want to do is increase the amount of
> output that these tests produce without good cause.  I really wish there
> was a "quiet" mode for the TAP tests which didn't report anything when
> things are 'ok'.

That's diag's idea; you use it like
"ok() or diag('failed because of snow')",
so nothing is printed unless there is a problem.  You're not supposed to
call it unconditionally.

Something that would probably be helpful would be to put the server log
lines corresponding to the failure in diag(); for example we could keep
the log file open, do a seek(SEEK_END) just before running the test, and
read from that point onwards; probably stop reading after 5 lines or
so.  They wouldn't be output unless there is a failure.  (Of course,
this'd have to be done in the harness, not the test itself, to avoid
cluttering already ugly individual test files.)
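
A rough sketch of what I mean (the helper names and the 5-line cap are
invented here; nothing like this exists in PostgresNode.pm):

use strict;
use warnings;
use Fcntl qw(SEEK_SET SEEK_END);

my $saved_log_pos = 0;

# Call just before running a test: remember where the server log ends now.
sub remember_log_end
{
    my ($logfile) = @_;
    open my $fh, '<', $logfile or die "could not open $logfile: $!";
    seek($fh, 0, SEEK_END);
    $saved_log_pos = tell($fh);
    close $fh;
}

# On failure: return up to $max lines written since remember_log_end().
sub recent_log_lines
{
    my ($logfile, $max) = @_;
    $max = 5 unless defined $max;
    open my $fh, '<', $logfile or die "could not open $logfile: $!";
    seek($fh, $saved_log_pos, SEEK_SET);
    my @lines;
    while (my $line = <$fh>)
    {
        push @lines, $line;
        last if @lines >= $max;
    }
    close $fh;
    return @lines;
}

The harness would call remember_log_end() with the node's log file before
each test and, when a test fails, diag() the output of recent_log_lines().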

-- 
Álvaro Herrera   https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] [PATCH] Incremental sort

2017-03-20 Thread Heikki Linnakangas

On 03/20/2017 11:33 AM, Alexander Korotkov wrote:

> Please, find rebased patch in the attachment.


I had a quick look at this.

* I'd love to have an explanation of what an Incremental Sort is, in the 
file header comment for nodeIncrementalSort.c.


* I didn't understand the maxMem stuff in tuplesort.c. The comments 
there use the phrase "on-disk memory", which seems like an oxymoron. 
Also, "maximum status" seems weird, as it assumes that there's a natural 
order to the states.


* In the below example, the incremental sort is significantly slower 
than the Seq Scan + Sort you get otherwise:


create table sorttest (a int4, b int4, c int4);
insert into sorttest select g, g, g from generate_series(1, 100) g;
vacuum sorttest;
create index i_sorttest on sorttest (a, b, c);
set work_mem='100MB';


postgres=# explain select count(*) from (select * from sorttest order by a, c) as t;
                                              QUERY PLAN
-------------------------------------------------------------------------------------------------------
 Aggregate  (cost=138655.68..138655.69 rows=1 width=8)
   ->  Incremental Sort  (cost=610.99..124870.38 rows=1102824 width=12)
         Sort Key: sorttest.a, sorttest.c
         Presorted Key: sorttest.a
         ->  Index Only Scan using i_sorttest on sorttest  (cost=0.43..53578.79 rows=1102824 width=12)
(5 rows)

Time: 0.409 ms
postgres=# select count(*) from (select * from sorttest order by a, c) as t;
  count
-
 100
(1 row)

Time: 387.091 ms


postgres=# explain select count(*) from (select * from sorttest order by a, c) as t;
                                 QUERY PLAN
----------------------------------------------------------------------------
 Aggregate  (cost=130063.84..130063.85 rows=1 width=8)
   ->  Sort  (cost=115063.84..117563.84 rows=100 width=12)
         Sort Key: sorttest.a, sorttest.c
         ->  Seq Scan on sorttest  (cost=0.00..15406.00 rows=100 width=12)
(4 rows)

Time: 0.345 ms
postgres=# select count(*) from (select * from sorttest order by a, c) as t;
  count
-
 100
(1 row)

Time: 231.668 ms

According to 'perf', 85% of the CPU time is spent in ExecCopySlot(). To 
alleviate that, it might be worthwhile to add a special case for when 
the group contains exactly one tuple, and not put the tuple into the
tuplesort in that case. Or if we cannot ensure that the Incremental Sort 
is actually faster, the cost model should probably be smarter, to avoid 
picking an incremental sort when it's not a win.


- Heikki




