Re: Add pg_file_sync() to adminpack

2020-01-23 Thread Julien Rouhaud
On Fri, Jan 24, 2020 at 8:20 AM Fujii Masao  wrote:
>
> On 2020/01/24 15:38, Arthur Zakirov wrote:
> > On 2020/01/24 14:56, Michael Paquier wrote:
> >> On Fri, Jan 24, 2020 at 01:28:29PM +0900, Arthur Zakirov wrote:
> >>> It is compiled and passes the tests. The documentation is there and
> >>> it is built without errors too.
> >>>
> >>> It seems that consensus about the returned type was reached and I
> >>> marked the patch as "Ready for Committer".
> >>
> >> +   fsync_fname_ext(filename, S_ISDIR(fst.st_mode), false, ERROR);
> >> One comment here: should we better warn users in the docs that an fsync
> >> failure will not trigger a PANIC here?  Here, fsync failure on a heap
> >> file => ERROR => potential data corruption.
> >
> > Ah, true. It is possible to add a couple of sentences saying that
> > pg_file_sync() doesn't depend on the data_sync_retry GUC and doesn't
> > raise a PANIC even for database files.
>
> Thanks all for the review!
>
> So, what about the attached patch?
> In the patch, I added the following note to the doc.
>
> 
> Note that data_sync_retry has no effect on this function,
> and therefore a PANIC-level error will not be raised even on failure to
> flush database files.
> 

We should explicitly mention that this can cause corruption.  How about:


Note that data_sync_retry has no effect on this function,
and therefore a PANIC-level error will not be raised even on failure to
flush database files.  If that happens, the underlying database
objects may be corrupted.
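As a side note for readers, here is a rough standalone sketch (plain POSIX C,
not the adminpack code; the sync_path() helper is made up for illustration) of
what syncing a file or directory involves, and of surfacing an fsync failure
as an ordinary error return rather than a crash, which is exactly the behavior
the note above warns about:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

/* Sync one path; return 0 on success, -1 on failure (no abort). */
static int
sync_path(const char *path)
{
    struct stat st;
    int         fd;

    if (stat(path, &st) < 0)
    {
        fprintf(stderr, "could not stat \"%s\": %s\n", path, strerror(errno));
        return -1;
    }

    /*
     * Directories must be opened read-only, while some systems reject
     * fsync on a regular file opened read-only, hence the switch.
     */
    fd = open(path, S_ISDIR(st.st_mode) ? O_RDONLY : O_RDWR);
    if (fd < 0)
    {
        fprintf(stderr, "could not open \"%s\": %s\n", path, strerror(errno));
        return -1;
    }

    if (fsync(fd) < 0)
    {
        /* Report and return -- the analogue of ERROR, not PANIC. */
        fprintf(stderr, "could not fsync \"%s\": %s\n", path, strerror(errno));
        close(fd);
        return -1;
    }

    close(fd);
    return 0;
}

int
main(int argc, char **argv)
{
    return (argc > 1 && sync_path(argv[1]) == 0) ? 0 : 1;
}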





Re: Add pg_file_sync() to adminpack

2020-01-23 Thread Fujii Masao



On 2020/01/24 15:38, Arthur Zakirov wrote:

On 2020/01/24 14:56, Michael Paquier wrote:

On Fri, Jan 24, 2020 at 01:28:29PM +0900, Arthur Zakirov wrote:
It is compiled and passes the tests. The documentation is there and it is
built without errors too.

It seems that consensus about the returned type was reached and I marked the
patch as "Ready for Committer".


+   fsync_fname_ext(filename, S_ISDIR(fst.st_mode), false, ERROR);
One comment here: should we better warn users in the docs that an fsync
failure will not trigger a PANIC here?  Here, fsync failure on a heap
file => ERROR => potential data corruption.


Ah, true. It is possible to add a couple of sentences saying that
pg_file_sync() doesn't depend on the data_sync_retry GUC and doesn't
raise a PANIC even for database files.


Thanks all for the review!

So, what about the attached patch?
In the patch, I added the following note to the doc.


Note that data_sync_retry has no effect on this function,
and therefore a PANIC-level error will not be raised even on failure to
flush database files.



Regards,

--
Fujii Masao
NTT DATA CORPORATION
Advanced Platform Technology Group
Research and Development Headquarters
diff --git a/contrib/adminpack/Makefile b/contrib/adminpack/Makefile
index 9a464b11bc..630fea7726 100644
--- a/contrib/adminpack/Makefile
+++ b/contrib/adminpack/Makefile
@@ -7,7 +7,8 @@ OBJS = \
 PG_CPPFLAGS = -I$(libpq_srcdir)
 
 EXTENSION = adminpack
-DATA = adminpack--1.0.sql adminpack--1.0--1.1.sql adminpack--1.1--2.0.sql
+DATA = adminpack--1.0.sql adminpack--1.0--1.1.sql adminpack--1.1--2.0.sql\
+   adminpack--2.0--2.1.sql
 PGFILEDESC = "adminpack - support functions for pgAdmin"
 
 REGRESS = adminpack
diff --git a/contrib/adminpack/adminpack--2.0--2.1.sql 
b/contrib/adminpack/adminpack--2.0--2.1.sql
new file mode 100644
index 00..1c6712e816
--- /dev/null
+++ b/contrib/adminpack/adminpack--2.0--2.1.sql
@@ -0,0 +1,17 @@
+/* contrib/adminpack/adminpack--2.0--2.1.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION adminpack UPDATE TO '2.1'" to load this file. \quit
+
+/* ***
+ * Administrative functions for PostgreSQL
+ * *** */
+
+/* generic file access functions */
+
+CREATE OR REPLACE FUNCTION pg_catalog.pg_file_sync(text)
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_file_sync'
+LANGUAGE C VOLATILE STRICT;
+
+REVOKE EXECUTE ON FUNCTION pg_catalog.pg_file_sync(text) FROM PUBLIC;
diff --git a/contrib/adminpack/adminpack.c b/contrib/adminpack/adminpack.c
index 710f4ea32d..7b5a531e08 100644
--- a/contrib/adminpack/adminpack.c
+++ b/contrib/adminpack/adminpack.c
@@ -43,6 +43,7 @@ PG_MODULE_MAGIC;
 
 PG_FUNCTION_INFO_V1(pg_file_write);
 PG_FUNCTION_INFO_V1(pg_file_write_v1_1);
+PG_FUNCTION_INFO_V1(pg_file_sync);
 PG_FUNCTION_INFO_V1(pg_file_rename);
 PG_FUNCTION_INFO_V1(pg_file_rename_v1_1);
 PG_FUNCTION_INFO_V1(pg_file_unlink);
@@ -215,6 +216,30 @@ pg_file_write_internal(text *file, text *data, bool 
replace)
return (count);
 }
 
+/* 
+ * pg_file_sync
+ *
+ * We REVOKE EXECUTE on the function from PUBLIC.
+ * Users can then grant access to it based on their policies.
+ */
+Datum
+pg_file_sync(PG_FUNCTION_ARGS)
+{
+   char   *filename;
+   struct stat fst;
+
+   filename = convert_and_check_filename(PG_GETARG_TEXT_PP(0), false);
+
+   if (stat(filename, &fst) < 0)
+   ereport(ERROR,
+   (errcode_for_file_access(),
+errmsg("could not stat file \"%s\": %m", 
filename)));
+
+   fsync_fname_ext(filename, S_ISDIR(fst.st_mode), false, ERROR);
+
+   PG_RETURN_VOID();
+}
+
 /* 
  * pg_file_rename - old version
  *
diff --git a/contrib/adminpack/adminpack.control 
b/contrib/adminpack/adminpack.control
index 12569dcdd7..ae35d22156 100644
--- a/contrib/adminpack/adminpack.control
+++ b/contrib/adminpack/adminpack.control
@@ -1,6 +1,6 @@
 # adminpack extension
 comment = 'administrative functions for PostgreSQL'
-default_version = '2.0'
+default_version = '2.1'
 module_pathname = '$libdir/adminpack'
 relocatable = false
 schema = pg_catalog
diff --git a/contrib/adminpack/expected/adminpack.out 
b/contrib/adminpack/expected/adminpack.out
index 8747ac69a2..5738b0f6c4 100644
--- a/contrib/adminpack/expected/adminpack.out
+++ b/contrib/adminpack/expected/adminpack.out
@@ -56,6 +56,21 @@ RESET ROLE;
 REVOKE EXECUTE ON FUNCTION pg_file_write(text,text,bool) FROM regress_user1;
 REVOKE pg_read_all_settings FROM regress_user1;
 DROP ROLE regress_user1;
+-- sync
+SELECT pg_file_sync('test_file1'); -- sync file
+ pg_file_sync 
+--
+ 
+(1 row)
+
+SELECT pg_file_sync('pg_stat'); -- sync directory
+ pg_file_sync 
+--
+ 
+(1 row)
+
+SELECT pg_file_sync('test_file2'); -- not there
+ERROR:  

Re: [PATCH] Windows port, fix some resources leaks

2020-01-23 Thread Michael Paquier
On Wed, Jan 22, 2020 at 05:51:51PM -0300, Ranier Vilela wrote:
> After reviewing the patches, building everything, and running regression
> checks for each patch, those are the ones that don't break.

There is some progress.  You should be careful about your patches,
as they generate compiler warnings.  Here is one quote from gcc-9:
logging.c:87:13: warning: passing argument 1 of ‘free’ discards 
‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
   87 |free(sgr_warning);
But there are others.

if (strcmp(name, "error") == 0)
+   {
+   free(sgr_error);
sgr_error = strdup(value);
+   }
I don't see the point of doing that in logging.c.  pg_logging_init()
is called only once per tool, so this cannot happen.  Another point
that may matter here, though, is that we do not complain about OOMs.
That's really unlikely to happen, and if it happens it leads to
partially colored output.
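For anyone following along, the pattern in the hunk above is simply "free the
old value before installing a new one".  A minimal standalone sketch of it
(not the actual logging.c code; the const qualifier is dropped here to avoid
the warning quoted earlier):

#include <stdlib.h>
#include <string.h>

/* Hypothetical module-level setting, standing in for sgr_error. */
static char *sgr_error = NULL;

static void
set_sgr_error(const char *value)
{
    free(sgr_error);            /* free(NULL) is a no-op, so this is safe */
    sgr_error = value ? strdup(value) : NULL;
}

int
main(void)
{
    set_sgr_error("01;31");
    set_sgr_error("01;35");     /* without the free() above, the first copy leaks */
    set_sgr_error(NULL);
    return 0;
}

Whether the free() is worth having is exactly the question raised above: if the
setter can only ever run once per process, the "leak" never materializes.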

-   NOERR();
+   if (ISERR())
+   {
+   freedfa(s);
+   return v->err;
+   }
Can you design a query where this is a problem?

pg_log_error("could not allocate SIDs: error code %lu",
GetLastError());
+   CloseHandle(origToken);
+   FreeLibrary(Advapi32Handle);
[...]
pg_log_error("could not open process token: error code %lu",
GetLastError());
+   FreeLibrary(Advapi32Handle);
return 0;
For those two, it looks like you are right.  However, I think
that it would be safer to check if Advapi32Handle is NULL for both.

@@ -187,6 +190,7 @@ get_restricted_token(void)
}
exit(x);
}
+   free(cmdline);
Anything allocated with pg_strdup() should be free'd with pg_free(),
that's a matter of consistency.

+++ b/src/backend/postmaster/postmaster.c
@@ -4719,6 +4719,8 @@ retry:
if (cmdLine[sizeof(cmdLine) - 2] != '\0')
{
elog(LOG, "subprocess command line too long");
+   UnmapViewOfFile(param);
+   CloseHandle(paramHandle);
The three in postmaster.c are correct guesses.

+   if (sspictx != NULL)
+   {
+   DeleteSecurityContext(sspictx);
+   free(sspictx);
+   }
+   FreeCredentialsHandle();
This stuff is correctly free'd after calling AcceptSecurityContext()
in the SSPI code, but not in the two other code paths.  Looks right.
Actually, for the first one, wouldn't it be better to free those
resources *before* ereport(ERROR) on ERRCODE_PROTOCOL_VIOLATION?
That's an authentication path so it does not really matter but..

ldap_unbind(*ldap);
+   FreeLibrary(ldaphandle);
return STATUS_ERROR;
Yep.  Cleaning that up keeps things consistent.

+   if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
+   elog(FATAL, "failed to release reserved memory region
(addr=%p): error code %lu",
+   ShmemProtectiveRegion, GetLastError());
return false;
No, that's not right.  I think that it is possible to loop over
ShmemProtectiveRegion in some cases.  And actually, your patch is dead
wrong because this is some code called by the postmaster and it cannot
use FATAL.

> Not all leaks detected by Coverity are fixed.

Coverity is a static analyzer; it misses a lot of things tied to the
context of the code, so you need to take its suggestions with a pinch
of salt.
--
Michael


signature.asc
Description: PGP signature


Re: range_agg

2020-01-23 Thread Pavel Stehule
Hi

st 22. 1. 2020 v 0:55 odesílatel Paul A Jungwirth <
p...@illuminatedcomputing.com> napsal:

> On Sun, Jan 19, 2020 at 9:57 PM Paul A Jungwirth
>  wrote:
> > On Sun, Jan 19, 2020 at 4:38 PM Tom Lane  wrote:
> > > True for casts involving concrete types, mainly because we'd like
> > > the identity "value::typename == typename(value)" to hold without
> > > too much worry about whether the latter is a plain function call
> > > or a special case.  Not sure whether it makes as much sense for
> > > polymorphics, since casting to a polymorphic type is pretty silly:
> > > we do seem to allow you to do that, but it's a no-op.
> > >
> > > ...
> > >
> > > Alternatively, consider this: a cast from some concrete multirange type
> > > to anymultirange is a no-op, while any other sort of cast probably
> ought
> > > to be casting to some particular concrete multirange type.  That would
> > > line up with the existing operations for plain ranges.
> >
> > I agree you wouldn't actually cast by saying x::anymultirange, and the
> > casts we define are already concrete, so instead you'd say
> > x::int4multirange. But I think having a polymorphic function to
> > convert from an anyrange to an anymultirange is useful so you can
> > write generic functions. I can see how calling it "anymultirange" may
> > be preferring the implementor perspective over the user perspective
> > though, and how simply "multirange" would be more empathetic. I don't
> > mind taking that approach.
>
> Here is a patch with anymultirange(anyrange) renamed to
> multirange(anyrange). I also rebased on the latest master, added
> documentation about the multirange(anyrange) function, and slightly
> adjusted the formatting of the range functions table.
>

I think this patch is ready for committer.

All tests passed, and the doc is good enough (though the chapter name "Range
functions and Operators" should be renamed to "Range/multirange functions and
Operators").
The code formatting and comments look good.


Thank you for your work

Regards

Pavel


> Thanks,
> Paul
>


Patching documentation of ALTER TABLE re column type changes on binary-coercible fields

2020-01-23 Thread Mike Lissner
Hi, first patch here and first post to pgsql-hackers. Here goes.

Enclosed please find a patch to tweak the documentation of the ALTER TABLE
page. I believe this patch is ready to be applied to master and backported
all the way to 9.2.

On the ALTER TABLE page, it currently notes that if you change the type of
a column, even to a binary coercible type:

> any indexes on the affected columns must still be rebuilt.

It appears this hasn't been true for about eight years, since 367bc426a.

Here's the discussion of the topic from earlier today and yesterday:

https://www.postgresql.org/message-id/flat/CAMp9%3DExXtH0NeF%2BLTsNrew_oXycAJTNVKbRYnqgoEAT01t%3D67A%40mail.gmail.com

I haven't run tests, but I presume they'll be unaffected by a documentation
change.

I've made an effort to follow the example of other people's patches I
looked at, but I haven't contributed here before. Happy to take another
stab at this if this doesn't hit the mark — though I hope it does. I love
and appreciate PostgreSQL and hope that I can do my little part to make it
better.

For the moment, I haven't added this to commitfest. I don't know what it
is, but I suspect this is small enough somebody will just pick it up.

Mike
Index: doc/src/sgml/ref/alter_table.sgml
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===
--- doc/src/sgml/ref/alter_table.sgml	(revision 6de7bcb76f6593dcd107a6bfed645f2142bf3225)
+++ doc/src/sgml/ref/alter_table.sgml	(revision 9a813e0896e828900739d95f78b5e4be10dac365)
@@ -1225,10 +1225,9 @@
 existing column, if the USING clause does not change
 the column contents and the old type is either binary coercible to the new
 type or an unconstrained domain over the new type, a table rewrite is not
-needed; but any indexes on the affected columns must still be rebuilt.
-Table and/or index rebuilds may take a
-significant amount of time for a large table; and will temporarily require
-as much as double the disk space.
+needed. Table and/or index rebuilds may take a significant amount of time
+for a large table; and will temporarily require as much as double the disk
+space.

 



Re: Add pg_file_sync() to adminpack

2020-01-23 Thread Arthur Zakirov

On 2020/01/24 14:56, Michael Paquier wrote:

On Fri, Jan 24, 2020 at 01:28:29PM +0900, Arthur Zakirov wrote:

It is compiled and passes the tests. The documentation is there and it is
built without errors too.

It seems that consensus about the returned type was reached and I marked the
patch as "Ready for Committer".


+   fsync_fname_ext(filename, S_ISDIR(fst.st_mode), false, ERROR);
One comment here: should we better warn users in the docs that an fsync
failure will not trigger a PANIC here?  Here, fsync failure on a heap
file => ERROR => potential data corruption.


Ah, true. It is possible to add a couple of sentences saying that
pg_file_sync() doesn't depend on the data_sync_retry GUC and doesn't
raise a PANIC even for database files.


--
Arthur




Re: Add pg_file_sync() to adminpack

2020-01-23 Thread Julien Rouhaud
On Fri, Jan 24, 2020 at 6:56 AM Michael Paquier  wrote:
>
> On Fri, Jan 24, 2020 at 01:28:29PM +0900, Arthur Zakirov wrote:
> > It is compiled and passes the tests. The documentation is there and it is
> > built without errors too.
> >
> > It seems that consensus about the returned type was reached and I marked the
> > patch as "Ready for Committer".
>
> +   fsync_fname_ext(filename, S_ISDIR(fst.st_mode), false, ERROR);
> One comment here: should we better warn users in the docs that an fsync
> failure will not trigger a PANIC here?  Here, fsync failure on a heap
> file => ERROR => potential data corruption.

Definitely yes.




Re: Add pg_file_sync() to adminpack

2020-01-23 Thread Michael Paquier
On Fri, Jan 24, 2020 at 01:28:29PM +0900, Arthur Zakirov wrote:
> It is compiled and passes the tests. The documentation is there and it is
> built without errors too.
> 
> It seems that consensus about the returned type was reached and I marked the
> patch as "Ready for Committer".

+   fsync_fname_ext(filename, S_ISDIR(fst.st_mode), false, ERROR);
One comment here: should we better warn users in the docs that an fsync
failure will not trigger a PANIC here?  Here, fsync failure on a heap
file => ERROR => potential data corruption.
--
Michael


signature.asc
Description: PGP signature


Re: BUG #16109: Postgres planning time is high across version (Expose buffer usage during planning in EXPLAIN)

2020-01-23 Thread Justin Pryzby
On Wed, Nov 13, 2019 at 11:39:04AM +0100, Julien Rouhaud wrote:
> (moved to -hackers)
> 
> On Tue, Nov 12, 2019 at 9:55 PM Andres Freund  wrote:
> >
> > This last point is more oriented towards other PG developers: I wonder
> > if we ought to display buffer statistics for plan time, for EXPLAIN
> > (BUFFERS). That'd surely make it easier to discern cases where we
> > e.g. access the index and scan a lot of the index from cases where we
> > hit some CPU time issue. We should easily be able to get that data, I
> > think, we already maintain it, we'd just need to compute the diff
> > between pgBufferUsage before / after planning.
> 
> That would be quite interesting to have.  I attach as a reference a
> quick POC patch to implement it:

+1

+   result.shared_blks_hit = stop->shared_blks_hit - start->shared_blks_hit;
+   result.shared_blks_read = stop->shared_blks_read - start->shared_blks_read;
+   result.shared_blks_dirtied = stop->shared_blks_dirtied -
+   start->shared_blks_dirtied;
[...]

I think it would be more readable and maintainable using a macro:

#define CALC_BUFF_DIFF(x) result.x = stop->x - start->x
CALC_BUFF_DIFF(shared_blks_hit);
CALC_BUFF_DIFF(shared_blks_read);
CALC_BUFF_DIFF(shared_blks_dirtied);
...
#undef CALC_BUFF_DIFF
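To confirm the macro form compiles and behaves as intended, here is a
standalone sketch; the struct is a deliberately cut-down stand-in with three
fields, not the real pgstat BufferUsage:

#include <stdio.h>

/* Deliberately cut-down stand-in for the real BufferUsage struct. */
typedef struct BufferUsage
{
    long        shared_blks_hit;
    long        shared_blks_read;
    long        shared_blks_dirtied;
} BufferUsage;

/* Compute stop - start field by field, using the macro suggested above. */
static BufferUsage
buffer_usage_diff(const BufferUsage *start, const BufferUsage *stop)
{
    BufferUsage result;

#define CALC_BUFF_DIFF(x) result.x = stop->x - start->x
    CALC_BUFF_DIFF(shared_blks_hit);
    CALC_BUFF_DIFF(shared_blks_read);
    CALC_BUFF_DIFF(shared_blks_dirtied);
#undef CALC_BUFF_DIFF

    return result;
}

int
main(void)
{
    BufferUsage before = {10, 2, 1};
    BufferUsage after = {25, 7, 3};
    BufferUsage diff = buffer_usage_diff(&before, &after);

    printf("hit=%ld read=%ld dirtied=%ld\n",
           diff.shared_blks_hit, diff.shared_blks_read,
           diff.shared_blks_dirtied);
    return 0;
}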





Re: [Patch] pg_rewind: options to use restore_command from recovery.conf or command line

2020-01-23 Thread Michael Paquier
On Wed, Jan 22, 2020 at 12:55:30AM +0300, Alexander Korotkov wrote:
> Thank you for your review!
> The revised patch is attached.

Thanks for the new version.

> On Sun, Jan 19, 2020 at 1:24 PM Michael Paquier  wrote:
>> +static int
>> +RestoreArchivedWALFile(const char *path, const char *xlogfname,
>> +  off_t expectedSize, const char *restoreCommand)
>> Not a fan of putting that to pg_rewind/parsexlog.c.  It has nothing to
>> do with WAL parsing, and it seems to me that we could have an argument
>> for making this available as a frontend-only API in src/common/.
> 
> I've put it into src/common/fe_archive.c.

This split makes sense.  You have forgotten to update
@pgcommonfrontendfiles in Mkvcbuild.pm for the MSVC build.  Still, I
can see that we have a lot of duplication between the code of the
frontend and the backend, which is annoying..  The execution part is
tricky to split because the backend has pre- and post- callbacks, the
interruption handling is not the same and there is the problem of
elog() calls, with elevel that can be changed depending on the backend
configuration.  However, I can see one portion of the code which is
fully duplicated and could be refactored out: the construction of the
command to execute, by having in input the restore_command string and
the arguments that we expect to replace in the '%' markers which are
part of the command.  If NULL is specified for one of those values,
then the construction routine returns NULL, synonym of failure.  And
the result of the routine is the built command.  I think that you
would need an extra argument to give space for the error message
generated in the event of an error when building the command.
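For reference, the shared piece described above boils down to expanding the
'%' markers of restore_command.  A rough standalone sketch of that expansion
(not the patch's code: the %f/%p markers and the "NULL means failure"
convention follow the description above, everything else is made up):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Expand %f (WAL file name), %p (target path) and %% in a restore_command
 * template.  Returns a malloc'd command string, or NULL if the template
 * references a marker whose value was not supplied.
 */
static char *
build_restore_command(const char *template,
                      const char *xlogfname,    /* value for %f, may be NULL */
                      const char *xlogpath)     /* value for %p, may be NULL */
{
    size_t      need = 1;       /* terminating NUL */
    const char *sp;
    char       *result;
    char       *dp;

    /* First pass: validate markers and compute the needed length. */
    for (sp = template; *sp; sp++)
    {
        if (sp[0] == '%' && sp[1] == 'f')
        {
            if (xlogfname == NULL)
                return NULL;
            need += strlen(xlogfname);
            sp++;
        }
        else if (sp[0] == '%' && sp[1] == 'p')
        {
            if (xlogpath == NULL)
                return NULL;
            need += strlen(xlogpath);
            sp++;
        }
        else if (sp[0] == '%' && sp[1] == '%')
        {
            need++;
            sp++;
        }
        else
            need++;
    }

    result = malloc(need);
    if (result == NULL)
        return NULL;

    /* Second pass: build the command. */
    dp = result;
    for (sp = template; *sp; sp++)
    {
        if (sp[0] == '%' && sp[1] == 'f')
        {
            strcpy(dp, xlogfname);
            dp += strlen(xlogfname);
            sp++;
        }
        else if (sp[0] == '%' && sp[1] == 'p')
        {
            strcpy(dp, xlogpath);
            dp += strlen(xlogpath);
            sp++;
        }
        else if (sp[0] == '%' && sp[1] == '%')
        {
            *dp++ = '%';
            sp++;
        }
        else
            *dp++ = *sp;
    }
    *dp = '\0';

    return result;
}

int
main(void)
{
    char       *cmd = build_restore_command("cp /mnt/archive/%f \"%p\"",
                                            "000000010000000000000002",
                                            "pg_wal/RECOVERYXLOG");

    if (cmd != NULL)
    {
        puts(cmd);
        free(cmd);
    }
    return 0;
}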

>> Do we actually need --target-restore-command at all?  It seems to me
>> that we have all we need with --restore-target-wal, and that's not
>> really instinctive to pass down a command via another command..
> 
> I was going to argue that --restore-target-wal is useful.  But I
> didn't find a convincing example for that.  Naturally, if one uses
> restore_command during pg_rewind then why can't the same restore_command
> be used in the new standby?  So, I've removed the --restore-target-wal
> argument.  If we find a convincing use case we can bring it back at any
> moment.

Okay.  Let's consider it if it makes sense.  You can always go through
this restriction by first setting restore_command in the target
cluster, and then run pg_rewind.  And I would think that someone is
going to use the same restore_command even after restarting the
rewound target cluster.

>> +   # Add restore_command to postgresql.conf of target cluster.
>> +   open(my $conf_fd, ">>", $master_conf_path) or die;
>> +   print $conf_fd "\nrestore_command='$restore_command'";
>> +   close $conf_fd;
>> We have append_conf() for that.
> 
> Sure, thank you for pointing!

+++ b/src/include/common/fe_archive.h
+#ifndef ARCHIVE_H
+#define ARCHIVE_H
This should be FE_ARCHIVE_H.

-   while ((c = getopt_long(argc, argv, "D:nNPR", long_options,
&option_index)) != -1)
+   while ((c = getopt_long(argc, argv, "D:nNPRC:c", long_options,
&option_index)) != -1)
This still includes 'C', but that should not be the case.

+   pg_rewind with the -c or
+   -C option to automatically retrieve them from the WAL
[...]
+  -C restore_command
+  --target-restore-command=restore_command
[...]
+  available, try re-running pg_rewind with
+  the -c or -C option to search
+  for the missing files in the WAL archive.
Three places in the docs need to be cleaned up.

Do we really need to test the new "archive" mode as well for
003_extrafiles and 002_databases?  I would imagine that only 001_basic
is enough. 

+# Moves WAL files to the temporary location and returns restore_command
+# to get them back.
+sub move_wal
+{
+   my ($tmp_dir, $master_pgdata) = @_;
+   my $wal_archive_path = "$tmp_dir/master_wal_archive";
+   my $wal_path = "$master_pgdata/pg_wal";
+   my $wal_dir;
+   my $restore_command;
I think that this routine is wrongly designed.  First, in order to
copy the contents from one path to another, we have
RecursiveCopy::copy_path, and there is a long history about making 
it safe for the TAP tests.  Second, PostgresNode::enable_restoring
already has a code path doing the decision-making on how to build
restore_command.  We should keep this
code path unique.
--
Michael


signature.asc
Description: PGP signature


Re: Error message inconsistency

2020-01-23 Thread Amit Kapila
On Thu, Jan 23, 2020 at 5:51 PM Mahendra Singh Thalor
 wrote:
>
> I fixed above comment and updated expected .out files. Attaching
> updated patches.
>

LGTM.  I have combined them into a single patch.  What do we think
about backpatching this?  There are quite a few changes in the
regression tests, so it might be a good idea to keep the back-branch
code in sync; but, OTOH, this is more of a change related to providing
more information, so we don't have any pressing need to backpatch
this.  What do others think?

One thing to note is that there are places in code where we use
'table' instead of 'relation' for the same thing in the error messages
as seen in the places below (the first one uses 'relation', the second
one uses 'table'), and the patch uses 'relation', which I think is
fine.

1. src/backend/executor/execPartition.c
342 ereport(ERROR,
 343 (errcode(ERRCODE_CHECK_VIOLATION),
 344  errmsg("no partition of relation \"%s\" found for row",
 345 RelationGetRelationName(rel)),
 346  val_desc ?
 347  errdetail("Partition key of the failing row contains %s.",
 348 val_desc) : 0));


2. src/backend/commands/typecmds.c
2396 ereport(ERROR,
2397 (errcode(ERRCODE_NOT_NULL_VIOLATION),
2398  errmsg("column \"%s\" of table \"%s\" contains null values",
2399 NameStr(attr->attname),
2400 RelationGetRelationName(testrel)),
2401  errtablecol(testrel, attnum)));

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


v5-0001-Added-relation-name-in-error-messages-for-constraint.patch
Description: Binary data


Re: proposal: schema variables

2020-01-23 Thread Pavel Stehule
st 22. 1. 2020 v 0:41 odesílatel Tomas Vondra 
napsal:

> Hi,
>
> I did a quick review of this patch today, so let me share a couple of
> comments.
>
> Firstly, the patch is pretty large - it has ~270kB. Not the largest
> patch ever, but large. I think breaking the patch into smaller pieces
> would significantly improve the chance of it getting committed.
>
> Is it possible to break the patch into smaller pieces that could be
> applied incrementally? For example, we could move the "transactional"
> behavior into a separate patch, but it's not clear to me how much code
> would that actually move to that second patch. Any other parts that
> could be moved to a separate patch?
>

I am sending two patches - 0001 - schema variables, 0002 - transactional
variables

>
> I see one of the main contention points was a rather long discussion
> about transactional vs. non-transactional behavior. I agree with Pavel's
> position that the non-transactional behavior should be the default,
> simply because it's better aligned with what the other databases are
> doing (and supporting migrations seems like one of the main use cases
> for this feature).
>
> I do understand it may not be suitable for some other use cases,
> mentioned by Fabien, but IMHO it's fine to require explicit
> specification of transactional behavior. Well, we can't have both as
> default, and I don't think there's an obvious reason why it should be
> the other way around.
>
> Now, a bunch of comments about the code (some of that nitpicking):
>
>
> 1) This hunk in doc/src/sgml/catalogs.sgml still talks about "table
> creation" instead of schema creation:
>
>   
>vartypmod
>int4
>
>
> vartypmod records type-specific data
> supplied at table creation time (for example, the maximum
> length of a varchar column).  It is passed to
> type-specific input functions and length coercion functions.
> The value will generally be -1 for types that do not need
> vartypmod.
>
>   
>

fixed


>
> 2) This hunk in doc/src/sgml/ref/alter_default_privileges.sgml uses
> "role_name" instead of "variable_name"
>
>  GRANT { READ | WRITE | ALL [ PRIVILEGES ] }
>  ON VARIABLES
>  TO { [ GROUP ] role_name
> | PUBLIC } [, ...] [ WITH GRANT OPTION ]
>

I think this is correct


>
> 3) I find the syntax in create_variable.sgml a bit too over-complicated:
>
> 
> CREATE [ { TEMPORARY | TEMP } ] [ { TRANSACTIONAL | TRANSACTION } ]
> VARIABLE [ IF NOT EXISTS ] name [ [ AS ] data_type ] [ COLLATE collation ]
>  [ NOT NULL ] [ DEFAULT default_expr ] [ { ON COMMIT DROP | ON {
> TRANSACTIONAL | TRANSACTION } END RESET } ]
> 
>
> Do we really need both TRANSACTION and TRANSACTIONAL? Why not just one
> that we already have in the grammar (i.e. TRANSACTION)?
>

It was Philippe's wish - the implementation is simple, and it is similar
to TEMP, TEMPORARY. I don't have a strong opinion about it.


>
> 4) I think we should rename schemavariable.h to schema_variable.h.
>

done


>
> 5) objectaddress.c has extra line after 'break;' in one switch.
>

fixed


>
> 6) The comment is wrong:
>
>  /*
>   * Find the ObjectAddress for a type or domain
>   */
>  static ObjectAddress
>  get_object_address_variable(List *object, bool missing_ok)
>

fixed


>
> 7) I think the code/comments are really inconsistent in talking about
> "variables" and "schema variables". For example in objectaddress.c we do
> these two things:
>
>  case OCLASS_VARIABLE:
>  appendStringInfoString(&buffer, "schema variable");
>  break;
>
> vs.
>
>  case DEFACLOBJ_VARIABLE:
>  appendStringInfoString(&buffer,
> " on variables");
>  break;
>
> That's going to be confusing for people.
>
>
fixed


>
> 8) I'm rather confused by CMD_PLAN_UTILITY, which seems to be defined
> merely to support LET. I'm not sure why that's necessary (Why wouldn't
> CMD_UTILITY be sufficient?).
>

Currently our utility statements cannot hold an execution plan, and
cannot be prepared.

so this enhancement is motivated mainly by performance reasons. I would like
to allow any SELECT query there, not just expressions (see the limits of the
CALL statement).



> Having to add conditions checking for CMD_PLAN_UTILITY to various places
> in planner.c is rather annoying, and I wonder how likely it's this will
> unnecessarily break external code in extensions etc.
>
>
> 9) This comment in planner.c seems obsolete (not updated to reflect
> addition of the CMD_PLAN_UTILITY check):
>
>/*
> * If this is an INSERT/UPDATE/DELETE, and we're not being called from
> * inheritance_planner, add the ModifyTable node.
> */
>if (parse->commandType != CMD_SELECT && parse->commandType !=
> CMD_PLAN_UTILITY && !inheritance_update)
>

"If this is an INSERT/UPDATE/DELETE," is related to 

Re: Add pg_file_sync() to adminpack

2020-01-23 Thread Arthur Zakirov

On 2020/01/17 16:05, Fujii Masao wrote:

On 2020/01/17 13:36, Michael Paquier wrote:

Yeah, that should be either ERROR and return a void result, or issue a
WARNING/ERROR (depending on the code path, maybe PANIC?) with a
boolean status returned if there is a WARNING.  Mixing both concepts
with an ERROR all the time and always a true status is just weird,
because you know that if no errors are raised then the status will
always be true.  So there is no point in having a boolean status to begin
with.


OK, so our consensus is to return void on success and throw an error
otherwise. Attached is the updated version of the patch.

Thank you for the new version!

It is compiled and passes the tests. The documentation is there and it
is built without errors too.


It seems that consensus about the returned type was reached and I marked
the patch as "Ready for Committer".


--
Arthur




Re: Improve search for missing parent downlinks in amcheck

2020-01-23 Thread Peter Geoghegan
On Thu, Jan 23, 2020 at 5:40 PM Peter Geoghegan  wrote:
> I suppose the alternative is to get the high key of the parent's left
> sibling, rather than going to the parent's parent (i.e. our
> grandparent). That would probably be the best way to get a separator
> key to compare against the high key in the leftmost cousin page of a
> subtree, if in fact we wanted to *fully* solve the "cousin problem".

I think I've confused myself here. The
"!offset_is_negative_infinity(topaque, pivotkey_offset)" part of the
bt_downlink_connectivity_check() high key check test that I mentioned
now *also* seems unnecessary. Any high key in a page that isn't marked
"half-dead" or marked "deleted" or marked "has incomplete split" can
be targeted by the check. Once the page meets that criteria, there
must be a pivot tuple in the parent page that contains an
identical-to-highkey separator key (this could be the parent's own
high key).

The only thing that you need to do is be careful about rightmost
parent pages of a non-rightmost page -- stuff like that. But, I think
that that's only needed because an amcheck segfault isn't a very nice
way to detect corruption.

The existing comment about negative infinity items within
bt_downlink_check() that I mentioned in my main e-mail (the "Note:"
part) doesn't quite apply here. You're not verifying a pivot tuple that
has a downlink (which might be negative infinity) against the lower
levels -- you're doing *the opposite*. That is, you're verifying a
high key using the parent. Which seems like the right way to do it --
you can test for the incomplete split flag and so on by doing it
bottom up (not top down). This must have been why I was confused.

Phew!
-- 
Peter Geoghegan




Re: Improve search for missing parent downlinks in amcheck

2020-01-23 Thread Peter Geoghegan
On Thu, Jan 23, 2020 at 5:31 PM Peter Geoghegan  wrote:
> In summary: I suppose that we can also solve "the cousin problem"
> quite easily, but only for rightmost cousins within a subtree --
> leftmost cousins might be too messy to verify for it to be worth it.
> We don't want to have to jump two or three levels up within
> bt_downlink_connectivity_check() just for leftmost cousin pages. But
> maybe you're feeling ambitious! What do you think?

I suppose the alternative is to get the high key of the parent's left
sibling, rather than going to the parent's parent (i.e. our
grandparent). That would probably be the best way to get a separator
key to compare against the high key in the leftmost cousin page of a
subtree, if in fact we wanted to *fully* solve the "cousin problem".
Goetz Graefe recommends keeping both a low key and a high key in every
page for verification purposes. We don't actually have a low key (we
only have a truncated negative infinity item), but this approach isn't
that far off having a low key.

-- 
Peter Geoghegan




Re: Improve search for missing parent downlinks in amcheck

2020-01-23 Thread Peter Geoghegan
On Wed, Jan 22, 2020 at 6:41 PM Alexander Korotkov
 wrote:
> Rebased patch is attached.  Sorry for so huge delay.

I really like this patch. Your interest in amcheck is something that
makes me feel good about having put so much work into it myself.

Here are some review comments:

> +   /*
> +* Rightlink and incomplete split flag of previous block referenced by
> +* downlink.
> +*/
> +   BlockNumber prevrightlink;
> +   boolprevincompletesplit;
> +

What downlink? What does this mean? Do you mean the most recently
followed rightlink on the current level, or InvalidBlockNumber if
target page is the leftmost page on the current level on the scan?

(Thinks some more...)

Actually, these two new fields track the state *one level down* from
the target page level when !readonly (unless target page is on the
leaf level). Right? Comments should be explicit about this. The
current comments about downlinks aren't clear.

> if (offset_is_negative_infinity(topaque, offset))
> +   {
> +   /*
> +* Initialize downlink connectivity check if needed.
> +*/
> +   if (!P_ISLEAF(topaque) && state->readonly)
> +   {
> +   bt_downlink_connectivity_check(state,
> +  offset,
> +  NULL,
> +  topaque->btpo.level);
> +   }
> continue;
> +   }

Don't need the "!P_ISLEAF()" here. Also, you should say something like
"we need to call this here because the usual callsite in
bt_downlink_check() won't be reached".

> /*
> -* * Check if page has a downlink in parent *
> -*
> -* This can only be checked in heapallindexed + readonly case.
> +* If we traversed the whole level to the rightmost page, there might be
> +* missing downlinks for the pages to the right of rightmost downlink.
> +* Check for them.
>  */

You mean "to the right of the child page pointed to by our rightmost downlink"?

I think that the final bt_downlink_connectivity_check() call within
bt_target_page_check() should make it clear that it is kind of special
compared to the other two calls.

> +/*
> + * Check connectivity of downlinks.  Traverse rightlinks from previous 
> downlink
> + * to the current one.  Check that there are no intermediate pages with 
> missing
> + * downlinks.
> + *
> + * If 'loaded_page' is given, it's assumed to be contents of downlink
> + * referenced by 'downlinkoffnum'.
> + */

Say "assumed to be the page pointed to by the downlink", perhaps?

> +static void
> +bt_downlink_connectivity_check(BtreeCheckState *state,
> +  OffsetNumber downlinkoffnum,
> +  Page loaded_page,
> +  uint32 parent_level)
> +{

In amcheck, we always have a current target page. Every page gets to
be the target exactly once, though sometimes other subsidiary pages
are accessed. We try to blame the target page, even with checks that
are technically against its child/sibling/whatever. The target page is
always our conceptual point of reference. Sometimes this is a bit
artificial, but it's still worth doing consistently. So I think you
should change these argument names with that in mind (see below).

> +   /*
> +* If we visit page with high key, check that it should be equal to
> +* the target key next to corresponding downlink.
> +*/

I suggest "...check that it is equal to the target key..."

> +   /*
> +* There might be two situations when we examine high key.  If
> +* current child page is referenced by given downlink, we should
> +* look to the next offset number for matching key.

You mean "the next offset number for the matching key from the target
page"? I find it much easier to keep this stuff in my head if
everything is defined in terms of its relationship with the current
target page. For example, bt_downlink_connectivity_check()'s
"parent_level" argument should be called "target_level" instead, while
its "loaded_page" should be called "loaded_child". Maybe
"downlinkoffnum" should be "target_downlinkoffnum". And downlinkoffnum
should definitely be explained in comments at the top of
bt_downlink_connectivity_check() (e.g., say what it means when it is
InvalidOffsetNumber).

> Alternatively
> +* we might find child with high key while traversing from
> +* previous downlink to current one.  Then matching key resides
> +* the same offset number as current downlink.
> +*/

Not sure what "traversing from previous downlink to current one" means at all.

> +   if (!offset_is_negative_infinity(topaque, pivotkey_offset) &&
> +   pivotkey_offset <= PageGetMaxOffsetNumber(state->target))
> +   {
> +   uint32  cmp = _bt_compare(state->rel,
> +   

Re: doc: alter table references bogus table-specific planner parameters

2020-01-23 Thread Michael Paquier
On Wed, Jan 22, 2020 at 01:53:32PM +0900, Michael Paquier wrote:
>> @@ -714,9 +714,7 @@ WITH ( MODULUS numeric_literal, REM
>>   
>>SHARE UPDATE EXCLUSIVE lock will be taken for
>>fillfactor, toast and autovacuum storage parameters, as well as the
>> -  following planner related parameters:
>> -  effective_io_concurrency, parallel_workers, seq_page_cost,
>> -  random_page_cost, n_distinct and n_distinct_inherited.
>> +  parallel_workers planner parameter.
> 
> Right.  n_distinct_inherited and n_distinct can only be set for
> an attribute, and the docs are clear about that.

Okay, fixed that list and backpatched down to 10.
--
Michael


signature.asc
Description: PGP signature


Re: making the backend's json parser work in frontend code

2020-01-23 Thread Andrew Dunstan
On Fri, Jan 24, 2020 at 7:35 AM Mark Dilger
 wrote:
>
>
> > On Jan 22, 2020, at 7:00 PM, Mark Dilger  
> > wrote:
> >
> > I have this done in my local repo to the point that I can build frontend 
> > tools against the json parser that is now in src/common and also run all 
> > the check-world tests without failure.  I’m planning to post my work soon, 
> > possibly tonight if I don’t run out of time, but more likely tomorrow.
>
> Ok, I finished merging with Robert’s patches.  The attached follow his 
> numbering, with my patches intended to by applied after his.
>
> I tried not to change his work too much, but I did a bit of refactoring in 
> 0010, as explained in the commit comment.
>
> 0011 is just for verifying the linking works ok and the json parser can be 
> invoked from a frontend tool without error — I don’t really see the point in 
> committing it.
>
> I ran some benchmarks for json parsing in the backend both before and after 
> these patches, with very slight changes in runtime.  The setup for the 
> benchmark creates an unlogged table with a single text column and loads rows 
> of json formatted text:
>
> CREATE UNLOGGED TABLE benchmark (
> j text
> );
> COPY benchmark (j) FROM '/Users/mark.dilger/bench/json.csv’;
>
>
> FYI:
>
> wc ~/bench/json.csv
>  107 34465023 503364244 /Users/mark.dilger/bench/json.csv
>
> The benchmark itself casts the text column to jsonb, as follows:
>
> SELECT jsonb_typeof(j::jsonb) typ, COUNT(*) FROM benchmark GROUP BY typ;
>
> In summary, the times are:
>
> pristine   patched
> —   —
> 11.985  12.237
> 12.200  11.992
> 11.691  11.896
> 11.847  11.833
> 11.722  11.936
>

OK, nothing noticeable there.

"accept" is a common utility I've used in the past with parsers of
this kind, but inlining it seems perfectly reasonable.

I've reviewed these patches and Robert's, and they seem basically good to me.

But I don't think src/bin is the right place for the test program. I
assume we're not going to ship this program, so it really belongs in
src/test somewhere, I think. It should also have a TAP test.

cheers

andrew



-- 
Andrew Dunstan  https://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: Busted(?) optimization in ATAddForeignKeyConstraint

2020-01-23 Thread Thomas Munro
On Fri, Jan 24, 2020 at 11:12 AM Tom Lane  wrote:
> I happened to notice this comment in the logic in
> ATAddForeignKeyConstraint that tries to decide if it can skip
> revalidating a foreign-key constraint after a DDL change:
>
>  * Since we require that all collations share the same notion of
>  * equality (which they do, because texteq reduces to bitwise
>  * equality), we don't compare collation here.
>
> Hasn't this been broken by the introduction of nondeterministic
> collations?

Similar words appear in the comment for ri_GenerateQualCollation().




Busted(?) optimization in ATAddForeignKeyConstraint

2020-01-23 Thread Tom Lane
I happened to notice this comment in the logic in
ATAddForeignKeyConstraint that tries to decide if it can skip
revalidating a foreign-key constraint after a DDL change:

 * Since we require that all collations share the same notion of
 * equality (which they do, because texteq reduces to bitwise
 * equality), we don't compare collation here.

Hasn't this been broken by the introduction of nondeterministic
collations?

regards, tom lane




Re: Allow to_date() and to_timestamp() to accept localized names

2020-01-23 Thread Tom Lane
Here's a v7 patch, rebased over my recent hacking, and addressing
most of the complaints I raised in <31691.1579648...@sss.pgh.pa.us>.
However, I've got some new complaints now that I've looked harder
at the code:

* It's not exactly apparent to me why this code should be concerned
about non-normalized characters when noplace else in the backend is.
I think we should either rip that out entirely, or move the logic
into str_tolower (and hence also str_toupper etc).  I'd tend to favor
the former, but of course I don't speak any languages where this would
be an issue.  Perhaps a better idea is to invent a new SQL function
that normalizes a given string, and expect users to call that first
if they'd like to_date() to match unnormalized text.

* I have no faith in this calculation that decides how long the match
length was:

*len = element_len + name_len - norm_len;

I seriously doubt that this does the right thing if either the
normalization or the downcasing changed the byte length of the
string.  I'm not actually sure how we can do that correctly.
There's no good way to know whether the changes happened within
the part of the "name" string that we matched, or the part beyond
what we matched, but we only want to count the former.  So this
seems like a pretty hard problem, and even if this logic is somehow
correct as it stands, it needs a comment explaining why.

* I'm still concerned about the risk of integer overflow in the
string allocations in seq_search_localized().  Those need to be
adjusted to be more paranoid, similar to the code in e.g. str_tolower.

regards, tom lane

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6c4359d..ff44496 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -5934,7 +5934,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');


 TM prefix
-translation mode (print localized day and month names based on
+translation mode (use localized day and month names based on
  )
 TMMonth

@@ -5966,8 +5966,13 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  
   
TM does not include trailing blanks.
+  
+ 
+
+ 
+  
to_timestamp and to_date ignore
-   the TM modifier.
+   the case when receiving names as an input.
   
  
 
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 792f9ca..2f39050 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -87,6 +87,7 @@
 
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
+#include "common/unicode_norm.h"
 #include "mb/pg_wchar.h"
 #include "parser/scansup.h"
 #include "utils/builtins.h"
@@ -1059,9 +1060,11 @@ static int	from_char_parse_int_len(int *dest, const char **src, const int len,
 	FormatNode *node, bool *have_error);
 static int	from_char_parse_int(int *dest, const char **src, FormatNode *node,
 bool *have_error);
-static int	seq_search(const char *name, const char *const *array, int *len);
+static int	seq_search_ascii(const char *name, const char *const *array, int *len);
+static int	seq_search_localized(const char *name, char **array, int *len);
 static int	from_char_seq_search(int *dest, const char **src,
  const char *const *array,
+ char **localized_array,
  FormatNode *node, bool *have_error);
 static void do_to_timestamp(text *date_txt, text *fmt, bool std,
 			struct pg_tm *tm, fsec_t *fsec, int *fprec,
@@ -2459,7 +2462,7 @@ from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_er
  * suitable for comparisons to ASCII strings.
  */
 static int
-seq_search(const char *name, const char *const *array, int *len)
+seq_search_ascii(const char *name, const char *const *array, int *len)
 {
 	unsigned char firstc;
 	const char *const *a;
@@ -2505,8 +2508,92 @@ seq_search(const char *name, const char *const *array, int *len)
 }
 
 /*
- * Perform a sequential search in 'array' for an entry matching the first
- * character(s) of the 'src' string case-insensitively.
+ * Sequentially search an array of possibly non-English words for
+ * a case-insensitive match to the initial character(s) of "name".
+ *
+ * This has the same API as seq_search_ascii(), but we use a more general
+ * downcasing transformation to achieve case-insensitivity.
+ *
+ * The array is treated as const, but we don't declare it that way because
+ * the arrays exported by pg_locale.c aren't const.
+ */
+static int
+seq_search_localized(const char *name, char **array, int *len)
+{
+	char	  **a;
+	char	   *lower_name;
+	char	   *norm_name;
+	int			name_len;
+	int			norm_len;
+
+	*len = 0;
+
+	/* empty string can't match anything */
+	if (!*name)
+		return -1;
+
+	name_len = strlen(name);
+
+	/* Normalize name, if working in Unicode */
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		pg_wchar   

Re: progress report for ANALYZE

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-22, Tatsuro Yamada wrote:

> Thanks for reviewing and committing the patch!
> Hope this helps DBA. :-D

I'm sure it does!

> P.S.
> Next up is progress reporting for query execution?!

Actually, I think it's ALTER TABLE.

Also, things like VACUUM could report the progress of the index being
processed ...

-- 
Álvaro Herrera  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [Proposal] Global temporary tables

2020-01-23 Thread Robert Haas
On Sat, Jan 11, 2020 at 8:51 PM Tomas Vondra
 wrote:
> I proposed just ignoring those new indexes because it seems much simpler
> than alternative solutions that I can think of, and it's not like those
> other solutions don't have other issues.

+1.

> For example, I've looked at the "on demand" building as implemented in
> global_private_temp-8.patch, and I kinda doubt it: adding a bunch of index
> build calls into various places in index code seems somewhat suspicious.

+1. I can't imagine that's a safe or sane thing to do.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Documentation patch for ALTER SUBSCRIPTION

2020-01-23 Thread David Christensen
Greetings,

Enclosed is a documentation patch that clarifies the behavior of ALTER 
SUBSCRIPTION … REFRESH PUBLICATION with new tables; I ran into a situation 
today where the docs were not clear that existing tables would not be 
re-copied, so this remedies that situation.

Should apply back to Pg 10.

Best,

David



0001-Be-explicit-about-the-behavior-of-REFRESH-PUBLICATIO.patch
Description: Binary data

--
David Christensen
Senior Software and Database Engineer
End Point Corporation
da...@endpoint.com
785-727-1171




signature.asc
Description: Message signed with OpenPGP


Re: making the backend's json parser work in frontend code

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-23, Bruce Momjian wrote:

> Yes, good point.  I think my one concern is that someone might specify
> both keys in the JSON, which would be very odd.

Just make that a reason to raise an error.  I think it's even possible
to specify that as a JSON Schema constraint, using a "oneOf" predicate.

-- 
Álvaro Herrera  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Robert Haas
On Thu, Jan 23, 2020 at 2:08 PM Daniel Verite  wrote:
> It could be CSV, which has this problem already solved,
> is easier to parse than JSON, certainly no less popular,
> and is not bound to a specific encoding.

Sure. I don't think that would look quite as nice visually as what I
proposed when inspected by humans, and our default COPY output format
is tab-separated rather than comma-separated. However, if CSV would be
more acceptable, great.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Daniel Verite
Robert Haas wrote:

> With the format I proposed, you only have to worry that the
> file name might contain a tab character, because in that format, tab
> is the delimiter

It could be CSV, which has this problem already solved,
is easier to parse than JSON, certainly no less popular,
and is not bound to a specific encoding.


Best regards,
-- 
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Robert Haas
On Thu, Jan 23, 2020 at 1:22 PM Bruce Momjian  wrote:
> Yes, good point.  I think my one concern is that someone might specify
> both keys in the JSON, which would be very odd.

I think that if a tool other than PostgreSQL chooses to generate a
PostreSQL backup manifest, it must take care to do it in a manner that
is compatible with what PostgreSQL would generate. If it doesn't,
well, that sucks for them, but we can't prevent other people from
writing bad code. On a very good day, we can prevent ourselves from
writing bad code.

There is in general the question of how rigorous PostgreSQL ought to
be when validating a backup manifest. The proposal on the table is to
store four (4) fields per file: name, size, last modification time,
and checksum. So a JSON object representing a file should have four
keys, say "path", "size", "mtime", and "checksum". The "checksum" key
could perhaps be optional, in case the user disables checksums, or we
could represent that case in some other way, like "checksum" => null,
"checksum" => "", or "checksum" => "NONE". There is an almost
unlimited scope for bike-shedding here, but let's leave that to one
side for the moment.

Suppose that someone asks PostgreSQL's backup manifest verification
tool to validate a backup manifest where there's an extra key. Say, in
addition to the four keys listed in the previous paragraph, there is
an additional key, "momjian". On the one hand, our backup manifest
verification tool could take this as a sign that the manifest is
invalid, and accordingly throw an error. Or, it could assume that some
third-party backup tool generated the backup manifest and that the
"momjian" field is there to track something which is of interest to
that tool but not to PostgreSQL core, in which case it should just be
ignored.

Incidentally, some research seems to suggest that the problem of
filenames which don't form a valid UTF-8 sequence cannot occur on
Windows. This blog post may be helpful in understanding the issues:

http://beets.io/blog/paths.html

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Bruce Momjian
On Thu, Jan 23, 2020 at 02:00:23PM -0500, Robert Haas wrote:
> Incidentally, some research seems to suggest that the problem of
> filenames which don't form a valid UTF-8 sequence cannot occur on
> Windows. This blog post may be helpful in understanding the issues:
> 
> http://beets.io/blog/paths.html

Is there any danger of assuming a non-UTF8 sequence to be UTF8 even when
it isn't, except that it displays oddly?  I am thinking of a non-UTF8
sequence that is valid UTF8.

-- 
  Bruce Momjian  http://momjian.us
  EnterpriseDB http://enterprisedb.com

+ As you are, so once was I.  As I am, so you will be. +
+  Ancient Roman grave inscription +




Re: New feature proposal (trigger)

2020-01-23 Thread Tom Lane
Pavel Stehule  writes:
> čt 23. 1. 2020 v 17:26 odesílatel Sergiu Velescu 
> napsal:
>> I would like to propose a new feature which is missing in PgSQL but quite
>> useful and nice to have (and exists in Oracle and probably in some other
>> RDBMS); I am speaking about "Database Level" triggers: BeforePgStart,
>> AfterPgStarted, OnLogin, OnSuccessfulLogin, BeforePGshutdown, OnLogOut – I
>> just mentioned some of them, but the final events could be different.

> Do you have some examples of these useful triggers?
> I don't know of any.

See also the fate of commit e788bd924, which proposed to add
on-session-start and on-session-end hooks.  Getting that sort of thing
to work safely is a LOT harder than it sounds.  There are all sorts of
definitional and implementation problems, at least if you'd like the
hook or trigger to do anything interesting (like run a transaction).

I rather suspect that exposing such a thing at SQL level would also add
a pile of security considerations (i.e. who's allowed to do what to whom).
The hook proposal didn't have to address that, but a trigger feature
certainly would.

Maybe it's all do-able, but the work to benefit ratio doesn't look
very appetizing to me.

regards, tom lane




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Bruce Momjian
On Thu, Jan 23, 2020 at 03:20:27PM -0300, Alvaro Herrera wrote:
> On 2020-Jan-23, Bruce Momjian wrote:
> 
> > On Thu, Jan 23, 2020 at 01:05:50PM -0500, Robert Haas wrote:
> > > > Another
> > > > problem, though, is how do you _flag_ file names as being
> > > > base64-encoded?  Use another JSON field to specify that?
> > > 
> > > Alvaro's proposed solution in the message to which you replied was to
> > > call the field either 'path' or 'path_base64' depending on whether
> > > base-64 escaping was used. That seems better to me than having a field
> > > called 'path' and a separate field called 'is_path_base64' or
> > > whatever.
> > 
> > Hmm, so the JSON key name is the flag --- interesting.
> 
> Yes, because if you use the same key name, you risk a dumb tool writing
> the file name as the encoded name.  That's worse because it's harder to
> figure out that it's wrong.

Yes, good point.  I think my one concern is that someone might specify
both keys in the JSON, which would be very odd.

-- 
  Bruce Momjian  http://momjian.us
  EnterpriseDB http://enterprisedb.com

+ As you are, so once was I.  As I am, so you will be. +
+  Ancient Roman grave inscription +




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-23, Bruce Momjian wrote:

> On Thu, Jan 23, 2020 at 01:05:50PM -0500, Robert Haas wrote:
> > > Another
> > > problem, though, is how do you _flag_ file names as being
> > > base64-encoded?  Use another JSON field to specify that?
> > 
> > Alvaro's proposed solution in the message to which you replied was to
> > call the field either 'path' or 'path_base64' depending on whether
> > base-64 escaping was used. That seems better to me than having a field
> > called 'path' and a separate field called 'is_path_base64' or
> > whatever.
> 
> Hmm, so the JSON key name is the flag --- interesting.

Yes, because if you use the same key name, you risk a dumb tool writing
the file name as the encoded name.  That's worse because it's harder to
figure out that it's wrong.

-- 
Álvaro Herrera  https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Bruce Momjian
On Thu, Jan 23, 2020 at 01:05:50PM -0500, Robert Haas wrote:
> > Another
> > problem, though, is how do you _flag_ file names as being
> > base64-encoded?  Use another JSON field to specify that?
> 
> Alvaro's proposed solution in the message to which you replied was to
> call the field either 'path' or 'path_base64' depending on whether
> base-64 escaping was used. That seems better to me than having a field
> called 'path' and a separate field called 'is_path_base64' or
> whatever.

Hmm, so the JSON key name is the flag --- interesting.

-- 
  Bruce Momjian  http://momjian.us
  EnterpriseDB http://enterprisedb.com

+ As you are, so once was I.  As I am, so you will be. +
+  Ancient Roman grave inscription +




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Robert Haas
On Thu, Jan 23, 2020 at 12:49 PM Bruce Momjian  wrote:
> Another idea is to use base64 for all non-ASCII file names, so we don't
> need to check if the file name is valid UTF8 before outputting --- we
> just need to check for non-ASCII, which is much easier.

I think that we have the infrastructure available to check in a
convenient way whether it's valid as UTF-8, so this might not be
necessary, but I will look into it further unless there is a consensus
to go another direction entirely.

> Another
> problem, though, is how do you _flag_ file names as being
> base64-encoded?  Use another JSON field to specify that?

Alvaro's proposed solution in the message to which you replied was to
call the field either 'path' or 'path_base64' depending on whether
base-64 escaping was used. That seems better to me than having a field
called 'path' and a separate field called 'is_path_base64' or
whatever.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Robert Haas
On Thu, Jan 23, 2020 at 12:24 PM Alvaro Herrera
 wrote:
> I do have files with Latin-1-encoded names in my filesystem, even though
> my system is UTF-8, so I understand the problem.  I was wondering if it
> would work to encode any non-UTF8-valid name using something like
> base64; the encoded name will be plain ASCII and can be put in the
> manifest, probably using a different field of the JSON object -- so for
> a normal file you'd have { path => '1234/2345' } but for a
> Latin-1-encoded file you'd have { path_base64 => '4Wx2YXJvLmNvbmYK' }.
> Then it's the job of the tool to ensure it decodes the name to its
> original form when creating/querying for the file.

Right. That's what I meant, a couple of messages back, when I
mentioned an extra layer of escaping, but your explanation here is
better because it's more detailed.

> A problem I have with this idea is that this is very corner-casey, so
> most tool implementors will never realize that there's a need to decode
> certain file names.

That's a valid concern. I would not necessarily have thought that
out-of-core tools would find a lot of use in reading them, provided
PostgreSQL itself both knows how to generate them and how to validate
them, but the interest in this topic suggests that people do care
about that.

Mostly, I think this issue shows the folly of imagining that putting
everything into JSON is a good idea because it gets rid of escaping
problems. Actually, what it does is create multiple kinds of escaping
problems. With the format I proposed, you only have to worry that the
file name might contain a tab character, because in that format, tab
is the delimiter. But, if we use JSON, then we've got the same problem
with JSON's delimiter, namely a double quote, which the JSON parser
will solve for you.  We then have this additional and somewhat obscure
problem with invalidly encoded data, to which JSON itself provides no
solution. We have to invent our own, probably along the lines of what
you have just proposed. I think one can reasonably wonder whether this
is really an improvement over just inventing a way to escape tabs.

That said, there are other reasons to want to go with JSON, most of
all the fact that it's easy to see how to extend the format to
additional fields. Once you decide that each file will have an object,
you can add any keys that you like to that object and things should
scale up nicely. It has been my contention that we probably will not
find the need to add much more here, but such arguments are always
suspect and have a good chance of being wrong. Also, such prophecies
can be self-fulfilling: if the format is easy to extend, then people
may extend it, whereas if it is hard to extend, they may not try, or
they may try and then give up.

At the end of the day, I'm willing to make this work either way. I do
not think that my original proposal was bad, but there were things not
to like about it. There are also things not to like about using a
JSON-based format, and this seems to me to be a fairly significant
one. However, both sets of problems are solvable, and neither design
is awful. It's just a question of which kind of warts we like better.
To be blunt, I've already spent a lot more effort on this problem than
I would have liked, and more than 90% of it has been spent on the
issue of how to format a file that only PostgreSQL needs to read and
write. While I do not think that good file formats are unimportant, I
remain unconvinced that switching to JSON is making things better. It
seems like it's just making them different, while inflating the amount
of coding required by a fairly significant multiple.

That being said, unless somebody objects in the next few days, I am
going to assume that the people who preferred JSON over a
tab-separated file are also happy with the idea of using base-64
encoding as proposed by you above to represent files whose names are
not valid UTF-8 sequences; and I will then go rewrite the patch that
generates the manifests to use that format, rewrite the validator
patch to parse that format using this infrastructure, and hopefully
end up with something that can be reviewed and committed before we run
out of time to get things done for this release. If anybody wants to
vote for another plan, please vote soon.

In the meantime, any review of the new patches I posted here yesterday
would be warmly appreciated.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Allow to_date() and to_timestamp() to accept localized names

2020-01-23 Thread Tom Lane
Alvaro Herrera  writes:
> Even whitespace is problematic for some languages, such as Afan,

> mon  "Qunxa Garablu";/
>  "Naharsi Kudo";/
>  "Ciggilta Kudo";/

> (etc) but I think whitespace-splitting is going to be more comprehensible
> in the vast majority of cases, even if not perfect.

Interesting.  Given that to_date et al are often willing to ignore
whitespace in input, I wonder whether we won't have some other issues
with names like that --- not so much that basic cases wouldn't work,
as that people might reasonably expect that, say, we'd be flexible
about the amount of whitespace.  Seems like a problem for another day
though.

In the meantime, I agree that "truncate at whitespace" is a better
heuristic for the error messages than what we've got.  I'll go make it so.

regards, tom lane




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Bruce Momjian
On Thu, Jan 23, 2020 at 02:23:14PM -0300, Alvaro Herrera wrote:
> On 2020-Jan-23, Robert Haas wrote:
> 
> > No, that's not it. Suppose that Álvaro Herrera has some custom
> > settings he likes to put on all the PostgreSQL clusters that he uses,
> > so he creates a file álvaro.conf and uses an "include" directive in
> > postgresql.conf to suck in those settings. If he also likes UTF-8,
> > then the file name will be stored in the file system as a 12-byte
> > value of which the first two bytes will be 0xc3 0xa1. In that case,
> > everything will be fine, because JSON is supposed to always be UTF-8,
> > and the file name is UTF-8, and it's all good. But suppose he instead
> > likes LATIN-1.
> 
> I do have files with Latin-1-encoded names in my filesystem, even though
> my system is UTF-8, so I understand the problem.  I was wondering if it
> would work to encode any non-UTF8-valid name using something like
> base64; the encoded name will be plain ASCII and can be put in the
> manifest, probably using a different field of the JSON object -- so for
> a normal file you'd have { path => '1234/2345' } but for a
> Latin-1-encoded file you'd have { path_base64 => '4Wx2YXJvLmNvbmYK' }.
> Then it's the job of the tool to ensure it decodes the name to its
> original form when creating/querying for the file.
> 
> A problem I have with this idea is that this is very corner-casey, so
> most tool implementors will never realize that there's a need to decode
> certain file names.

Another idea is to use base64 for all non-ASCII file names, so we don't
need to check if the file name is valid UTF8 before outputting --- we
just need to check for non-ASCII, which is much easier.  Another
problem, though, is how do you _flag_ file names as being
base64-encoded?  Use another JSON field to specify that?

-- 
  Bruce Momjian  http://momjian.us
  EnterpriseDB http://enterprisedb.com

+ As you are, so once was I.  As I am, so you will be. +
+  Ancient Roman grave inscription +




Re: ssl passphrase callback

2020-01-23 Thread Robert Haas
On Thu, Nov 14, 2019 at 8:54 AM Sehrope Sarkuni  wrote:
> Has the idea of using environment variables (rather than command line
> args) for external commands been brought up before? I couldn't find
> anything in the mailing list archives.

Passing data through environment variables isn't secure. Try 'ps -E'
on MacOS, or something like 'ps axe' on Linux.

If we want to pass data securely to child processes, the way to do it
is via stdin. Data sent back and forth via file descriptors can't
easily be snooped by other users on the system.
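
As a rough standalone illustration (not from any patch), the parent can push the
secret down a pipe wired to the child's stdin, so it never shows up in argv or
the environment:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

int
main(void)
{
    const char *secret = "example passphrase";     /* placeholder value */
    int         fd[2];
    pid_t       pid;

    if (pipe(fd) != 0)
        return 1;

    pid = fork();
    if (pid == 0)
    {
        /* child: the secret arrives on stdin, never via argv or environ */
        char        buf[256];
        ssize_t     n;

        close(fd[1]);
        dup2(fd[0], STDIN_FILENO);
        n = read(STDIN_FILENO, buf, sizeof(buf) - 1);
        buf[n > 0 ? n : 0] = '\0';
        /* ... hand buf to whatever needs the passphrase ... */
        return 0;
    }

    /* parent: write the secret, then close so the child sees EOF */
    close(fd[0]);
    if (write(fd[1], secret, strlen(secret)) < 0)
        perror("write");
    close(fd[1]);
    return 0;
}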

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: making the backend's json parser work in frontend code

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-23, Robert Haas wrote:

> No, that's not it. Suppose that Álvaro Herrera has some custom
> settings he likes to put on all the PostgreSQL clusters that he uses,
> so he creates a file álvaro.conf and uses an "include" directive in
> postgresql.conf to suck in those settings. If he also likes UTF-8,
> then the file name will be stored in the file system as a 12-byte
> value of which the first two bytes will be 0xc3 0xa1. In that case,
> everything will be fine, because JSON is supposed to always be UTF-8,
> and the file name is UTF-8, and it's all good. But suppose he instead
> likes LATIN-1.

I do have files with Latin-1-encoded names in my filesystem, even though
my system is UTF-8, so I understand the problem.  I was wondering if it
would work to encode any non-UTF8-valid name using something like
base64; the encoded name will be plain ASCII and can be put in the
manifest, probably using a different field of the JSON object -- so for
a normal file you'd have { path => '1234/2345' } but for a
Latin-1-encoded file you'd have { path_base64 => '4Wx2YXJvLmNvbmYK' }.
Then it's the job of the tool to ensure it decodes the name to its
original form when creating/querying for the file.
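
To make that concrete, here is a small standalone sketch (invented for
illustration, not code from any patch in this thread); real code would also
need proper JSON string escaping for the non-base64 case:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Minimal structural UTF-8 check (no overlong/surrogate policing). */
static bool
is_valid_utf8(const unsigned char *buf, size_t len)
{
    size_t i = 0;

    while (i < len)
    {
        unsigned char c = buf[i];
        int follow;

        if (c < 0x80)
            follow = 0;
        else if ((c & 0xE0) == 0xC0)
            follow = 1;
        else if ((c & 0xF0) == 0xE0)
            follow = 2;
        else if ((c & 0xF8) == 0xF0)
            follow = 3;
        else
            return false;
        if (i + follow >= len)
            return false;
        for (int j = 1; j <= follow; j++)
            if ((buf[i + j] & 0xC0) != 0x80)
                return false;
        i += follow + 1;
    }
    return true;
}

/* Tiny base64 encoder; out must have room for 4*((len+2)/3)+1 bytes. */
static void
base64_encode(const unsigned char *in, size_t len, char *out)
{
    static const char tbl[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    size_t i = 0;

    for (; i + 2 < len; i += 3)
    {
        *out++ = tbl[in[i] >> 2];
        *out++ = tbl[((in[i] & 0x03) << 4) | (in[i + 1] >> 4)];
        *out++ = tbl[((in[i + 1] & 0x0F) << 2) | (in[i + 2] >> 6)];
        *out++ = tbl[in[i + 2] & 0x3F];
    }
    if (i < len)
    {
        *out++ = tbl[in[i] >> 2];
        if (i + 1 < len)
        {
            *out++ = tbl[((in[i] & 0x03) << 4) | (in[i + 1] >> 4)];
            *out++ = tbl[(in[i + 1] & 0x0F) << 2];
        }
        else
        {
            *out++ = tbl[(in[i] & 0x03) << 4];
            *out++ = '=';
        }
        *out++ = '=';
    }
    *out = '\0';
}

/* Emit one manifest member, choosing the key name by encoding validity. */
static void
emit_manifest_path(const char *name)
{
    size_t len = strlen(name);

    if (is_valid_utf8((const unsigned char *) name, len))
        printf("{ \"path\": \"%s\" }\n", name);   /* plus JSON escaping in real code */
    else
    {
        char encoded[4 * ((len + 2) / 3) + 1];

        base64_encode((const unsigned char *) name, len, encoded);
        printf("{ \"path_base64\": \"%s\" }\n", encoded);
    }
}

int
main(void)
{
    emit_manifest_path("base/16384/16385");     /* plain ASCII */
    emit_manifest_path("\xc3\xa1lvaro.conf");   /* UTF-8 spelling */
    emit_manifest_path("\xe1lvaro.conf");       /* Latin-1 spelling, not valid UTF-8 */
    return 0;
}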

A problem I have with this idea is that this is very corner-casey, so
most tool implementors will never realize that there's a need to decode
certain file names.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: Online checksums patch - once again

2020-01-23 Thread Robert Haas
On Thu, Jan 23, 2020 at 6:19 AM Daniel Gustafsson  wrote:
> A bigger question is how to handle the offline capabilities.  pg_checksums can
> enable or disable checksums in an offline cluster, which will put the cluster
> in a state where the pg_control file and the catalog don't match at startup.
> One strategy could be to always trust the pg_control file and alter the 
> catalog
> accordingly, but that still leaves a window of inconsistent cluster state.

I suggest that we define things so that the catalog state is only
meaningful during a state transition. That is, suppose the cluster
state is either "on", "enabling", or "off". When it's "on", checksums
are written and verified. When it is "off", checksums are not written
and not verified. When it's "enabling", checksums are written but not
verified. Also, when and only when the state is "enabling", the
background workers that try to rewrite relations to add checksums run,
and those workers look at the catalog state to figure out what to do.
Once the state changes to "on", those workers don't run any more, and
so the catalog state does not make any difference.

A tricky problem is handling the case where the state is switched
from "enabling" to "on" and then back to "off" and then to "enabling"
again. You don't want to confuse the state from the previous round of
enabling with the state for the current round of enabling. Suppose in
addition to storing the cluster-wide state of on/off/enabling, we also
store an "enable counter" which is incremented every time the state
goes from "off" to "enabling". Then, for each database and relation,
we store a counter that indicates the value of the enable counter at
the time we last scanned/rewrote that relation to set checksums. Now,
you're covered. And, to save space, it can probably be a 32-bit
counter, since 4 billion disable/reenable cycles ought to be enough
for anybody.
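
Sketched with invented names (nothing below is from an actual patch), the test a
background worker would apply per relation could look like:

#include <stdbool.h>
#include <stdint.h>

typedef enum ChecksumState
{
    CHECKSUMS_OFF,              /* not written, not verified */
    CHECKSUMS_ENABLING,         /* written, not yet verified */
    CHECKSUMS_ON                /* written and verified */
} ChecksumState;

/*
 * Hypothetical helper: cluster_counter is bumped on every off->enabling
 * transition; rel_counter is the value recorded for the relation when it was
 * last rewritten (or created/truncated/rewritten by DDL).
 */
static bool
relation_needs_checksum_rewrite(ChecksumState state,
                                uint32_t cluster_counter,
                                uint32_t rel_counter)
{
    /* workers only run while enabling; "on" and "off" ignore the catalog */
    if (state != CHECKSUMS_ENABLING)
        return false;
    return rel_counter != cluster_counter;
}

int
main(void)
{
    /* a relation stamped in a previous cycle (counter 1) during cycle 2 */
    return relation_needs_checksum_rewrite(CHECKSUMS_ENABLING, 2, 1) ? 0 : 1;
}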

It would not be strictly necessary to store this in pg_class. Another
thing that could be done is to store it in a separate system table
that could even be truncated when enabling is not in progress - though
it would be unwise to assume that it's always truncated at the
beginning of an enabling cycle, since it would be hard to guarantee
that the previous enabling cycle didn't fail when trying to truncate.
So you'd probably still end up with something like the counter
approach. I am inclined to think that inventing a whole new catalog
for this is over-engineering, but someone might think differently.
Note that creating a table while enabling is in progress needs to set
the enabling counter for the new table to the new value of the
enabling counter, not the old one, because the new table starts empty
and won't end up with any pages that don't have valid checksums.
Similarly, TRUNCATE, CLUSTER, VACUUM FULL, and rewriting variants of
ALTER TABLE can set the new value for the enabling counter as a side
effect. That's probably easier and more efficient if it's just value
in pg_class than if they have to go poking around in another catalog.
So I am tentatively inclined to think that just putting it in pg_class
makes more sense.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: [Proposal] Global temporary tables

2020-01-23 Thread Pavel Stehule
On Thu, Jan 23, 2020 at 17:28, 曾文旌(义从) wrote:

>
>
> On Jan 22, 2020 at 1:29 PM, 曾文旌(义从) wrote:
>
>
>
> On Jan 21, 2020 at 1:43 PM, Pavel Stehule wrote:
>
> Hi
>
> I have some free time this evening, so I will check this patch
>
> I have one question
>
> + /* global temp table get relstats from localhash */
> + if (RELATION_IS_GLOBAL_TEMP(rel))
> + {
> + get_gtt_relstats(RelationGetRelid(rel),
> + , , ,
> + NULL, NULL);
> + }
> + else
> + {
> + /* coerce values in pg_class to more desirable types */
> + relpages = (BlockNumber) rel->rd_rel->relpages;
> + reltuples = (double) rel->rd_rel->reltuples;
> + relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
> + }
>
> Isn't it possible to fill the rd_rel structure too, so this branching can be
> reduced?
>
> I'll make some improvements to optimize this part of the code.
>
> I'm trying to improve this part of the implementation in
> global_temporary_table_v7-pg13.patch
> Please check my patch and give me feedback.
>
>
It is looking better; still, there are some strange things (I haven't tested the
functionality yet)

  elog(ERROR, "invalid relpersistence: %c",
  relation->rd_rel->relpersistence);
@@ -3313,6 +3336,10 @@ RelationBuildLocalRelation(const char *relname,
  rel->rd_backend = BackendIdForTempRelations();
  rel->rd_islocaltemp = true;
  break;
+ case RELPERSISTENCE_GLOBAL_TEMP:
+ rel->rd_backend = BackendIdForTempRelations();
+ rel->rd_islocaltemp = true;
+ break;
  default:

+ rel->rd_islocaltemp = true;  <<< if this is valid, then the name of
field "rd_islocaltemp" is not probably best



regards

Pavel




>
> Thanks
>
> Wenjing
>
>
>
>
>
> Regards
>
> Pavel
>
> On Mon, Jan 20, 2020 at 17:27, 曾文旌(义从) wrote:
>
>>
>>
>> > On Jan 20, 2020 at 1:32 AM, Erik Rijkers wrote:
>> >
>> > On 2020-01-19 18:04, 曾文旌(义从) wrote:
>> >>> On Jan 14, 2020 at 9:20 PM, Pavel Stehule wrote:
>> >>> On Tue, Jan 14, 2020 at 14:09, 曾文旌(义从) <wenjing@alibaba-inc.com> wrote:
>> >
>> >>> [global_temporary_table_v4-pg13.patch ]
>> >
>> > Hi,
>> >
>> > This patch doesn't quiet apply for me:
>> >
>> > patching file src/backend/access/common/reloptions.c
>> > patching file src/backend/access/gist/gistutil.c
>> > patching file src/backend/access/hash/hash.c
>> > Hunk #1 succeeded at 149 (offset 3 lines).
>> > patching file src/backend/access/heap/heapam_handler.c
>> > patching file src/backend/access/heap/vacuumlazy.c
>> > patching file src/backend/access/nbtree/nbtpage.c
>> > patching file src/backend/access/table/tableam.c
>> > patching file src/backend/access/transam/xlog.c
>> > patching file src/backend/catalog/Makefile
>> > Hunk #1 FAILED at 44.
>> > 1 out of 1 hunk FAILED -- saving rejects to file
>> src/backend/catalog/Makefile.rej
>> > [...]
>> >   (The rest applies without errors)
>> >
>> > src/backend/catalog/Makefile.rej contains:
>> >
>> > 
>> > --- src/backend/catalog/Makefile
>> > +++ src/backend/catalog/Makefile
>> > @@ -44,6 +44,8 @@ OBJS = \
>> >   storage.o \
>> >   toasting.o
>> >
>> > +OBJS += storage_gtt.o
>> > +
>> > BKIFILES = postgres.bki postgres.description postgres.shdescription
>> >
>> > include $(top_srcdir)/src/backend/common.mk
>> > 
>> >
>> > Can you have a look?
>> I updated the code and remade the patch.
>> Please give me feedback if you have any more questions.
>>
>>
>>
>>
>> >
>> >
>> > thanks,
>> >
>> > Erik Rijkers
>> >
>> >
>> >
>> >
>> >
>>
>>
>
>


Re: DROP OWNED CASCADE vs Temp tables

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-14, Alvaro Herrera wrote:

> On 2020-Jan-13, Tom Lane wrote:
> 
> > That seems fundamentally wrong.  By the time we've queued an object for
> > deletion in dependency.c, we have a lock on it, and we've verified that
> > the object is still there (cf. systable_recheck_tuple calls).
> > If shdepDropOwned is doing it differently, I'd say shdepDropOwned is
> > doing it wrong.
> 
> Hmm, it seems to be doing it differently.  Maybe it should be acquiring
> locks on all objects in that nested loop and verified them for
> existence, so that when it calls performMultipleDeletions the objects
> are already locked, as you say.

Yeah, this solves the reported bug.

This is not a 100% solution: there's the cases when the user is removed
from an ACL and from a policy, and those deletions are done directly
instead of accumulating to the end for a mass deletion.

I had to export AcquireDeletionLock (previously a static in
dependency.c).  I wonder if I should export ReleaseDeletionLock too, for
symmetry.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index c4a4df25b8..2ec3e39694 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -199,7 +199,6 @@ static void reportDependentObjects(const ObjectAddresses *targetObjects,
 static void deleteOneObject(const ObjectAddress *object,
 			Relation *depRel, int32 flags);
 static void doDeletion(const ObjectAddress *object, int flags);
-static void AcquireDeletionLock(const ObjectAddress *object, int flags);
 static void ReleaseDeletionLock(const ObjectAddress *object);
 static bool find_expr_references_walker(Node *node,
 		find_expr_references_context *context);
@@ -1530,7 +1529,7 @@ doDeletion(const ObjectAddress *object, int flags)
  * else.  Note that dependency.c is not concerned with deleting any kind of
  * shared-across-databases object, so we have no need for LockSharedObject.
  */
-static void
+void
 AcquireDeletionLock(const ObjectAddress *object, int flags)
 {
 	if (object->classId == RelationRelationId)
diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c
index 2ef792dbd7..20d7ee7d87 100644
--- a/src/backend/catalog/pg_shdepend.c
+++ b/src/backend/catalog/pg_shdepend.c
@@ -1319,12 +1319,16 @@ shdepDropOwned(List *roleids, DropBehavior behavior)
 	elog(ERROR, "unexpected dependency type");
 	break;
 case SHARED_DEPENDENCY_ACL:
+	/* XXX need to lock+accumulate everything and drop later? */
 	RemoveRoleFromObjectACL(roleid,
 			sdepForm->classid,
 			sdepForm->objid);
 	break;
 case SHARED_DEPENDENCY_POLICY:
-	/* If unable to remove role from policy, remove policy. */
+	/*
+	 * Try to remove role from policy; if unable to, remove
+	 * policy.
+	 */
 	if (!RemoveRoleFromObjectPolicy(roleid,
 	sdepForm->classid,
 	sdepForm->objid))
@@ -1332,6 +1336,15 @@ shdepDropOwned(List *roleids, DropBehavior behavior)
 		obj.classId = sdepForm->classid;
 		obj.objectId = sdepForm->objid;
 		obj.objectSubId = sdepForm->objsubid;
+		/*
+		 * Acquire lock on object, then verify this dependency
+		 * is still relevant.  If not, the object might have
+		 * been dropped or the policy modified.  Ignore the
+		 * object in that case.
+		 */
+		AcquireDeletionLock(&obj, 0);
+		if (!systable_recheck_tuple(scan, tuple))
+			break;
		add_exact_object_address(&obj, deleteobjs);
 	}
 	break;
@@ -1342,6 +1355,10 @@ shdepDropOwned(List *roleids, DropBehavior behavior)
 		obj.classId = sdepForm->classid;
 		obj.objectId = sdepForm->objid;
 		obj.objectSubId = sdepForm->objsubid;
+		/* as above */
+		AcquireDeletionLock(&obj, 0);
+		if (!systable_recheck_tuple(scan, tuple))
+			break;
		add_exact_object_address(&obj, deleteobjs);
 	}
 	break;
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 0cd6fcf027..fa9252b07c 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -142,6 +142,8 @@ typedef enum ObjectClass
 
 /* in dependency.c */
 
+extern void AcquireDeletionLock(const ObjectAddress *object, int flags);
+
 extern void performDeletion(const ObjectAddress *object,
 			DropBehavior behavior, int flags);
 


Re: making the backend's json parser work in frontend code

2020-01-23 Thread Robert Haas
On Wed, Jan 22, 2020 at 10:00 PM Mark Dilger
 wrote:
> Hopefully, this addresses Robert’s concern upthread about the filesystem name 
> not necessarily being in utf8 format, though I might be misunderstanding the 
> exact thrust of his concern.  I can think of other possible interpretations 
> of his concern as he expressed it, so I’ll wait for him to clarify.

No, that's not it. Suppose that Álvaro Herrera has some custom
settings he likes to put on all the PostgreSQL clusters that he uses,
so he creates a file álvaro.conf and uses an "include" directive in
postgresql.conf to suck in those settings. If he also likes UTF-8,
then the file name will be stored in the file system as a 12-byte
value of which the first two bytes will be 0xc3 0xa1. In that case,
everything will be fine, because JSON is supposed to always be UTF-8,
and the file name is UTF-8, and it's all good. But suppose he instead
likes LATIN-1. Then the file name will be stored as an 11-byte value
and the first byte will be 0xe1. The second byte, representing a
lower-case 'l', will be 0x6c. But we can't put a byte sequence that
goes 0xe1 0x6c into a JSON manifest stored as UTF-8, because that's
not valid in UTF-8.  UTF-8 requires that every byte from 0xc0-0xff be
followed by one or more bytes in the range 0x80-0xbf, and our
hypothetical file name that starts with 0xe1 0x6c does not meet that
criteria.

Now, you might say "well, why don't we just do an encoding
conversion?", but we can't. When the filesystem tells us what the file
names are, it does not tell us what encoding the person who created
those files had in mind. We don't know that they had *any* encoding in
mind. IIUC, a file in the data directory can have a name that consists
of any sequence of bytes whatsoever, so long as it doesn't contain
prohibited characters like a path separator or \0 byte. But only some
of those possible octet sequences can be stored in a manifest that has
to be valid UTF-8.

The degree to which there is a practical problem here is limited by
the fact that most filenames within the data directory are chosen by
the system, e.g. base/16384/16385, and those file names are only going
to contain ASCII characters (i.e. code points 0-127) and those are
valid in UTF-8 and lots of other encodings. Moreover, most people who
create additional files in the data directory will probably use ASCII
characters for those as well, at least if they are from an
English-speaking country, and if they're not, they're likely going to
use UTF-8, and then they'll still be fine. But there is no rule that
says people have to do that, and if somebody wants to use file names
based around SJIS or whatever, the backup manifest functionality
should not for that reason break.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Fetching timeline during recovery

2020-01-23 Thread Jehan-Guillaume de Rorthais
On Tue, 07 Jan 2020 15:57:29 +0900 (JST)
Kyotaro Horiguchi  wrote:

> At Mon, 23 Dec 2019 15:38:16 +0100, Jehan-Guillaume de Rorthais
>  wrote in 
> >  1. we could decide to remove this filter to expose the data even when no
> > wal receiver is active. It's the same behavior than pg_stat_subscription
> > view. It could introduce regression from tools point of view, but adds some
> > useful information. I would vote 0 for it.  
> 
> A subscription exists since it is defined and regardless whether it is
> active or not. It is strange that we see a line in the view if
> replication is not configured. But it is reasonable to show if it is
> configured.  We could do that by checking PrimaryConnInfo. (I would
> vote +0.5 for it).

Thanks. I put this on hold for now; I'm waiting for some more opinions, as
there's no strong position yet.

> >  2. we could extend it with new replayed lsn/tli fields. I would vote +1 for
> > it.  
> 
> +1. As of now a walsender lives for just one timeline, because it ends
> for disconnection from walsender when the master moves to a new
> timeline.  That being said, we already have the columns for TLI for
> both starting and received-up-to LSN so we would need it also for
> replayed LSN for a consistent looking.

I added applied_lsn and applied_tli to the pg_stat_get_wal_receiver function
output columns.

However, note that applying xlog is the responsibility of the startup process,
not the wal receiver one. Is it OK that pg_stat_get_wal_receiver
returns stats not directly related to the wal receiver?

> The function is going to show "streaming" but conninfo is not shown
> until connection establishes. That state is currently hidden by the
> PID filtering of the view. We might need to keep the WALRCV_STARTING
> state until connection establishes.

Indeed, fixed.

> sender_host and sender_port have bogus values until connection is
> actually established when conninfo is changed. They as well as
> conninfo should be hidden until connection is established, too, I
> think.

Fixed as well.

Please find the new version of the patch in attachment.

Thank you for your review!
>From d1e5d6c33e193626f05911462e66a6c96366bfa6 Mon Sep 17 00:00:00 2001
From: Jehan-Guillaume de Rorthais 
Date: Tue, 31 Dec 2019 18:29:13 +0100
Subject: [PATCH] Always expose available stats from wal receiver

Makes admin function pg_stat_get_wal_receiver() return available data
from WalRcv in shared memory, whatever the state of the wal receiver
process.

This allows supervision or HA tools to gather various physical
replication stats even when the wal receiver is stopped. For example,
the latest timeline the wal receiver was receiving before shutting
down.

The behavior of the pg_stat_wal_receiver view has been kept to avoid
regressions: it returns no row when the wal receiver is shut down.
---
 src/backend/replication/walreceiver.c  | 61 --
 src/include/catalog/pg_proc.dat|  6 +--
 src/test/recovery/t/004_timeline_switch.pl | 12 -
 3 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index a5e85d32f3..e4273b7f55 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -226,7 +226,6 @@ WalReceiverMain(void)
 	}
 	/* Advertise our PID so that the startup process can kill us */
 	walrcv->pid = MyProcPid;
-	walrcv->walRcvState = WALRCV_STREAMING;
 
 	/* Fetch information required to start streaming */
 	walrcv->ready_to_display = false;
@@ -295,6 +294,7 @@ WalReceiverMain(void)
 		strlcpy((char *) walrcv->sender_host, sender_host, NI_MAXHOST);
 
 	walrcv->sender_port = sender_port;
+	walrcv->walRcvState = WALRCV_STREAMING;
 	walrcv->ready_to_display = true;
 	SpinLockRelease(>mutex);
 
@@ -1368,6 +1368,8 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	TimeLineID	receive_start_tli;
 	XLogRecPtr	received_lsn;
 	TimeLineID	received_tli;
+	XLogRecPtr	applied_lsn;
+	TimeLineID	applied_tli;
 	TimestampTz last_send_time;
 	TimestampTz last_receipt_time;
 	XLogRecPtr	latest_end_lsn;
@@ -1379,6 +1381,7 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 
 	/* Take a lock to ensure value consistency */
 	SpinLockAcquire(>mutex);
+	applied_lsn = GetXLogReplayRecPtr(_tli);
 	pid = (int) WalRcv->pid;
 	ready_to_display = WalRcv->ready_to_display;
 	state = WalRcv->walRcvState;
@@ -1396,13 +1399,6 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	strlcpy(conninfo, (char *) WalRcv->conninfo, sizeof(conninfo));
 	SpinLockRelease(>mutex);
 
-	/*
-	 * No WAL receiver (or not ready yet), just return a tuple with NULL
-	 * values
-	 */
-	if (pid == 0 || !ready_to_display)
-		PG_RETURN_NULL();
-
 	/* determine result type */
 	if (get_call_result_type(fcinfo, NULL, ) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
@@ -1411,7 +1407,10 @@ pg_stat_get_wal_receiver(PG_FUNCTION_ARGS)
 	nulls = palloc0(sizeof(bool) * tupdesc->natts);
 
 	/* Fetch values 

Re: Allow to_date() and to_timestamp() to accept localized names

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-23, Tom Lane wrote:

> That particular case could be improved by stopping at a dash ... but
> since this code is also used to match strings like "A.M.", we can't
> just exclude punctuation in general.  Breaking at whitespace seems
> like a reasonable compromise.

Yeah, and there are cases where dashes are used in names -- browsing
through glibc for example I quickly found Akan, for which the month names are:

mon  "Sanda-ppn";/
 "Kwakwar-gyefuo";/
 "Ebw-benem";/

and so on.  Even whitespace is problematic for some languages, such as Afan,

mon  "Qunxa Garablu";/
 "Naharsi Kudo";/
 "Ciggilta Kudo";/
(etc) but I think whitespace-splitting is going to be more comprehensible
in the vast majority of cases, even if not perfect.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: New feature proposal (trigger)

2020-01-23 Thread Pavel Stehule
On Thu, Jan 23, 2020 at 17:26, Sergiu Velescu wrote:

> Dear PgSQL-Hackers,
>
>
>
> I would like to propose a new feature which is missing in PgSQL but quite
> useful and nice to have (and exists in Oracle and probably in some other
> RDBMS), I speak about “Database Level” triggers: BeforePgStart,
> AfterPgStarted, OnLogin, OnSuccessfulLogin, BeforePGshutdown, OnLogOut – I
> just mentioned some of it but the final events could be different.
>

>
> These DB Level triggers are quite useful, for example if somebody wants to
> set some PG env. variables depending on the user belonging to one or another role,
> or wants to track who/when logged in/out, start a stored procedure
> AfterPgStarted and so on.
>

Do you have some examples of these useful triggers?

I don't know of any.

Regards

Pavel


>
> Thanks!


New feature proposal (trigger)

2020-01-23 Thread Sergiu Velescu
Dear PgSQL-Hackers,

I would like to propose a new feature which is missing in PgSQL but quite 
useful and nice to have (and exists in Oracle and probably in some other 
RDBMS), I speak about "Database Level" triggers: BeforePgStart, AfterPgStarted, 
OnLogin, OnSuccessfulLogin, BeforePGshutdown, OnLogOut - I just mentioned some 
of it but the final events could be different.

These DB Level triggers are quite useful, for example if somebody wants to set 
some PG env. variables depending on the user belonging to one or another role, 
or wants to track who/when logged in/out, start a stored procedure AfterPgStarted, 
and so on.

Thanks!

The information in this email is confidential and may be legally privileged. It 
is intended solely for the addressee. Any opinions expressed are mine and do 
not necessarily represent the opinions of the Company. Emails are susceptible 
to interference. If you are not the intended recipient, any disclosure, 
copying, distribution or any action taken or omitted to be taken in reliance on 
it, is strictly prohibited and may be unlawful. If you have received this 
message in error, do not open any attachments but please notify the Endava 
Service Desk on (+44 (0)870 423 0187), and delete this message from your 
system. The sender accepts no responsibility for information, errors or 
omissions in this email, or for its use or misuse, or for any act committed or 
omitted in connection with this communication. If in doubt, please verify the 
authenticity of the contents with the sender. Please rely on your own virus 
checkers as no responsibility is taken by the sender for any damage rising out 
of any bug or virus infection.

Endava plc is a company registered in England under company number 5722669 
whose registered office is at 125 Old Broad Street, London, EC2N 1AR, United 
Kingdom. Endava plc is the Endava group holding company and does not provide 
any services to clients. Each of Endava plc and its subsidiaries is a separate 
legal entity and has no liability for another such entity's acts or omissions.


Re: Allow to_date() and to_timestamp() to accept localized names

2020-01-23 Thread Tom Lane
Alvaro Herrera  writes:
> On 2020-Jan-22, Tom Lane wrote:
>> Arthur Zakirov  writes:
>>> Shouldn't we just show all remaining string instead of truncating it? 

>> That would avoid a bunch of arbitrary decisions, for sure.
>> Anybody have an objection?

> I think it would be clearer to search for whitespace starting at the
> start of the bogus token and stop there.  It might not be perfect,
> particularly if any language has whitespace in a month etc name (I don't
> know any example but I guess it's not impossible for it to exist;
> Portuguese weekday names maybe?), but it seems better than either of the
> behaviors shown above.

I'm okay with that.  It won't work so well for cases like
"1-Januzry-1999", but it's still better than what happens now:

regression=# select to_date('1-Januzry-1999', 'DD MONTH ');
ERROR:  invalid value "Januzry-1" for "MONTH"

That particular case could be improved by stopping at a dash ... but
since this code is also used to match strings like "A.M.", we can't
just exclude punctuation in general.  Breaking at whitespace seems
like a reasonable compromise.

regards, tom lane




Re: BUG #16059: Tab-completion of filenames in COPY commands removes required quotes

2020-01-23 Thread Tom Lane
Peter Eisentraut  writes:
> On 2020-01-14 02:38, Tom Lane wrote:
>> On reflection, it seems like the best bet for the moment is to
>> remove double-quote from the rl_completer_quote_characters string,
>> which should restore all behavior around double-quoted strings to
>> what it was before.  (We have to keep single-quote in that string,
>> though, or quoted file names fail entirely.)

> This patch (version 6) looks good to me.

Thanks for reviewing!  Pushed, now we'll see what the buildfarm thinks...

regards, tom lane




Re: Allow to_date() and to_timestamp() to accept localized names

2020-01-23 Thread Alvaro Herrera
On 2020-Jan-22, Tom Lane wrote:

> Arthur Zakirov  writes:
> > On 2020/01/23 7:11, Tom Lane wrote:
> >> Closer examination shows that the "max" argument is pretty bogus as
> >> well.  It doesn't do anything except confuse the reader, because there
> >> are no cases where the value passed is less than the maximum array entry
> >> length, so it never acts to change seq_search's behavior.  So we should
> >> just drop that behavior from seq_search, too, and redefine "max" as
> >> having no purpose except to specify how much of the string to show in
> >> error messages.  There's still a question of what that should be for
> >> non-English cases, but at least we now have a clear idea of what we
> >> need the value to do.
> 
> > Shouldn't we just show all remaining string instead of truncating it? 
> 
> That would avoid a bunch of arbitrary decisions, for sure.
> Anybody have an objection?

I think it would be clearer to search for whitespace starting at the
start of the bogus token and stop there.  It might not be perfect,
particularly if any language has whitespace in a month etc name (I don't
know any example but I guess it's not impossible for it to exist;
Portuguese weekday names maybe?), but it seems better than either of the
behaviors shown above.
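
A trivial sketch of that heuristic (illustration only, not the actual
formatting.c code):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* How much of the bogus input to show: stop at the first whitespace byte. */
static int
error_token_length(const char *str)
{
    int len = 0;

    while (str[len] != '\0' && !isspace((unsigned char) str[len]))
        len++;
    return len;
}

int
main(void)
{
    const char *bogus = "Januzry-1999 12:34";

    printf("invalid value \"%.*s\"\n", error_token_length(bogus), bogus);
    return 0;
}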

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: libxml2 is dropping xml2-config

2020-01-23 Thread Christoph Berg
Re: Tom Lane 2020-01-21 <6994.1579567...@sss.pgh.pa.us>
> > (Putting in support for pkg-config still makes sense, though.)
> 
> Perhaps.  Are there any platforms where libxml2 doesn't install a
> pkg-config file?  What are we supposed to do if there's no pkg-config?

I can't comment on the libxml2 part, but making pkg-config a hard
requirement to build PG would probably be a safe bet nowadays. (I'm
still not arguing that we should.)

Re: David Steele 2020-01-21 <95349047-31dd-c7dc-df17-b488c2d34...@pgmasters.net>
> Yes -- at least Ubuntu < 18.04 does not install pkg-config for libxml2. I
> have not checked Debian yet, but I imagine < 8 will have the same issue.

That is not true, I just verified that both 16.04 and 14.04 (already
EOL) have a working `pkg-config libxml-2.0 --libs`.

> Christoph, are you saying we perhaps won't need to make this change?

I'm saying that the Debian libxml2 maintainer shouldn't try to make
this change unilaterally without libxml2 upstream.

Christoph




Re: BUG #16059: Tab-completion of filenames in COPY commands removes required quotes

2020-01-23 Thread Peter Eisentraut

On 2020-01-14 02:38, Tom Lane wrote:

I wrote:

Peter Eisentraut  writes:

I have found a weird behavior with identifier quoting, which is not the
subject of this patch, but it appears to be affected by it.



I'll think about how to improve this.  Given that we're dequoting
filenames explicitly anyway, maybe we don't need to tell readline that
double-quote is a quoting character.  Another idea is that maybe
we ought to refactor things so that identifier dequoting and requoting
is handled explicitly, similarly to the way filenames work now.
That would make the patch a lot bigger though.


On reflection, it seems like the best bet for the moment is to
remove double-quote from the rl_completer_quote_characters string,
which should restore all behavior around double-quoted strings to
what it was before.  (We have to keep single-quote in that string,
though, or quoted file names fail entirely.)


This patch (version 6) looks good to me.

--
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: table partitioning and access privileges

2020-01-23 Thread Fujii Masao




On 2020/01/22 16:54, Amit Langote wrote:

Fujii-san,

Thanks for taking a look.

On Fri, Jan 10, 2020 at 10:29 AM Fujii Masao  wrote:

On Tue, Jan 7, 2020 at 5:15 PM Amit Langote  wrote:

I tend to agree that TRUNCATE's permission model for inheritance
should be consistent with that for the other commands.  How about the
attached patch toward that end?


Thanks for the patch!

The patch basically looks good to me.

+GRANT SELECT (f1, fz), UPDATE (fz) ON atestc TO regress_priv_user2;
+REVOKE TRUNCATE ON atestc FROM regress_priv_user2;

These seem not to be necessary for the test.


You're right.  Removed in the attached updated patch.


Thanks for updating the patch! Barring any objection,
I will commit this fix and backport it to all supported versions.


BTW, I found that LOCK TABLE on the parent table checks the permission
of its child tables. This also needs to be fixed (as a separate patch)?


Commit ac33c7e2c13 and a past discussion ([1], [2], resp.) appear to
disagree with that position, but I would like to agree with you
because the behavior you suggest would be consistent with other
commands.  So, I'm attaching a patch for that too, although it would
be better to hear more opinions before accepting it.


Yes. I'd like to hear more opinion about this. But
since the document explains "Inherited queries perform access
permission checks on the parent table only." in ddl.sgml,
that also seems a bug to fix...

Regards,

--
Fujii Masao
NTT DATA CORPORATION
Advanced Platform Technology Group
Research and Development Headquarters




can we use different function in place of atoi in vacuumdb.c file

2020-01-23 Thread Mahendra Singh Thalor
Hi all,
While reviewing one patch, I found that if we give any non-integer string
to atoi (say "aa"), it returns zero (0) as output, so we do not raise any
error (treating 0 as a valid argument) and continue our operations.

Ex:
Let's say we gave "-P aa" (the patch is in review [1]); then it will disable
parallel vacuum, because atoi returns zero as the parallel degree, but
ideally it should give an error or at least not disable parallel vacuum.

I think, in place of atoi, we should use a different function (defGetInt32,
strtoint), or we can write our own function.

Thoughts?
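
For example, a strict wrapper along these lines (just a sketch, not a proposal
for the final API) would reject "aa" instead of silently yielding 0:

#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* parse a base-10 integer strictly; return false on garbage or overflow */
static bool
parse_integer(const char *arg, int *result)
{
    char   *endptr;
    long    val;

    errno = 0;
    val = strtol(arg, &endptr, 10);
    if (errno != 0 || endptr == arg || *endptr != '\0' ||
        val < INT_MIN || val > INT_MAX)
        return false;
    *result = (int) val;
    return true;
}

int
main(void)
{
    int parallel_workers;

    if (!parse_integer("aa", &parallel_workers))
        fprintf(stderr, "parallel workers must be an integer\n");
    return 0;
}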

[1]:
https://www.postgresql.org/message-id/CA%2Bfd4k6DgwtQSr4%3DUeY%2BWbGuF7-oD%3Dm-ypHPy%2BsYHiXZc%2BhTUQ%40mail.gmail.com
-- 
Thanks and Regards
Mahendra Singh Thalor
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] Restricting maximum keep segments by repslots

2020-01-23 Thread Kyotaro Horiguchi
At Thu, 23 Jan 2020 21:28:54 +0900 (JST), Kyotaro Horiguchi 
 wrote in 
> > In the same function, I think that setting restBytes to -1 when
> > "useless" is bad style.  I would just leave that variable alone when the
> > returned status is not one that receives the number of bytes.  So the
> > caller is only entitled to read the value if the returned enum value is
> > such-and-such ("keeping" and "streaming" I think).
> 
> That is the only condition. If max_slot_wal_keep_size = -1, The value
> is useless for the two states.  I added that explanation to the
> comment of Get(Lsn)Walavailability().

The reply is bogus since restBytes is no longer a parameter of
GetWalAvailability following the next comment.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: [HACKERS] Restricting maximum keep segments by repslots

2020-01-23 Thread Kyotaro Horiguchi
Hello, Jehan.

At Wed, 22 Jan 2020 17:47:23 +0100, Jehan-Guillaume de Rorthais 
 wrote in 
> Hi,
> 
> First, it seems you did not reply to Alvaro's concerns in your new set of
> patch. See:
> 
> https://www.postgresql.org/message-id/20190917195800.GA16694%40alvherre.pgsql

Mmmm. Thank you very much for noticing that, Jehan, and sorry for
overlooking, Alvaro.


At Tue, 17 Sep 2019 16:58:00 -0300, Alvaro Herrera  
wrote in 
> suggest a substitute name, because the API itself doesn't convince me; I
> think it would be sufficient to have it return a single slot name,
> perhaps the one that is behind the most ... or maybe the one that is
> behind the least?  This simplifies a lot of code (in particular you do
> away with the bunch of statics, right?), and I don't think the warning
> messages loses anything, because for details the user should really look
> into the monitoring view anyway.

Ok, I removed the funnily-named function. The message becomes more or
less the following.  The DETAIL might not be needed.

| WARNING:  2 replication slots have lost required WAL segments by 5 segments
| DETAIL:  Most affected slot is s1.

> I didn't like GetLsnAvailability() returning a string either.  It seems
> more reasonable to me to define a enum with possible return states, and
> have the enum value be expanded to some string in
> pg_get_replication_slots().

Agreed. Done.

> In the same function, I think that setting restBytes to -1 when
> "useless" is bad style.  I would just leave that variable alone when the
> returned status is not one that receives the number of bytes.  So the
> caller is only entitled to read the value if the returned enum value is
> such-and-such ("keeping" and "streaming" I think).

That is the only condition. If max_slot_wal_keep_size = -1, The value
is useless for the two states.  I added that explanation to the
comment of Get(Lsn)Walavailability().

> I'm somewhat uncomfortable with the API change to GetOldestKeepSegment
> in 0002.  Can't its caller do the math itself instead?

Mmm.  Finally I found that I had merged two calculations that have little
relation to each other. You're right here. Thanks.

The attached v18 addressed all of your (Alvaro's) comments.



> On Tue, 24 Dec 2019 21:26:14 +0900 (JST)
> Kyotaro Horiguchi  wrote:
> > If we assume "losing" segments as "lost", a segment once "lost" can
> > return to "keeping" or "streaming" state. That is intuitively
> > impossible. On the other hand if we assume it as "keeping", it should
> > not be removed by the next checkpoint but actually it can be
> > removed. The state "losing" means such a unstable state different from
> > both "lost" and "keeping".
> 
> OK, indeed.
> 
> But I'm still unconfortable with this "unstable" state. It would be better if
> we could grab a stable state: either "keeping" or "lost".

I feel the same, but the being-removed WAL segments remain until
checkpoint runs and even after removal replication can continue if
walsender is reading the removed-but-already-opened file.  I'll put
more thought on that.

> > In short, the "streaming/normal" state is useless if
> > max_slot_wal_keep_size < max_wal_size.
> 
> Good catch!

Thanks!:)

> > Finally I used the following wordings.
> > 
> > (there's no "inactive" wal_state)
> > 
> > * normal: required WAL within max_wal_size when max_slot_wal_keep_size
> >   is larger than max_wal_size.
> > * keeping: required segments are held by replication slots or
> >   wal_keep_segments.
> > 
> > * losing: required segments are about to be removed or may be already
> >   removed but streaming is not dead yet.
> 
> As I wrote, I'm still uncomfortable with this state. Maybe we should ask
> other reviewers opinions on this.
> 
> [...]
> > >   WARNING:  some replication slots have lost required WAL segments
> > >   DETAIL:  Slot slot_limit_st lost 177 segment(s)
> > > 
> > > I wonder if this is useful to show these messages for slots that were
> > > already dead before this checkpoint?  
> > 
> > Makes sense. I changed KeepLogSeg so that it emits the message only on
> > slot_names changes.
> 
> Thanks.
> 
> Bellow some code review.

Thank you for the review. I don't have time right now, but I will address
the comments below soon.


> In regard with FindOldestXLogFileSegNo(...):
> 
> > /*
> >  * Return the oldest WAL segment file.
> >  *
> >  * The returned value is XLogGetLastRemovedSegno() + 1 when the function
> >  * returns a valid value.  Otherwise this function scans over WAL files and
> >  * finds the oldest segment at the first time, which could be very slow.
> >  */
> > XLogSegNo
> > FindOldestXLogFileSegNo(void)
> 
> The comment is not clear to me. I suppose "at the first time" might better be
> expressed as "if none has been removed since last startup"?
> 
> Moreover, what about patching XLogGetLastRemovedSegno() itself instead of
> adding a function?
> 
> In regard with GetLsnAvailability(...):
> 
> > /*
> >  * Detect availability of the record at given targetLSN.
> >  *
> >  * 

Re: Error message inconsistency

2020-01-23 Thread Mahendra Singh Thalor
On Thu, 23 Jan 2020 at 10:20, Amit Kapila  wrote:
>
> On Wed, Jan 22, 2020 at 8:48 PM Tom Lane  wrote:
> >
> > Alvaro Herrera  writes:
> > > I wonder if we shouldn't be using errtablecol() here instead of (in
> > > addition to?) patching the errmsg() to include the table name.
> >
> > > Discussion: If we really like having the table names in errtable(), then
> > > we should have psql display it by default, and other tools will follow
> > > suit; in that case we should remove the table name from error messages,
> > > or at least not add it to even more messages.
> >
> > > If we instead think that errtable() is there just for programmatically
> > > knowing the affected table, then we should add the table name to all
> > > errmsg() where relevant, as in the patch under discussion.
> >
> > > What do others think?
> >
> > I believe that the intended use of errtable() and friends is so that
> > applications don't have to parse those names out of a human-friendly
> > message.  We should add calls to them in cases where (a) an application
> > can tell from the SQLSTATE that some particular table is involved
> > and (b) it's likely that the app would wish to know which table that is.
> > I don't feel a need to sprinkle every single ereport() in the backend
> > with errtable(), just ones where there's a plausible use-case for the
> > extra cycles that will be spent.
> >
> > On the other side of the coin, whether we use errtable() is not directly
> > a factor in deciding what the human-friendly messages should say.
> > I do find it hard to envision a case where we'd want to use errtable()
> > and *not* put the table name in the error message, just because if
> > applications need to know something then humans probably do too.
> >
>
> makes sense.
>

Thanks all for reviewing and giving comments.

> > > Added relation name for this error.  This can be verified by below 
> > > example:
> > > Ex:
> > > CREATE TABLE list_parted (a int,b char)PARTITION BY LIST (a);
> > > CREATE TABLE part_1 (LIKE list_parted);
> > > INSERT INTO part_1 VALUES (3, 'a');
> > > ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (2);
> > >
> > > Without patch:
> > > ERROR:  partition constraint is violated by some row
> > > With patch:
> > > ERROR:  partition constraint "part_1" is violated by some row
> >
> > Here it seems as if "part_1" is the constraint name.
> >
>
> I agree.
>
> >  It would be
> > better to change it to:
> >
> > partition constraint is violated by some row in relation "part_1" OR
> > partition constraint of relation "part_1" is violated b some row
> >
>
> +1 for the second option suggested by Beena.

I fixed above comment and updated expected .out files. Attaching
updated patches.

To make review simple, I made 3 patches as:

v4_0001_rationalize_constraint_error_messages.patch:
This patch has .c file changes. Added relation name in 6 error
messages for check constraint.

v4_0002_updated-regress-expected-.out-files.patch:
This patch has changes of expected .out files for regress test suite.

v4_0003_updated-constraints.source-file.patch:
This patch has changes of .source file for constraints.sql regress test.

Please review attached patches and let me know your comments.

-- 
Thanks and Regards
Mahendra Singh Thalor
EnterpriseDB: http://www.enterprisedb.com


v4_0001_rationalize_constraint_error_messages.patch
Description: Binary data


v4_0002_updated-regress-expected-.out-files.patch
Description: Binary data


v4_0003_updated-constraints.source-file.patch
Description: Binary data


Re: Online checksums patch - once again

2020-01-23 Thread Daniel Gustafsson
> On 22 Jan 2020, at 23:07, Robert Haas  wrote:
> 
> On Wed, Jan 22, 2020 at 3:28 PM Magnus Hagander  wrote:
>>> I think the argument about adding catalog flags adding overhead is
>>> pretty much bogus. Fixed-width fields in catalogs are pretty cheap.
>> 
>> If that's the general view, then yeah our "cost calculations" were
>> off. I guess I may have been colored by the cost of adding statistics
>> counters, and had that influence the thinking. Incorrect judgement on
>> that cost certainly contributed to the decision. back then.
> 
> For either statistics or for pg_class, the amount of data that we have
> to manage is proportional to the number of relations (which could be
> big) multiplied by the data stored for each relation. But the
> difference is that the stats file has to be rewritten, at least on a
> per-database basis, very frequently, while pg_class goes through
> shared-buffers and so doesn't provoke the same stupid
> write-the-whole-darn-thing behavior. That is a pretty key difference,
> IMHO.

I think the cost is less about performance and more about carrying around an
attribute which won't be terribly interesting during the cluster's lifetime,
except for the transition. But, as you say, it's probably a manageable expense.

A bigger question is how to handle the offline capabilities.  pg_checksums can
enable or disable checksums in an offline cluster, which will put the cluster
in a state where the pg_control file and the catalog don't match at startup.
One strategy could be to always trust the pg_control file and alter the catalog
accordingly, but that still leaves a window of inconsistent cluster state.

cheers ./daniel



Re: Parallel grouping sets

2020-01-23 Thread Amit Kapila
On Sun, Jan 19, 2020 at 2:23 PM Richard Guo  wrote:
>
> I realized that there are two patches in this thread that are
> implemented according to different methods, which causes confusion.
>

Both the idea seems to be different.  Is the second approach [1]
inferior for any case as compared to the first approach?  Can we keep
both approaches for parallel grouping sets, if so how?  If not, then
won't the code by the first approach be useless once we commit second
approach?


[1] - 
https://www.postgresql.org/message-id/CAN_9JTwtTTnxhbr5AHuqVcriz3HxvPpx1JWE--DCSdJYuHrLtA%40mail.gmail.com

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com




Re: Duplicate Workers entries in some EXPLAIN plans

2020-01-23 Thread Georgios Kokolatos
The following review has been posted through the commitfest application:
make installcheck-world:  not tested
Implements feature:   not tested
Spec compliant:   not tested
Documentation:not tested

> Ah, I see. I think I got that from ExplainPrintSettings. I've
> corrected my usage--thanks for pointing it out. I appreciate the
> effort to maintain a consistent style.

Thanks, I am just following the reviewing guide to be honest.

> Also, reviewing my code again, I noticed that when I moved the general
> worker output earlier, I missed part of the merge conflict:

Right. I thought that was intentional.

> which ignores the es->hide_workers flag (it did not fail the tests,
> but the intent is pretty clear). I've corrected this in the current
> patch.

Noted and appreciated.

> I also noticed that we can now handle worker buffer output more
> consistently across TEXT and structured formats, so I made that small
> change too:

Looks good.

> I think the "producing plan output for a worker" process is easier to
> reason about now, and while it changes TEXT format worker output
> order, the other changes in this patch are more drastic so this
> probably does not matter.
> 
> I've also addressed the other feedback above, and reworded a couple of
> comments slightly.

Thanks for the above. Looks clean, does what it says on the tin, and solves a
problem that needs solving. All relevant installcheck-world tests pass. As far
as I am concerned, it is ready to be sent to a committer. Updating the status
accordingly.

The new status of this patch is: Ready for Committer


Re: [HACKERS] Block level parallel vacuum

2020-01-23 Thread Mahendra Singh Thalor
On Wed, 22 Jan 2020 at 12:48, Masahiko Sawada
 wrote:
>
> On Wed, 22 Jan 2020 at 11:23, Amit Kapila  wrote:
> >
> > On Wed, Jan 22, 2020 at 7:14 AM Masahiko Sawada
> >  wrote:
> > >
> > > Thank you for updating the patch. Yeah, MAXDEADTUPLES is better than
> > > what I did in the previous version of the patch.
> > >
> >
> > Would you like to resubmit your vacuumdb utility patch for this
> > enhancement?  I see some old version of it and it seems to me that you
> > need to update that patch.
> >
> > + if (optarg != NULL)
> > + {
> > + parallel_workers = atoi(optarg);
> > + if (parallel_workers <= 0)
> > + {
> > + pg_log_error("number of parallel workers must be at least 1");
> > + exit(1);
> > + }
> > + }
> >
> > This will no longer be true.
>
> Attached the updated version patch.
>

Thanks, Sawada-san, for the rebased patch.

I reviewed and tested this patch.  Patch looks good to me.
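(For context, the option check quoted earlier in this thread presumably
relaxes along these lines once a worker count of 0 is accepted to mean
"disable parallel vacuum"; this is only a sketch under that assumption,
not the attached patch.)

	/* Sketch: allow 0 (disable parallelism), reject only negative values. */
	if (optarg != NULL)
	{
		parallel_workers = atoi(optarg);
		if (parallel_workers < 0)
		{
			pg_log_error("number of parallel workers must not be negative");
			exit(1);
		}
	}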

-- 
Thanks and Regards
Mahendra Singh Thalor
EnterpriseDB: http://www.enterprisedb.com




Re: Append with naive multiplexing of FDWs

2020-01-23 Thread Thomas Munro
On Thu, Jan 16, 2020 at 9:41 AM Bruce Momjian  wrote:
> On Tue, Jan 14, 2020 at 02:37:48PM +0500, Ahsan Hadi wrote:
> > Summary
> > The patch is pretty good; it works well when little data is sent back to
> > the parent node. The patch doesn't provide a parallel FDW scan: it ensures
> > that child nodes can send data to the parent in parallel, but the parent
> > can only process the data from the data nodes sequentially.
> >
> > Provided there is no performance degradation for non-FDW append queries, I
> > would recommend considering this patch as an interim solution while we are
> > waiting for parallel FDW scan.
>
> Wow, these are very impressive results!

+1

Thanks Ahsan and Movead.  Could you please confirm which patch set you tested?




Re: Duplicate Workers entries in some EXPLAIN plans

2020-01-23 Thread Maciek Sakrejda
On Wed, Jan 22, 2020 at 9:37 AM Georgios Kokolatos  wrote:
> My bad, I should have been clearer. I meant that it is preferable to use
> the C99 standard, which calls for declaring variables in the scope where
> you need them.

Ah, I see. I think I got that from ExplainPrintSettings. I've
corrected my usage--thanks for pointing it out. I appreciate the
effort to maintain a consistent style.
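(As an aside, the style point amounts to roughly the following: declare the
loop counter and per-worker pointer in the narrowest scope that needs them,
C99-style, rather than at the top of the function.  Purely illustrative,
not part of the patch.)

	for (int n = 0; n < w->num_workers; n++)
	{
		Instrumentation *instrument = &w->instrument[n];	/* declared where used */

		if (instrument->nloops <= 0)
			continue;
		/* ... emit per-worker output ... */
	}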

>
> >> int         indent;          /* current indentation level */
> >> List       *grouping_stack;  /* format-specific grouping state */
> >> +   bool        print_workers;  /* whether current node has worker metadata */
> >>
> >> Hmm.. commit  introduced `hide_workers` in the struct. Having both
> >> names in the struct so far apart even seems a bit confusing and sloppy.
> >> Do you think it would be possible to combine or rename?
> >
> >
> > I noticed that. I was thinking about combining them, but
> > "hide_workers" seems to be about "pretend there is no worker output
> > even if there is" and "print_workers" is "keep track of whether or not
> > there is worker output to print". Maybe I'll rename to
> > "has_worker_output"?
>
> The rename sounds a bit better in my humble opinion. Thanks.
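(For readers following along, the two flags live in ExplainState roughly as
sketched below; the comments paraphrase the distinction drawn above, and
print_workers may yet be renamed to has_worker_output.  Sketch only, with
other fields elided.)

typedef struct ExplainState
{
	/* ... other fields elided ... */
	int			indent;			/* current indentation level */
	List	   *grouping_stack;	/* format-specific grouping state */
	bool		hide_workers;	/* suppress worker detail even if present */
	bool		print_workers;	/* current node has worker output to print */
} ExplainState;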

Also, reviewing my code again, I noticed that when I moved the general
worker output earlier, I missed part of the merge conflict: I had
replaced

-   /* Show worker detail */
-   if (es->analyze && es->verbose && !es->hide_workers &&
-   planstate->worker_instrument)

with

+   /* Prepare worker general execution details */
+   if (es->analyze && es->verbose && planstate->worker_instrument)

which ignores the es->hide_workers flag (it did not fail the tests,
but the intent is pretty clear). I've corrected this in the current
patch.
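(Presumably the corrected guard simply restores the flag, along the lines of
the sketch below; the attached patch is authoritative.)

	/* Prepare worker general execution details */
	if (es->analyze && es->verbose && !es->hide_workers &&
		planstate->worker_instrument)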

I also noticed that we can now handle worker buffer output more
consistently across TEXT and structured formats, so I made that small
change too:

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 140d0be426..b23b015594 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1401,8 +1401,6 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				appendStringInfo(es->str,
 								 "actual rows=%.0f loops=%.0f\n",
 								 rows, nloops);
-				if (es->buffers)
-					show_buffer_usage(es, &instrument->bufusage);
 			}
 			else
 			{
@@ -1951,7 +1949,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
 	/* Prepare worker buffer usage */
 	if (es->buffers && es->analyze && es->verbose && !es->hide_workers
-		&& planstate->worker_instrument && es->format != EXPLAIN_FORMAT_TEXT)
+		&& planstate->worker_instrument)
 	{
 		WorkerInstrumentation *w = planstate->worker_instrument;
 		int			n;
diff --git a/src/test/regress/expected/explain.out
b/src/test/regress/expected/explain.out
index 8034a4e0db..a4eed3067f 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -103,8 +103,8 @@ $$, 'verbose', 'analyze', 'buffers', 'timing off', 'costs off', 'summary off'),
  Sort Key: (ROW("*VALUES*".column1))  +
  Buffers: shared hit=114  +
  Worker 0: actual rows=2 loops=1  +
-   Buffers: shared hit=114+
Sort Method: xxx   +
+   Buffers: shared hit=114+
  ->  Values Scan on "*VALUES*" (actual rows=2 loops=1)+
Output: "*VALUES*".column1, ROW("*VALUES*".column1)+
Worker 0: actual rows=2 loops=1+


I think the "producing plan output for a worker" process is easier to
reason about now, and while it changes TEXT format worker output
order, the other changes in this patch are more drastic so this
probably does not matter.

I've also addressed the other feedback above, and reworded a couple of
comments slightly.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index d189b8d573..b4108f82ec 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -100,7 +100,7 @@ static void show_sortorder_options(StringInfo buf, Node *sortexpr,
    Oid sortOperator, Oid collation, bool nullsFirst);
 static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 			 List *ancestors, ExplainState *es);
-static void show_sort_info(SortState *sortstate, ExplainState *es);
+static void show_sort_info(SortState *sortstate, StringInfo* worker_strs, ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 

Re: progress report for ANALYZE

2020-01-23 Thread Michael Paquier
On Wed, Jan 22, 2020 at 03:06:52PM +0900, Amit Langote wrote:
> Oops, really attached this time.

Thanks, applied.  There were clearly two grammar mistakes in the first
patch sent by Justin.  And your suggestions look fine to me.  On top
of that, I have noticed that the indentation of the two tables in the
docs was rather inconsistent.
--
Michael


signature.asc
Description: PGP signature