Re: plenty code is confused about function level static

2024-04-18 Thread Heikki Linnakangas

On 18/04/2024 00:39, Andres Freund wrote:

Hi,

We have a fair amount of code that uses non-constant function level static
variables for read-only data. Which makes little sense - it prevents the
compiler from understanding

a) that the data is read only and can thus be put into a segment that's shared
between all invocations of the program
b) the data will be the same on every invocation, and thus from optimizing
based on that.

The most common example of this is that all our binaries use
   static struct option long_options[] = { ... };
which prevents long_options from being put into read-only memory.
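
(For illustration, a minimal sketch — option names invented; the only change under discussion is the added const qualifier, which lets the table be placed in the read-only data segment and shared between processes:)

    #include <getopt.h>
    #include <stddef.h>

    static const struct option long_options[] = {
        {"verbose", no_argument,       NULL, 'v'},
        {"output",  required_argument, NULL, 'o'},
        {NULL, 0, NULL, 0},
    };

    /* getopt_long() already takes "const struct option *", so callers
     * compile unchanged; the data simply moves out of writable memory. */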


Is there some reason we went for this pattern in a fair number of places? I
assume it's mostly copy-pasta, but...


In practice it often is useful to use 'static const' instead of just
'const'. At least gcc otherwise sometimes fills the data on the stack, instead
of having a read-only data member that's already initialized. I'm not sure
why, tbh.


Weird. I guess it can be faster if you assume the data in the read-only 
section might not be in cache, but the few instructions needed to fill 
the data locally on the stack are.



Attached are fixes for struct option and a few more occurrences I've found
with a bit of grepping.


+1

--
Heikki Linnakangas
Neon (https://neon.tech)





Re: Support a wildcard in backtrace_functions

2024-04-18 Thread Michael Paquier
On Fri, Apr 12, 2024 at 09:36:36AM +0900, Michael Paquier wrote:
> log_backtrace speaks a bit more to me as a name for this stuff because
> it logs a backtrace.  Now, there is consistency on HEAD as well
> because these GUCs are all prefixed with "backtrace_".  Would
> something like a backtrace_mode where we have an enum rather than a
> boolean be better?  One thing would be to redesign the existing GUC as
> having two values on HEAD as of:
> - "none", to log nothing.
> - "internal", to log backtraces for internal errors.
> 
> Then this could be extended with more modes, to discuss in future
> releases as new features.

As this is an open item, let's move on here.

Attached is a proposal of patch for this open item, switching
backtrace_on_internal_error to backtrace_mode with two values:
- "none", to log no backtraces.
- "internal", to log backtraces for internal errors.

The rest of the proposals had better happen as a v18 discussion, where
extending this GUC is a benefit.
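
(For clarity, usage of the reworked GUC per the patch below — a
superuser-only developer option:)

    SET backtrace_mode = 'internal';  -- log a backtrace for XX000 errors
    SET backtrace_mode = 'none';      -- the default: log no backtraces
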
--
Michael
From 348c3d45ddf4c29163c6963cb640c372fdbe72d5 Mon Sep 17 00:00:00 2001
From: Michael Paquier 
Date: Thu, 18 Apr 2024 15:57:10 +0900
Subject: [PATCH] backtrace_on_internal_error -> backtrace_mode

This currently supports two values:
- "none", to log no backtraces.
- "internal", to log a backtrace with an internal error.
---
 src/include/utils/elog.h|  6 ++
 src/include/utils/guc.h |  2 +-
 src/backend/utils/error/elog.c  |  2 +-
 src/backend/utils/misc/guc_tables.c | 33 +++--
 doc/src/sgml/config.sgml| 19 ++---
 src/tools/pgindent/typedefs.list|  1 +
 6 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h
index 054dd2bf62..b132f89e98 100644
--- a/src/include/utils/elog.h
+++ b/src/include/utils/elog.h
@@ -495,6 +495,12 @@ typedef enum
 	PGERROR_VERBOSE,			/* all the facts, ma'am */
 }			PGErrorVerbosity;
 
+typedef enum
+{
+	BACKTRACE_MODE_NONE,		/* no backtraces */
+	BACKTRACE_MODE_INTERNAL,	/* backtraces for internal error code */
+} BacktraceMode;
+
 extern PGDLLIMPORT int Log_error_verbosity;
 extern PGDLLIMPORT char *Log_line_prefix;
 extern PGDLLIMPORT int Log_destination;
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 8d1fe04078..53dd28222a 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -267,7 +267,7 @@ extern PGDLLIMPORT int log_temp_files;
 extern PGDLLIMPORT double log_statement_sample_rate;
 extern PGDLLIMPORT double log_xact_sample_rate;
 extern PGDLLIMPORT char *backtrace_functions;
-extern PGDLLIMPORT bool backtrace_on_internal_error;
+extern PGDLLIMPORT int backtrace_mode;
 
 extern PGDLLIMPORT int temp_file_limit;
 
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 605ff3b045..1c108b86ec 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -501,7 +501,7 @@ errfinish(const char *filename, int lineno, const char *funcname)
 		  backtrace_functions &&
 		  matches_backtrace_functions(edata->funcname)) ||
 		 (edata->sqlerrcode == ERRCODE_INTERNAL_ERROR &&
-		  backtrace_on_internal_error)))
+		  backtrace_mode == BACKTRACE_MODE_INTERNAL)))
 		set_backtrace(edata, 2);
 
 	/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index c68fdc008b..d5a9f3b73e 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -79,6 +79,7 @@
 #include "tsearch/ts_cache.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
+#include "utils/elog.h"
 #include "utils/float.h"
 #include "utils/guc_hooks.h"
 #include "utils/guc_tables.h"
@@ -117,6 +118,15 @@ extern bool optimize_bounded_sort;
  * NOTE! Option values may not contain double quotes!
  */
 
+static const struct config_enum_entry backtrace_mode_options[] = {
+	{"none", BACKTRACE_MODE_NONE, false},
+	{"internal", BACKTRACE_MODE_INTERNAL, false},
+	{NULL, 0, false}
+};
+
+StaticAssertDecl(lengthof(backtrace_mode_options) == (BACKTRACE_MODE_INTERNAL + 2),
+ "array length mismatch");
+
 static const struct config_enum_entry bytea_output_options[] = {
 	{"escape", BYTEA_OUTPUT_ESCAPE, false},
 	{"hex", BYTEA_OUTPUT_HEX, false},
@@ -531,7 +541,7 @@ int			log_temp_files = -1;
 double		log_statement_sample_rate = 1.0;
 double		log_xact_sample_rate = 0;
 char	   *backtrace_functions;
-bool		backtrace_on_internal_error = false;
+int			backtrace_mode = BACKTRACE_MODE_NONE;
 
 int			temp_file_limit = -1;
 
@@ -770,16 +780,6 @@ StaticAssertDecl(lengthof(config_type_names) == (PGC_ENUM + 1),
 
 struct config_bool ConfigureNamesBool[] =
 {
-	{
-		{"backtrace_on_internal_error", PGC_SUSET, DEVELOPER_OPTIONS,
-			gettext_noop("Log backtrace for any error with error code XX000 (internal error)."),
-			NULL,
-			GUC_NOT_IN_SAMPLE
-		},
-		&backtrace_on_internal_error,
-		false,
-		NULL, NULL, NULL
-	},
 	{
 		{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
 		

Re: promotion related handling in pg_sync_replication_slots()

2024-04-18 Thread shveta malik
On Tue, Apr 16, 2024 at 2:52 PM Bertrand Drouvot
 wrote:
>
> Hi,
>
> On Tue, Apr 16, 2024 at 02:06:45PM +0530, shveta malik wrote:
> > On Tue, Apr 16, 2024 at 1:55 PM Zhijie Hou (Fujitsu)
> >  wrote:
> > > I personally feel adding the additional check for sync_replication_slots
> > > may not improve the situation here. Because the GUC sync_replication_slots
> > > can change at any point, the GUC could be false when performing this
> > > additional check and set to true immediately after the check, so it could
> > > not simplify the logic anyway.
> >
> > +1.
> > I feel doc and "cannot synchronize replication slots concurrently"
> > check should suffice.
> >
> > In the scenario which Hou-San pointed out, if the GUC is enabled
> > immediately after the SQL function performs its check, and the worker
> > starts sooner than the function gets a chance to sync, the SQL function
> > will still ultimately get the error "cannot synchronize replication
> > slots concurrently", even though the GUC is enabled.  Thus, I feel we
> > should stick with the same error in all scenarios.
>
> Okay, fine by me, let's forget about checking sync_replication_slots then.

Thanks.

While reviewing and testing this patch further, we encountered two
race conditions which need to be handled:

1) For the slot sync worker, the order of cleanup execution was a) first
reset the 'syncing' flag (slotsync_failure_callback), b) then reset the
pid and 'syncing' (slotsync_worker_onexit). But in ShutDownSlotSync(), we
rely only on the 'syncing' flag for the wait-exit logic. So it may happen
that in the window between these two callbacks, ShutDownSlotSync()
proceeds and calls update_synced_slots_inactive_since(), which may then
hit the assertion Assert(SlotSyncCtx->pid == InvalidPid).

2) Another problem, as described by Hou-San off-list:
When the slotsync worker errors out after acquiring a slot, it will
first call slotsync_worker_onexit() and then
ReplicationSlotShmemExit(), so in the window between these two
callbacks, it's possible that SlotSyncCtx->syncing and
SlotSyncCtx->pid have been reset but slot->active_pid is still
valid. The Assert below will then be broken:
@@ -1471,6 +1503,9 @@ update_synced_slots_inactive_since(void)
{
Assert(SlotIsLogical(s));

+   /* The slot must not be acquired by any process */
+   Assert(s->active_pid == 0);
+


To fix above issues, these changes have been made in v7:
1) For the worker, replaced slotsync_failure_callback() with
slotsync_worker_disconnect(), so that the latter only disconnects and
slotsync_worker_onexit() then does the pid cleanup followed by the
syncing flag cleanup. This makes ShutDownSlotSync()'s wait-exit logic
reliable.

2) To fix second problem, changes are:

2.1) For the worker, moved the slotsync_worker_onexit() registration
before BaseInit() (BaseInit is the one doing the
ReplicationSlotShmemExit registration). With this change in
registration order, the cleanup order for the worker will be
a) slotsync_worker_disconnect(), b) ReplicationSlotShmemExit(),
c) slotsync_worker_onexit(). This order ensures that the worker is
actually done with slot release and
cleanup before it marks itself as done syncing.
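
(As a standalone illustration of the ordering rule this relies on:
PostgreSQL's before_shmem_exit() callbacks, like standard C atexit()
handlers, run in reverse order of registration, so registering the final
cleanup first makes it run last. A runnable analogy in plain C, with
invented handler names:)

    #include <stdio.h>
    #include <stdlib.h>

    static void disconnect(void)     { puts("1st: disconnect"); }
    static void release_slots(void)  { puts("2nd: release and clean up slots"); }
    static void clear_pid_flag(void) { puts("3rd: reset pid, then syncing flag"); }

    int
    main(void)
    {
        atexit(clear_pid_flag);    /* registered first -> runs last  */
        atexit(release_slots);
        atexit(disconnect);        /* registered last  -> runs first */
        return 0;
    }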

2.2) For SQL function, did ReplicationSlotRelease() and
ReplicationSlotCleanup() as first step in slotsync_failure_callback().

While doing change 2.2, it occurred to us that it would be a clean
solution to do ReplicationSlotCleanup() even on successful execution
of the SQL function. It seems better that the temporary slots are
cleaned up when the SQL function exits, as we do not know when the user
will run this SQL function again, and leaving temp slots around for
longer does not seem like a good idea. Thoughts?

thanks
Shveta


v7-0001-Handle-stopSignaled-during-sync-function-call.patch
Description: Binary data


Re: brininsert optimization opportunity

2024-04-18 Thread Michael Paquier
On Thu, Feb 29, 2024 at 01:20:39PM +0100, Alvaro Herrera wrote:
> I think it's not an issue, or rather that we should not try to guess.
> Instead make it a simple rule: if aminsert is called, then
> aminsertcleanup must be called afterwards, period.
> 
> I agree it would be nice to have a way to verify, but it doesn't seem
> 100% essential.  After all, it's not very common to add new calls to
> aminsert.

This thread is listed as an open item.  What's the follow-up plan?
The last email of this thread is dated as of the 29th of February,
which was 6 weeks ago.
--
Michael


signature.asc
Description: PGP signature


Re: BitmapHeapScan streaming read user and prelim refactoring

2024-04-18 Thread Michael Paquier
On Sun, Apr 07, 2024 at 10:54:56AM -0400, Melanie Plageman wrote:
> I've added an open item [1], because what's one open item when you can
> have two? (me)

And this is still an open item as of today.  What's the plan to move
forward here?
--
Michael


signature.asc
Description: PGP signature


Re: improve performance of pg_dump --binary-upgrade

2024-04-18 Thread Daniel Gustafsson
> On 18 Apr 2024, at 06:17, Nathan Bossart  wrote:

> The attached work-in-progress patch speeds up 'pg_dump --binary-upgrade'
> for this case.  Instead of executing the query in every call to the
> function, we can execute it once during the first call and store all the
> required information in a sorted array that we can bsearch() in future
> calls.
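
(To make that concrete, a self-contained sketch of the described pattern —
the struct and function names are invented, and the hardcoded rows stand in
for the single catalog query pg_dump would run:)

    #include <stdlib.h>

    typedef unsigned int Oid;       /* stand-in for the real typedef */

    typedef struct
    {
        Oid         oid;            /* pg_class OID, plus whatever else is needed */
    } ClassOidItem;

    static ClassOidItem *items;
    static size_t nitems;

    static int
    cmp_by_oid(const void *a, const void *b)
    {
        Oid ao = ((const ClassOidItem *) a)->oid;
        Oid bo = ((const ClassOidItem *) b)->oid;

        return (ao > bo) - (ao < bo);
    }

    static const ClassOidItem *
    lookup_class_oids(Oid oid)
    {
        ClassOidItem key = {oid};

        if (items == NULL)
        {
            /* first call: fetch everything once, then sort */
            static ClassOidItem rows[] = {{16385}, {16384}};

            items = rows;
            nitems = sizeof(rows) / sizeof(rows[0]);
            qsort(items, nitems, sizeof(*items), cmp_by_oid);
        }
        return bsearch(&key, items, nitems, sizeof(*items), cmp_by_oid);
    }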

That does indeed seem like a saner approach.  Since we look up the relkind we
can also remove the is_index parameter to binary_upgrade_set_pg_class_oids
since we already know that without the caller telling us?

> One downside of this approach is the memory usage.

I'm not too worried about the worst-case performance of this.

> This was more-or-less
> the first approach that crossed my mind, so I wouldn't be surprised if
> there's a better way.  I tried to keep the pg_dump output the same, but if
> that isn't important, maybe we could dump all the pg_class OIDs at once
> instead of calling binary_upgrade_set_pg_class_oids() for each one.

Without changing the backend handling of the Oids we can't really do that
AFAICT; the backend stores the Oid for the next call, so it needs to be
per-relation like now?

For Greenplum we moved this to the backend by first dumping all Oids which were
read into backend cache, and during relation creation the Oid to use was looked
up in the backend.  (This wasn't a performance change, it was to allow multiple
shared-nothing clusters to have a unified view of Oids, so I never benchmarked
it all that well.) The upside of that is that the magic Oid variables in the
backend can be removed, but it obviously adds slight overhead elsewhere.

--
Daniel Gustafsson





Re: Retiring is_pushed_down

2024-04-18 Thread Richard Guo
Here is another rebase over 3af7040985.  Nothing else has changed.

Thanks
Richard


v4-0001-Retiring-is_pushed_down.patch
Description: Binary data


Re: fix tablespace handling in pg_combinebackup

2024-04-18 Thread Michael Paquier
On Wed, Apr 17, 2024 at 02:50:21PM -0700, Andres Freund wrote:
> On 2024-04-17 16:16:55 -0400, Robert Haas wrote:
>> In the "Differential code coverage between 16 and HEAD" thread, Andres
>> pointed out that there wasn't test case coverage for
>> pg_combinebackup's code to handle files in tablespaces. I looked at
>> adding that, and as nobody could possibly have predicted, found a bug.
> 
> Ha ;)

Note: open_item_counter++
--
Michael


signature.asc
Description: PGP signature


Re: plenty code is confused about function level static

2024-04-18 Thread Peter Eisentraut

On 17.04.24 23:39, Andres Freund wrote:

Is there some reason we went for this pattern in a fair number of places? I
assume it's mostly copy-pasta, but...


Right.  I don't think it is commonly understood that adding const 
qualifiers can help compiler optimization, and it's difficult to 
systematically check for omissions or verify the optimization effects. 
So I think we just have to keep trying to do our best manually for now.



Attached are fixes for struct option and a few more occurrences I've found
with a bit of grepping.


These look good to me.





Re: plenty code is confused about function level static

2024-04-18 Thread Andrey M. Borodin



> On 18 Apr 2024, at 02:39, Andres Freund  wrote:
> 
> There are lots of places that could benefit from adding 'static
> const'.

+1 for helping compiler.
GCC has -Wsuggest-attribute=const; we could count these warnings and treat
an increase as an error :)


Best regards, Andrey Borodin.



Re: Cannot find a working 64-bit integer type on Illumos

2024-04-18 Thread Peter Eisentraut

On 18.04.24 02:31, Thomas Munro wrote:

On Sat, Mar 23, 2024 at 3:23 PM Tom Lane  wrote:

Thomas Munro  writes:

. o O ( int64_t, PRId64, etc were standardised a quarter of a century ago )


Yeah.  Now that we require C99 it's probably reasonable to assume
that those things exist.  I wouldn't be in favor of ripping out our
existing notations like UINT64CONST, because the code churn would be
substantial and the gain minimal.  But we could imagine reimplementing
that stuff atop <stdint.h> and then getting rid of the configure-time
probes.


I played around with this a bit, but am not quite there yet.


Looks promising.


printf() is a little tricky.  The standard wants us to use
's PRId64 etc, but that might confuse our snprintf.c (in
theory, probably not in practice).  "ll" should have the right size on
all systems, but gets warnings from the printf format string checker
on systems where "l" is the right type.


I'm not sure I understand the problem here.  Do you mean that in theory 
a platform's PRId64 could be something other than "l" or "ll"?
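
(For reference, the plain C99 usage in question — nothing Postgres-specific
here:)

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        int64_t     v = INT64_C(9223372036854775807);

        /* PRId64 expands to "ld" where int64_t is long and to "lld" where
         * it is long long; a hardcoded "ll" prints correctly in practice
         * but trips -Wformat on platforms whose native modifier is "l". */
        printf("%" PRId64 "\n", v);
        return 0;
    }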



For limits, why do we have this:

- * stdint.h limits aren't guaranteed to have compatible types with our fixed
- * width types. So just define our own.

?  I mean, how could they not have compatible types?


Maybe this means something like our int64 is long long int but the 
system's int64_t is long int underneath, but I don't see how that would 
matter for the limit macros.



I noticed that configure.ac checks if int64 (no "_t") might be defined
already by system header pollution, but meson.build doesn't.  That's
an inconsistency that should be fixed, but which way?  Hmm, commit
15abc7788e6 said that was done for BeOS, which we de-supported.  So
maybe we should get rid of that?


I had a vague recollection that it was for AIX, but the commit indeed 
mentions BeOS.  Could be removed in either case.






Re: Support a wildcard in backtrace_functions

2024-04-18 Thread Peter Eisentraut

On 18.04.24 09:02, Michael Paquier wrote:

On Fri, Apr 12, 2024 at 09:36:36AM +0900, Michael Paquier wrote:

log_backtrace speaks a bit more to me as a name for this stuff because
it logs a backtrace.  Now, there is consistency on HEAD as well
because these GUCs are all prefixed with "backtrace_".  Would
something like a backtrace_mode where we have an enum rather than a
boolean be better?  One thing would be to redesign the existing GUC as
having two values on HEAD as of:
- "none", to log nothing.
- "internal", to log backtraces for internal errors.

Then this could be extended with more modes, to discuss in future
releases as new features.


As this is an open item, let's move on here.

Attached is a proposal of patch for this open item, switching
backtrace_on_internal_error to backtrace_mode with two values:
- "none", to log no backtraces.
- "internal", to log backtraces for internal errors.

The rest of the proposals had better happen as a v18 discussion, where
extending this GUC is a benefit.


Why exactly is this an open item?  Is there anything wrong with the 
existing feature?






Re: pgsql: meson: Add initial version of meson based build system

2024-04-18 Thread Christoph Berg
Re: Andres Freund
> > This commit broke VPATH builds when the original source directory
> > contains symlinks.
> 
> I.e. a symlink to the source directory, not a symlink inside the source
> directory.

Yes.

> Argh, this is missing spaces around the '=', leading to the branch always
> being entered.

Glad I found at least something :)

> Uh, I don't think it does? Afaict this failure is entirely unrelated to 'touch
> meson.build'?  From what I can tell the problem is that config/prep_buildtree
> is invoked with the symlinked path, and that that doesn't seem to work:

Apparently I messed up both the git bisect run and the manual
confirmation of the problem later. Trying again now, the problem has
existed at least since 2002, probably earlier.

I've been using this directory layout for years; apparently so far
I've only ever used non-VPATH builds or dpkg-buildpackage, which
probably canonicalizes the path before building, given that it works.

Since no one else has been complaining, it might not be worth fixing.

Sorry for the noise!

Christoph




Re: POC: GROUP BY optimization

2024-04-18 Thread Andrei Lepikhov

On 4/12/24 06:44, Tom Lane wrote:

* Speaking of pathnodes.h, PathKeyInfo is a pretty uninformative node
type name, and the comments provided for it are not going to educate
anybody.  What is the "association" between the pathkeys and clauses?
I guess the clauses are supposed to be SortGroupClauses, but not all
pathkeys match up to SortGroupClauses, so what then?  This is very
underspecified, and fuzzy thinking about data structures tends to lead
to bugs.
I'm not the best at English documentation and naming style, so feel
free to check my version.


So I'm quite afraid that there are still bugs lurking here.
What's more, given that the plans this patch makes apparently
seldom win when you don't put a thumb on the scales, it might
take a very long time to isolate those bugs.  If the patch
produces a faulty plan (e.g. not sorted properly) we'd never
notice if that plan isn't chosen, and even if it is chosen
it probably wouldn't produce anything as obviously wrong as
a crash.

I added checks on the proper order of pathkeys and clauses.
If you really care about that, we should spend additional time and
rewrite the code to generate the order of clauses somewhere in the plan
creation phase. For example, during the create_group_plan call, we could
use the processed_groupClause list, cycle through subpath->pathkeys, set
the order, and detect whether the pathkeys list corresponds to the
GROUP BY or is enough to build a grouping plan.

Anyway, making this part of the code more resistant to blunders is another story.

--
regards,
Andrei Lepikhov
Postgres Professional
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index aa80f6486c..9c079270ec 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -461,7 +461,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
 /*
  * pathkeys_are_duplicate
  *		Check if give pathkeys are already contained the list of
- *		PathKeyInfo's.
+ *		GroupByOrdering's.
  */
 static bool
 pathkeys_are_duplicate(List *infos, List *pathkeys)
@@ -470,7 +470,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 
 	foreach(lc, infos)
 	{
-		PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
+		GroupByOrdering *info = lfirst_node(GroupByOrdering, lc);
 
 		if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
 			return true;
@@ -482,7 +482,7 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
  * get_useful_group_keys_orderings
  *		Determine which orderings of GROUP BY keys are potentially interesting.
  *
- * Returns a list of PathKeyInfo items, each representing an interesting
+ * Returns a list of GroupByOrdering items, each representing an interesting
  * ordering of GROUP BY keys.  Each item stores pathkeys and clauses in the
  * matching order.
  *
@@ -495,15 +495,15 @@ pathkeys_are_duplicate(List *infos, List *pathkeys)
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 {
-	Query		   *parse = root->parse;
-	List		   *infos = NIL;
-	PathKeyInfo	   *info;
+	Query			   *parse = root->parse;
+	List			   *infos = NIL;
+	GroupByOrdering	   *info;
 
 	List	   *pathkeys = root->group_pathkeys;
 	List	   *clauses = root->processed_groupClause;
 
 	/* always return at least the original pathkeys/clauses */
-	info = makeNode(PathKeyInfo);
+	info = makeNode(GroupByOrdering);
 	info->pathkeys = pathkeys;
 	info->clauses = clauses;
 	infos = lappend(infos, info);
@@ -539,7 +539,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -564,7 +564,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 			(enable_incremental_sort || n == list_length(root->sort_pathkeys)) &&
 			!pathkeys_are_duplicate(infos, pathkeys))
 		{
-			info = makeNode(PathKeyInfo);
+			info = makeNode(GroupByOrdering);
 			info->pathkeys = pathkeys;
 			info->clauses = clauses;
 
@@ -574,18 +574,29 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 
 #ifdef USE_ASSERT_CHECKING
 	{
-		PathKeyInfo	   *pinfo = linitial_node(PathKeyInfo, infos);
+		GroupByOrdering	   *pinfo = linitial_node(GroupByOrdering, infos);
 		ListCell	   *lc;
 
 		/* Test consistency of info structures */
 		for_each_from(lc, infos, 1)
 		{
-			info = lfirst_node(PathKeyInfo, lc);
+			ListCell *lc1, *lc2;
+
+			info = lfirst_node(GroupByOrdering, lc);
 
 			Assert(list_length(info->clauses) == list_length(pinfo->clauses));
 			Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
 			Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
 			Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
+
+			forboth(lc1, info->clauses, lc2, info->pathkeys)
+			{
+SortGroupClause *sgc =

Re: plenty code is confused about function level static

2024-04-18 Thread Peter Eisentraut

On 18.04.24 10:43, Andrey M. Borodin wrote:

On 18 Apr 2024, at 02:39, Andres Freund  wrote:

There are lots of places that could benefit from adding 'static
const'.


+1 for helping compiler.
GCC has -Wsuggest-attribute=const; we could count these warnings and treat
an increase as an error :)


This is different.  It's an attribute, not a qualifier, and it's for 
functions, not variables.  But it could undoubtedly also have a 
performance benefit.






Re: brininsert optimization opportunity

2024-04-18 Thread Tomas Vondra
On 4/18/24 09:07, Michael Paquier wrote:
> On Thu, Feb 29, 2024 at 01:20:39PM +0100, Alvaro Herrera wrote:
>> I think it's not an issue, or rather that we should not try to guess.
>> Instead make it a simple rule: if aminsert is called, then
>> aminsertcleanup must be called afterwards, period.
>>
>> I agree it would be nice to have a way to verify, but it doesn't seem
>> 100% essential.  After all, it's not very common to add new calls to
>> aminsert.
> 
> This thread is listed as an open item.  What's the follow-up plan?
> The last email of this thread is dated as of the 29th of February,
> which was 6 weeks ago.

Apologies, I got distracted by the other patches. The bug is still
there, I believe the patch shared by Alvaro in [1] is the right way to
deal with it. I'll take care of that today/tomorrow.


[1]
https://www.postgresql.org/message-id/202401091043.e3nrqiad6gb7@alvherre.pgsql

regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Disallow changing slot's failover option in transaction block

2024-04-18 Thread shveta malik
On Thu, Apr 18, 2024 at 11:40 AM Hayato Kuroda (Fujitsu)
 wrote:
>
> Dear Shveta,
>
> Sorry for the delayed response. I missed your post.
>
> > +1.
> >
> > Similar to ALTER SUB, CREATE SUB also needs to be fixed. This is
> > because we call alter_replication_slot in CREATE SUB as well, for the
> > case when slot_name is provided and create_slot=false. But the tricky
> > part is we call alter_replication_slot() when creating subscription
> > for both failover=true and false. That means if we want to fix it on
> > the similar line of ALTER SUB, we have to disallow user from executing
> > the CREATE SUBSCRIPTION (slot_name = xx) in a txn block, which seems
> > to break some existing use cases. (previously, user can execute such a
> > command inside a txn block).
> >
> > So, we need to think if there are better ways to fix it.  After
> > discussion with Hou-San offlist, here are some ideas:
> > 1. do not alter replication slot's failover option when CREATE
> > SUBSCRIPTION   WITH failover=false. This means we alter replication
> > slot only when failover is set to true. And thus we can disallow
> > CREATE SUB WITH (slot_name =xx, failover=true, create_slot=false)
> > inside a txn block.
> >
> > This option allows user to run CREATE-SUB(create_slot=false) with
> > failover=false in txn block like earlier. But on the downside, it
> > makes the behavior inconsistent for otherwise simpler option like
> > failover,  i.e. with failover=true, CREATE SUB is not allowed in txn
> > block while with failover=false, it is allowed. It makes it slightly
> > complex to be understood by user.
> >
> > 2. let's not disallow CREATE SUB in txn block as earlier, just don't
> > perform internal alter-failover during CREATE SUB for existing
> > slots(create_slot=false, slot_name=xx)  i.e. when create_slot is
> > false, we will ignore failover parameter of CREATE SUB and it is
> > user's responsibility to set it appropriately using ALTER SUB cmd. For
> > create_slot=true, the behaviour of CREATE-SUB remains same as earlier.
> >
> > This option does not add new restriction for CREATE SUB wrt txn block.
> > In context of failover with create_slot=false, we already have a
> > similar restriction (documented one) for ALTER SUB, i.e. with 'ALTER
> > SUBSCRIPTION SET(slot_name = new)', user needs to alter the new slot's
> > failover by himself. CREATE SUB can also be documented in a similar way.
> > This seems simpler to be understood considering existing ALTER SUB's
> > behavior as well. Plus, this will make CREATE-SUB code slightly
> > simpler and thus easily manageable.
> >
> > 3. add a alter_slot option for CREATE SUBSCRIPTION, we can only alter
> > the  slot's failover if alter_slot=true. And so we can disallow CREATE
> > SUB WITH (slot_name =xx, alter_slot=true) inside a txn block.
> >
> > This seems a clean way, as everything will be as per user's consent
> > based on alter_slot parameter. But on the downside, this will need
> > introducing additional parameter and also adding new restriction of
> > running CREATE-sub in txn  block for a specific case.
> >
> > 4. Don't alter replication in subscription DDLs. Instead, try to alter
> > replication slot's failover in the apply worker. This means we need to
> > execute alter_replication_slot each time before starting streaming in
> > the apply worker.
> >
> > This does not seem appealing to execute alter_replication_slot
> > everytime the apply worker starts. But if others think it as a better
> > option, it can be further analyzed.
>
> Thanks for describing; I also prefer 2, because it seems a bit strange that
> a CREATE statement leads to an ALTER.

Thanks for feedback.

> > Currently, our preference is option 2, as that looks a clean solution
> > and also aligns with ALTER-SUB behavior which is already documented.
> > Thoughts?
> >
> > 
> > Note that we could not refer to the design of two_phase here, because
> > two_phase can be considered as a streaming option, so it's fine to
> > change the two_phase along with START_REPLICATION command. (the
> > two_phase is not changed in subscription DDLs, but get changed in
> > START_REPLICATION command). But the failover is closely related to a
> > replication slot itself.
> > 

Sorry for causing confusion. This is not the documented statement; it
was an additional note to support our analysis.

> Sorry, I cannot find statements. Where did you refer?

When I said that option 2 aligns with ALTER-SUB documented behaviour,
I meant the doc described in [1] stating "When altering the slot_name,
the failover and two_phase property values of the named slot may
differ from the counterpart failover and two_phase parameters
specified in the subscription"

[1]: 
https://www.postgresql.org/docs/devel/sql-altersubscription.html#SQL-ALTERSUBSCRIPTION-PARAMS-SET

thanks
Shveta




Re: Combine headerscheck and cpluspluscheck scripts

2024-04-18 Thread Peter Eisentraut

On 16.04.24 17:17, Anton Voloshin wrote:

On 10/03/2024 12:03, Peter Eisentraut wrote:

Committed, thanks.


This commit (7b8e2ae2f) has turned the cpluspluscheck script into a
--cplusplus option for headerscheck.  I propose updating
src/tools/pginclude/README correspondingly; please see the attached patch.


Fixed, thanks!





Re: BitmapHeapScan streaming read user and prelim refactoring

2024-04-18 Thread Tomas Vondra
On 4/18/24 09:10, Michael Paquier wrote:
> On Sun, Apr 07, 2024 at 10:54:56AM -0400, Melanie Plageman wrote:
>> I've added an open item [1], because what's one open item when you can
>> have two? (me)
> 
> And this is still an open item as of today.  What's the plan to move
> forward here?

AFAIK the plan is to replace the asserts with actually resetting the
rs_empty_tuples_pending field to 0, as suggested by Melanie a week ago.
I assume she was busy with the post-freeze AM reworks last week, so this
was on a back burner.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




Re: Support a wildcard in backtrace_functions

2024-04-18 Thread Jelte Fennema-Nio
On Thu, 18 Apr 2024 at 10:50, Peter Eisentraut  wrote:
> Why exactly is this an open item?  Is there anything wrong with the
> existing feature?

The name of the GUC backtrace_on_internal_error is so specific that
it's impossible to extend our backtrace behaviour in future releases
without adding yet another backtrace GUC. You started the discussion
on renaming it upthread:

On Fri, 8 Mar 2024 at 15:51, Peter Eisentraut  wrote:
> What is the relationship of these changes with the recently added
> backtrace_on_internal_error?  We had similar discussions there, I feel
> like we are doing similar things here but slightly differently.  Like,
> shouldn't backtrace_functions_min_level also affect
> backtrace_on_internal_error?  Don't you really just want
> backtrace_on_any_error?  You are sneaking that in through the backdoor
> via backtrace_functions.  Can we somehow combine all these use cases
> more elegantly?  backtrace_on_error = {all|internal|none}?




Re: WIP Incremental JSON Parser

2024-04-18 Thread Andrew Dunstan


On 2024-04-18 Th 02:04, Michael Paquier wrote:

On Wed, Apr 10, 2024 at 07:47:38AM -0400, Andrew Dunstan wrote:

Here's v2 of the cleanup patch 4, that fixes some more typos kindly pointed
out to me by Alexander Lakhin.

I can see that this has been applied as of 42fa4b660143 with some
extra commits.

Anyway, I have noticed another thing in the surroundings that's
annoying.  003 has this logic:
use File::Temp qw(tempfile);
[...]
my ($fh, $fname) = tempfile();

print $fh $stdout,"\n";

close($fh);

This creates a temporary file in /tmp/ that remains around, slowly
bloating the temporary file space on a node while leaving some data
behind.



My bad, I should have used the UNLINK option like in the other tests.
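
(For concreteness, the option in question — File::Temp then removes the
file in an END block at program exit:)

    my ($fh, $fname) = tempfile(UNLINK => 1);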




  Why use File::Temp::tempfile here?  Couldn't you just use a
file in a PostgreSQL::Test::Utils::tempdir() that would be cleaned up
once the test finishes?



That's another possibility, but I think the above is the simplest.




Per [1], escape_json() has no coverage outside its default path.  Is
that intended?



Not particularly. I'll add some stuff to get complete coverage.




Documenting all these test files with a few comments would be welcome,
as well, with some copyright notices...



ok




 json_file = fopen(testfile, "r");
 fstat(fileno(json_file), &statbuf);
 bytes_left = statbuf.st_size;

No checks on failure of fstat() here?



ok will fix
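
(A sketch of the missing checks; the error-reporting calls are my choice of
pg_fatal() from common/logging.h, the committed fix may do it differently:)

    json_file = fopen(testfile, "r");
    if (json_file == NULL)
        pg_fatal("could not open \"%s\": %m", testfile);
    if (fstat(fileno(json_file), &statbuf) != 0)
        pg_fatal("could not stat \"%s\": %m", testfile);
    bytes_left = statbuf.st_size;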




 json_file = fopen(argv[2], "r");

Second one in test_json_parser_perf.c, with more stuff for fread().



ok will fix

cheers

andrew

--
Andrew Dunstan
EDB:https://www.enterprisedb.com


Re: Support a wildcard in backtrace_functions

2024-04-18 Thread Jelte Fennema-Nio
On Thu, 18 Apr 2024 at 09:02, Michael Paquier  wrote:
>
> On Fri, Apr 12, 2024 at 09:36:36AM +0900, Michael Paquier wrote:
> > log_backtrace speaks a bit more to me as a name for this stuff because
> > it logs a backtrace.  Now, there is consistency on HEAD as well
> > because these GUCs are all prefixed with "backtrace_".  Would
> > something like a backtrace_mode where we have an enum rather than a
> > boolean be better?

I guess it depends what we want consistency with. If we want naming
consistency with all other LOGGING_WHAT GUCs or if we want naming
consistency with the current backtrace_functions GUC. I personally
like log_backtrace slightly better, but I don't have a super strong
opinion on this either. Another option could be plain "backtrace".

> > One thing would be to redesign the existing GUC as
> > having two values on HEAD as of:
> > - "none", to log nothing.
> > - "internal", to log backtraces for internal errors.

If we go for backtrace_mode or backtrace, then I think I'd prefer
"disabled"/"off" and "internal_error" for these values.


> The rest of the proposals had better happen as a v18 discussion, where
> extending this GUC is a benefit.

agreed




Re: Add SPLIT PARTITION/MERGE PARTITIONS commands

2024-04-18 Thread Alexander Korotkov
Hi, Dmitry!

On Mon, Apr 15, 2024 at 6:26 PM Dmitry Koval  wrote:
>
> Hi!
>
> > Please, find a my version of this fix attached.
>
> Is it possible to make a small addition to the file v6-0001 ... .patch
> (see attachment)?
>
> Most important:
> 1) Line 19:
>
> + mergePartName = makeRangeVar(cmd->name->schemaname, tmpRelName, -1);
>
> (temporary table should use the same schema as the partition);
>
> 2) Lines 116-123:
>
> +RESET search_path;
> +
> +-- Can't merge persistent partitions into a temporary partition
> +ALTER TABLE t MERGE PARTITIONS (tp_0_1, tp_1_2) INTO pg_temp.tp_0_2;
> +
> +SET search_path = pg_temp, public;
>
> (Alexandr Lakhin's test for using the pg_temp schema explicitly).
>
>
> The rest of the changes in v6_afterfix.diff are not very important and
> can be ignored.

Thank you.  I've integrated your changes.

The revised patchset is attached.
1) I've split the fix for the CommandCounterIncrement() issue and the
fix for relation persistence issue into a separate patch.
2) I've validated that the lock on the new partition is held in
createPartitionTable() after ProcessUtility() as pointed out by
Robert.  So, no need to place the lock again.
3) Added fix for problematic error message as a separate patch [1].
4) Added rename "salemans" => "salesmen" for tests as a separate patch.

I think these fixes are reaching committable shape, but I'd like
someone to check it before I push.

Links.
1. 
https://postgr.es/m/20240408.152402.1485994009160660141.horikyota.ntt%40gmail.com

--
Regards,
Alexander Korotkov


v6-0002-Verify-persistence-of-new-partitions-during-MERGE.patch
Description: Binary data


v6-0004-Grammar-fix-for-tests-of-partition-MERGE-SPLIT-op.patch
Description: Binary data


v6-0003-Fix-error-message-in-check_partition_bounds_for_s.patch
Description: Binary data


v6-0001-Add-missing-CommandCounterIncrement-to-ATExecMerg.patch
Description: Binary data


Re: Cutting support for OpenSSL 1.0.1 and 1.0.2 in 17~?

2024-04-18 Thread Peter Eisentraut

On 16.04.24 10:17, Daniel Gustafsson wrote:

I forgot (and didn't check) that we backpatched 01e6f1a842f4, with that in mind
I agree that we should backpatch 0003 as well to put LibreSSL on par as much as
we can.  0004 is a fix for the LibreSSL support, not adding anything new, so
pushing that to master now makes sense.  Unless objections are raised I'll push
0001, 0003 and 0004 shortly.  0002 and 0005 can hopefully be addressed in the
July commitfest.


Review of the latest batch:

* v9-0001-Doc-Use-past-tense-for-things-which-happened-in-t.patch

Ok

* v9-0002-Remove-support-for-OpenSSL-1.0.2.patch

Ok, but maybe make the punctuation consistent here:

+  # Function introduced in OpenSSL 1.0.2, not in LibreSSL.
+  ['SSL_CTX_set_cert_cb'],
+
+  # Function introduced in OpenSSL 1.1.1, not in LibreSSL
   ['X509_get_signature_info'],

* v9-0003-Support-disallowing-SSL-renegotiation-in-LibreSSL.patch

ok

* v9-0004-Support-SSL_R_VERSION_TOO_LOW-on-LibreSSL.patch

Seems ok, but the reason isn't clear to me.  Are there LibreSSL versions 
that have SSL_R_VERSION_TOO_LOW but not SSL_R_VERSION_TOO_HIGH?  Maybe 
this could be explained better.


Also, "OpenSSL 7.2" in the commit message probably meant "OpenBSD"?

* v9-0005-Remove-pg_strong_random-initialization.patch

I don't understand the reason for this phrase in the commit message: 
"1.1.1 is being increasingly phased out from production use".  Did you 
mean 1.1.0 there?


Conditionally sticking the RAND_poll() into pg_strong_random(), does 
that have the effect we want?  It wouldn't reinitialize after a fork, 
AFAICT.
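
(A sketch of the hazard as I read it — this is a guess at the shape of the
code, not the actual patch: if the initialization guard is a process-local
static, a forked child inherits it already set and never re-polls:)

    #include <stdbool.h>
    #include <stddef.h>
    #include <openssl/rand.h>

    bool
    pg_strong_random(void *buf, size_t len)
    {
        static bool initialized = false;   /* copied into children by fork() */

        if (!initialized)
        {
            if (RAND_poll() != 1)
                return false;
            initialized = true;            /* child sees true, skips RAND_poll() */
        }
        return RAND_bytes((unsigned char *) buf, (int) len) == 1;
    }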



If everything is addressed, I agree that 0001, 0003, and 0004 can go 
into PG17, the rest later.






Re: POC: GROUP BY optimization

2024-04-18 Thread Alexander Korotkov
Hi, Andrei!

On Thu, Apr 18, 2024 at 11:54 AM Andrei Lepikhov
 wrote:
> On 4/12/24 06:44, Tom Lane wrote:
> > * Speaking of pathnodes.h, PathKeyInfo is a pretty uninformative node
> > type name, and the comments provided for it are not going to educate
> > anybody.  What is the "association" between the pathkeys and clauses?
> > I guess the clauses are supposed to be SortGroupClauses, but not all
> > pathkeys match up to SortGroupClauses, so what then?  This is very
> > underspecified, and fuzzy thinking about data structures tends to lead
> > to bugs.
> I'm not the best at English documentation and naming style, so feel
> free to check my version.
> >
> > So I'm quite afraid that there are still bugs lurking here.
> > What's more, given that the plans this patch makes apparently
> > seldom win when you don't put a thumb on the scales, it might
> > take a very long time to isolate those bugs.  If the patch
> > produces a faulty plan (e.g. not sorted properly) we'd never
> > notice if that plan isn't chosen, and even if it is chosen
> > it probably wouldn't produce anything as obviously wrong as
> > a crash.
> I added checks on the proper order of pathkeys and clauses.
> If you really care about that, we should spend additional time and
> rewrite the code to generate the order of clauses somewhere in the plan
> creation phase. For example, during the create_group_plan call, we could
> use the processed_groupClause list, cycle through subpath->pathkeys, set
> the order, and detect whether the pathkeys list corresponds to the
> GROUP BY or is enough to build a grouping plan.
> Anyway, making this part of the code more resistant to blunders is another story.

Thank you for the fixes you've proposed.  I didn't look much into
details yet, but I think the main concern Tom expressed in [1] is
whether the feature is reasonable at all.  I think at this stage the
most important thing is to come up with convincing examples showing
how huge performance benefits it could cause.  I will return to this
later today and will try to provide some convincing examples.

Links
1. https://www.postgresql.org/message-id/266850.1712879082%40sss.pgh.pa.us

--
Regards,
Alexander Korotkov




Re: AIX support

2024-04-18 Thread Sriram RK
Thanks Noah and Team,

We (IBM-AIX team) looked into this issue

https://www.postgresql.org/message-id/20240225194322...@rfd.leadboat.com

This is related to the compiler issue. The compilers xlc(13.1) and gcc(8.0) 
have issues. But we verified that this issue is resolved with the newer 
compiler versions openXL(xlc17.1) and gcc(12.0) onwards.

We reported this issue to the xlc team and they have noted it. A fix
might be possible in May in xlc v16.  We would like to understand
whether the community can start using the latest compilers to build the
source.

Also, as part of the support, we will help in fixing all the issues related
to AIX and continue to support AIX for Postgres. If we need another CI
environment, we can work to make one available. But for the time being, can
we start by reverting the patch that removed AIX support?

We want to make a note that Postgres is used extensively in our IBM product
and is relied upon by multiple customers.

Please let us know if there are any specific details you'd like to discuss 
further.

Regards,
Sriram.


Re: Transparent column encryption

2024-04-18 Thread Peter Eisentraut

On 10.04.24 16:14, Jelte Fennema-Nio wrote:

(The CEK can't be rotated easily, since
that would require reading out all the data from a table/column and
reencrypting it.  We could/should add some custom tooling for that,
but it wouldn't be a routine operation.)


This seems like something that requires some more thought because CEK
rotation seems just as important as CMK rotation (often both would be
compromised at the same time).


Hopefully, the reason for key rotation is mainly that policies require 
key rotation, not that keys get compromised all the time.  That's the 
reason for having this two-tier key system in the first place.  This 
seems pretty standard to me.  For example, I can change the password on 
my laptop's file system encryption, which somehow wraps a lower-level 
key, but I can't reencrypt the actual file system in place.



+The plaintext inside
+the ciphertext is always in text format, but this is invisible to the
+protocol.

It seems like it would be useful to have a way of storing the
plaintext in binary form too. I'm not saying this should be part of
the initial version, but it would be good to keep that in mind with
the design.


Two problems here: One, for deterministic encryption, everyone needs to 
agree on the representation, otherwise equality comparisons won't work. 
 Two, if you give clients the option of storing text or binary, then 
clients also get back a mix of text or binary, and it will be a mess.
Just giving the option of storing the payload in binary wouldn't be that 
hard, but it's not clear what you can sensibly do with that in the end.



+ The session-specific identifier of the key.

Is it necessary for this identifier to be session-specific? Why not
use a global identifier like an oid? Anything session specific makes
the job of transaction poolers quite a bit harder. If this identifier
would be global, then the message can be forwarded as is to the client
instead of re-mapping identifiers between clients and servers (like is
needed for prepared statements).


The point was just to avoid saying specifically that the OID will be 
sent, because then that would tie the catalog representation to the 
protocol, which seems unnecessary.  Maybe we can reword that somehow.


In terms of connection pooling, this feature as it is conceived right 
now would only work in session pooling anyway.  Even if the identifiers 
somehow were global (but OIDs can also change and are not guaranteed 
unique forever), the state of which keys have already been sent is 
session state.



+   Additional algorithms may be added to this protocol specification without a
+   change in the protocol version number.

What's the reason for not requiring a version bump for this?


This is kind of like SASL or TLS can add new methods dynamically without 
requiring a new version.  I mean, as we are learning, making new 
protocol versions is kind of hard, so the point was to avoid it.



+  If the protocol extension _pq_.column_encryption is
+  enabled (see ), then
+  there is also the following for each parameter:

It seems a little bit wasteful to include these for all columns, even
the ones that don't require encryption. How about only adding these
fields when the format code is 0x11?


I guess you could do that, but wouldn't that making the decoding of 
these messages much more complicated?  You would first have to read the 
"short" variant, decode the format, and then decide to read the rest. 
Seems weird.



Finally, I'm trying to figure out if this really needs to be a
protocol extension or if a protocol version bump would work as well
without introducing a lot of work for clients/poolers that don't care
about it (possibly with some modifications to the proposed protocol
changes).


That's not something this patch cares about, but the philosophical 
discussions in the other thread on protocol versioning etc. appear to 
lean toward protocol extension.



What makes this a bit difficult for me is that there's not
much written in the documentation on what is supposed to happen for
encrypted columns when the protocol extension is not enabled. Is the
content just returned/written like it would be with a bytea?


Yes, that's what would happen, and that's the intention, so that for 
example you can use pg_dump to back up encrypted columns without having 
to decrypt them.



A related question to this is that currently libpq throws an error if
e.g. a master key realm is not defined but another one is. Is that
really what we want? Is not having one of the realms really that
different from not providing any realms at all?


Can you provide a more concrete example of what scenario you have a 
concern about?






Re: AIX support

2024-04-18 Thread Heikki Linnakangas
On 18 April 2024 14:15:43 GMT+03:00, Sriram RK  wrote:
>Thanks Noah and Team,
>
>We (IBM-AIX team) looked into this issue
>
>https://www.postgresql.org/message-id/20240225194322...@rfd.leadboat.com
>
>This is related to the compiler issue. The compilers xlc(13.1) and gcc(8.0) 
>have issues. But we verified that this issue is resolved with the newer 
>compiler versions openXL(xlc17.1) and gcc(12.0) onwards.
>
>We reported this issue to the xlc team and they have noted it. A fix
>might be possible in May in xlc v16.  We would like to understand
>whether the community can start using the latest compilers to build the
>source.
>
>Also, as part of the support, we will help in fixing all the issues related
>to AIX and continue to support AIX for Postgres. If we need another CI
>environment, we can work to make one available. But for the time being, can
>we start by reverting the patch that removed AIX support?

Let's start by setting up a new AIX buildfarm member. Regardless of what we do 
with v17, we continue to support AIX on the stable branches, and we really need 
a buildfarm member to keep the stable branches working anyway.

>We want to make a note that Postgres is used extensively in our IBM product
>and is relied upon by multiple customers.

Noted. I'm glad to hear you are interested in putting in some effort for
this. The situation among the current maintainers is that none of us has
much interest, resources, or knowledge to keep the AIX build working, so
we'll definitely need the help.

No promises on v17, but let's at least make sure the stable branches keep 
working. And with the patches and buildfarm support from you, maybe v17 is 
feasible too.


- Heikki




Re: AIX support

2024-04-18 Thread Sriram RK

> Let's start by setting up a new AIX buildfarm member. Regardless of what we 
> do with v17, we continue to support AIX on the stable branches, and we really 
> need a buildfarm member to keep the stable branches working anyway.

Thanks Heikki. We have already built the source code (v17 + bcdfa5f2e2f) on
our local nodes. We will try to set up the buildfarm member and let you know.
Is there any specific configuration you are looking for?

Regards,
Sriram.


Re: [PATCH] Replace magic constant 3 with NUM_MERGE_MATCH_KINDS

2024-04-18 Thread Aleksander Alekseev
Hi,

> I guess the argument against inserting an enum element at the end is
> that a switch statement on the enum value might generate a compiler
> warning if it didn't have a default clause.
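
(A self-contained illustration of that warning, with invented names; gcc's
-Wswitch, enabled by -Wall, is the check in question:)

    typedef enum
    {
        MATCH_KIND_A,
        MATCH_KIND_B,
        NUM_MATCH_KINDS             /* counter added as a trailing member */
    } MatchKind;

    static const char *
    match_kind_name(MatchKind kind)
    {
        switch (kind)
        {
            case MATCH_KIND_A:
                return "a";
            case MATCH_KIND_B:
                return "b";
                /* with no default: clause, gcc warns that enumeration
                 * value 'NUM_MATCH_KINDS' is not handled in the switch */
        }
        return "unknown";
    }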

Fair point. PFA the alternative version of the patch.

-- 
Best regards,
Aleksander Alekseev


v2-0001-Replace-constant-3-with-NUM_MERGE_MATCH_KINDS.patch
Description: Binary data


Re: promotion related handling in pg_sync_replication_slots()

2024-04-18 Thread shveta malik
On Thu, Apr 18, 2024 at 12:35 PM shveta malik  wrote:
>
> To fix above issues, these changes have been made in v7:

Please find v8 attached. Changes are:

1) It fixes a ShutDownSlotSync() issue, where we perform
kill(SlotSyncCtx->pid). There is a chance that after we release the
spinlock and before we perform the kill, the slot-sync worker has
errored out and has already set SlotSyncCtx->pid to InvalidPid (-1),
and thus kill(-1) could result in abnormal process kills on some
platforms. Now, we get the pid under the spinlock and then use it to
perform the kill, to avoid a pid=-1 kill. This is along similar lines
to how ShutdownWalRcv() does it.
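
(In code form, the fix is roughly the following — simplified, with the
signal choice illustrative and field names as used elsewhere in the thread:)

    SpinLockAcquire(&SlotSyncCtx->mutex);
    pid = SlotSyncCtx->pid;
    SpinLockRelease(&SlotSyncCtx->mutex);

    if (pid == InvalidPid)
        return;                     /* worker already exited, nothing to kill */

    kill(pid, SIGINT);              /* safe: pid cannot be -1 here */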

2) Improved comments in code.

3) Updated commit message with new fixes. I had missed to update it in
the previous version.

thanks
Shveta


v8-0001-Handle-stopSignaled-during-sync-function-call.patch
Description: Binary data


Re: plenty code is confused about function level static

2024-04-18 Thread Ranier Vilela
 On 18/04/2024 00:39, Andres Freund wrote:

>We have a fair amount of code that uses non-constant function level static
>variables for read-only data. Which makes little sense - it prevents the
>compiler from understanding

>a) that the data is read only and can thus be put into a segment that's
>shared between all invocations of the program
>b) the data will be the same on every invocation, and thus from optimizing
>based on that.

>The most common example of this is that all our binaries use
>static struct option long_options[] = { ... };
>which prevents long_options from being put into read-only memory.

+1. static const allows the compiler to make additional optimizations.

>There are lots of places that could benefit from adding 'static
>const'.

I found a few more places.

Patch 004

The opposite would also help: adding static.
In these places, I believe it is safe to add static,
allowing the compiler to definitively place the data in read-only memory.

Patch 005

best regards,

Ranier Vilela


0004-static-const-convert-plpython.patch
Description: Binary data


0005-const-convert-static-const.patch
Description: Binary data


Re: Trigger violates foreign key constraint

2024-04-18 Thread Aleksander Alekseev
Hi,

> Laurenz Albe  writes:
> > Patch v3 is attached.
>
> I agree with documenting this hazard, but I think it'd be better
> to do so in the "Triggers" chapter.  There is no hazard unless
> you are writing user-defined triggers, which is surely far fewer
> people than use foreign keys.  So I suggest something like the
> attached.

I don't mind changing the chapter, but I prefer the wording chosen in
v3. The explanation in v4 is somewhat hard to follow IMO.

-- 
Best regards,
Aleksander Alekseev




Re: Solaris tar issues, or other reason why margay fails 010_pg_basebackup?

2024-04-18 Thread Marcel Hofstetter



Thank you, Tom.

SKIP_READLINE_TESTS works. margay is now green again.

Best regards,
Marcel


Am 17.04.2024 um 21:12 schrieb Tom Lane:

Thomas Munro  writes:

This test suite is passing on pollock because it doesn't have IO::Pty
installed.  Could you try uninstalling that perl package for now, so
we can see what breaks next?


If that's inconvenient for some reason, you could also skip the
tab-completion test by setting SKIP_READLINE_TESTS in the
animal's build_env options.

regards, tom lane






Re: fix tablespace handling in pg_combinebackup

2024-04-18 Thread Robert Haas
On Wed, Apr 17, 2024 at 5:50 PM Andres Freund  wrote:
> > +If there are tablespaces present in the backup, include tablespace_map as
> > +a keyword parameter whose value is a hash. When tar_program is used, the
> > +hash keys are tablespace OIDs; otherwise, they are the tablespace pathnames
> > +used in the backup. In either case, the values are the tablespace pathnames
> > +that should be used for the target cluster.
>
> Where would one get these oids?

You pretty much have to pick them out of the tar file names. It sucks,
but it's not this patch's fault. That's just how pg_basebackup works.
If you do a directory format backup, you can use -T to relocate
tablespaces on the fly, using the pathnames from the origin server.
That's a weird convention, and we probably should have based on the
tablespace names and not exposed the server pathnames to the client at
all, but we didn't. But it's still better than what happens when you
do a tar-format backup. In that case you just get a bunch of $OID.tar
files. No trace of the server pathnames remains, and the only way you
could learn the tablespace names is if you rooted through whatever
file contains the contents of the pg_tablespace system catalog. So
you've just got a bunch of OID-named things and it's all up to you to
figure out which one is which and what to put in the tablespace_map
file. I'd call this terrible UI design, but I think it's closer to
absence of UI design.

I wonder if we (as a project) would consider a patch that redesigned
this whole mechanism. Produce ${TABLESPACE_NAME}.tar in tar-format,
instead of ${OID}.tar. In directory-format, relocate via
-T${TABLESPACE_NAME}=${DIR} instead of -T${SERVERDIR}=${DIR}. That
would be a significant compatibility break, and you'd somehow need to
solve the problem of what to put in the tablespace_map file, which
requires OIDs. But it seems like if you could finesse that issue in
some elegant way, the result would just be a heck of a lot more usable
than what we have today.

> Could some of this be simplified by using allow_in_place_tablespaces instead?
> Looks like it'd simplify at least the extended test somewhat?

I don't think we can afford to assume that allow_in_place_tablespaces
doesn't change the behavior. I said (at least off-list) when that
feature was introduced that there was no way it was going to remain an
isolated development hack, and I think that's proved to be 100%
correct. We keep needing code to support it in more places, and I
expect that to continue. Probably we're going to need to start testing
everything both ways, which I think was a pretty predictable result of
introducing it in the first place.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: documentation structure

2024-04-18 Thread jian he
On Thu, Apr 18, 2024 at 2:37 AM Dagfinn Ilmari Mannsåker wrote:
>
> Andres Freund  writes:
>
> > Hi,
> >
> > On 2024-04-17 12:07:24 +0100, Dagfinn Ilmari Mannsåker wrote:
> >> Andres Freund  writes:
> >> > I think the manual work for writing signatures in sgml is not 
> >> > insignificant,
> >> > nor is the volume of sgml for them. Manually maintaining the signatures 
> >> > makes
> >> > it impractical to significantly improve the presentation - which I don't 
> >> > think
> >> > is all that great today.
> >>
> >> And it's very inconsistent.  For example, some functions use <optional>
> >> tags for optional parameters, others use square brackets, and some use
> >> VARIADIC to indicate variadic parameters, others use
> >> ellipses (sometimes in <optional> tags or brackets).
> >
> > That seems almost inevitably the outcome of many people having to manually
> > infer the recommended semantics, for writing something boring but 
> > nontrivial,
> > from a 30k line file.
>
> As Corey mentioned elsethread, having a markup style guide (maybe a
> comment at the top of the file?) would be nice.
>
> >> > And the lack of argument names in the pg_proc entries is occasionally 
> >> > fairly
> >> > annoying, because a \df+ doesn't provide enough information to use 
> >> > functions.
> >>
> >> I was also annoyed by this the other day (specifically wrt. the boolean
> >> arguments to pg_ls_dir),
> >
> > My bane is regexp_match et al, I have given up on remembering the argument
> > order.
>
> There's a thread elsewhere about those specifically, but I can't be
> bothered to find the link right now.
>
> >> and started whipping up a Perl script to parse func.sgml and generate
> >> missing proargnames values for pg_proc.dat, which is how I discovered the
> >> above.
> >
> > Nice.
> >
> >> The script currently has a pile of hacky regexes to cope with that,
> >> so I'd be happy to submit a doc patch to turn it into actual markup to get
> >> rid of that, if people think that's a worthwhile use of time and won't 
> >> clash
> >> with any other plans for the documentation.
> >
> > I guess it's a bit hard to say without knowing how voluminous the changes
> > would be. If we end up rewriting the whole file the tradeoff is less clear
> > than if it's a dozen inconsistent entries.
>
> It turned out to not be that many that used [] for optional parameters,
> see the attached patch.
>

hi.
I manually checked the html output. It looks good to me.




Re: cataloguing NOT NULL constraints

2024-04-18 Thread Alvaro Herrera
On 2024-Jan-25, Andrew Bille wrote:

> Starting from b0e96f31, pg_upgrade fails with inherited NOT NULL constraint:
> For example upgrade from 9c13b6814a (or REL_12_STABLE .. REL_16_STABLE) to
> b0e96f31 (or master) with following two tables (excerpt from
> src/test/regress/sql/rules.sql)
> 
> create table test_0 (id serial primary key);
> create table test_1 (id integer primary key) inherits (test_0);

I have pushed a fix which should hopefully fix this problem
(d9f686a72e).  Please give this a look.  Thanks for reporting the issue.

-- 
Álvaro Herrera         Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
"I apologize for the confusion in my previous responses.
 There appears to be an error."   (ChatGPT)




Re: pg17 issues with not-null contraints

2024-04-18 Thread Alvaro Herrera
On 2024-Apr-15, Justin Pryzby wrote:

> Here's a couple more issues affecting upgrades from v16 to v17.
> 
> postgres=# CREATE TABLE a(i int NOT NULL); CREATE TABLE b(i int PRIMARY KEY) 
> INHERITS (a);
> pg_restore: error: could not execute query: ERROR:  constraint 
> "pgdump_throwaway_notnull_0" of relation "b" does not exist
> 
> postgres=# CREATE TABLE a(i int CONSTRAINT a NOT NULL PRIMARY KEY); CREATE 
> TABLE b()INHERITS(a); ALTER TABLE b ADD CONSTRAINT pkb PRIMARY KEY (i);
> pg_restore: error: could not execute query: ERROR:  cannot drop inherited 
> constraint "pgdump_throwaway_notnull_0" of relation "b"

I pushed a fix now, and it should also cover these two issues, which
required only minor changes over what I posted yesterday.  Also, thank
you for pointing out that the patch also fixed Andrew's problem.  It
did, except there was a locking problem which required an additional
tweak.

Thanks for reporting these.

-- 
Álvaro Herrera         Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
"At least to kernel hackers, who really are human, despite occasional
rumors to the contrary" (LWN.net)




Re: pg_combinebackup does not detect missing files

2024-04-18 Thread Robert Haas
On Wed, Apr 17, 2024 at 7:09 PM David Steele  wrote:
> I think here:
>
> +   pg_basebackup only attempts to verify
>
> you mean:
>
> +   pg_combinebackup only attempts to verify
>
> Otherwise this looks good to me.

Good catch, thanks. Committed with that change.

> Fair enough. I accept that your reasoning is not random, but I'm still
> not very satisfied that the user needs to run a separate and rather
> expensive process to do the verification when pg_combinebackup already
> has the necessary information at hand. My guess is that most users will
> elect to skip verification.

I think you're probably right that a lot of people will skip it; I'm
just less convinced than you are that it's a bad thing. It's not a
*great* thing if people skip it, but restore time is actually just
about the worst time to find out that you have a problem with your
backups. I think users would be better served by verifying stored
backups periodically when they *don't* need to restore them. Also,
saying that we have all of the information that we need to do the
verification is only partially true:

- we do have to parse the manifest anyway, but we don't otherwise have
to compute checksums, and I think that cost can be significant
even for CRC-32C and much more significant for any of the SHA variants

- we don't need to read all of the files in all of the backups. if
there's a newer full, the corresponding file in older backups, whether
full or incremental, need not be read

- incremental files other than the most recent only need to be read to
the extent that we need their data; if some of the same blocks have
been changed again, we can economize

How much you save because of these effects is pretty variable. Best
case, you have a 2-backup chain with no manifest checksums, and all
that verification will have to do that you wouldn't otherwise need to do is
walk each older directory tree in toto and cross-check which files
exist against the manifest. That's probably cheap enough that nobody
would be too fussed. Worst case, you have a 10-backup (or whatever)
chain with SHA512 checksums and, say, a 50% turnover rate. In that
case, I think having verification happen automatically could be a
pretty major hit, both in terms of I/O and CPU. If your database is
1TB, it's ~5.5TB of read I/O (one 1TB full backup plus nine 0.5TB
incrementals) instead of ~1TB of read I/O, plus the checksumming.

Now, obviously you can still feel that it's totally worth it, or that
someone in that situation shouldn't even be using incremental backups,
and it's a value judgement, so fair enough. But my guess is that the
efforts that this implementation makes to minimize the amount of I/O
required for a restore are going to be important for a lot of people.

> At least now they'll have the information they need to make an informed
> choice.

Right.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: improve performance of pg_dump --binary-upgrade

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 02:08:28AM -0400, Corey Huinker wrote:
> Bar-napkin math tells me in a worst-case architecture and braindead byte
> alignment, we'd burn 64 bytes per struct, so the 100K tables cited would be
> about 6.25MB of memory.

That doesn't seem too terrible.

> The obvious low-memory alternative would be to make a prepared statement,
> though that does nothing to cut down on the roundtrips.
> 
> I think this is a good trade off.

Cool.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




clang's sanitizer makes stringToNode() extremely slow

2024-04-18 Thread Alexander Lakhin

Hello hackers,

When using a server built with clang (15, 18) with sanitizers enabled,
the last query in the following script:
SET parallel_setup_cost = 0;
SET min_parallel_table_scan_size = 0;

SELECT a::text INTO t FROM generate_series(1, 1000) a;
\timing on
SELECT string_agg(a, ',') FROM t WHERE a = REPEAT('0', 40);

runs for more than a minute on my workstation (a larger repeat count gives
much longer duration):
Time: 66017.594 ms (01:06.018)

The typical stack trace for a running parallel worker is:
#0  0x559a23671885 in __sanitizer::internal_strlen(char const*) ()
#1  0x559a236568ed in StrtolFixAndCheck(void*, char const*, char**, char*, 
int) ()
#2  0x559a236423dc in __interceptor_strtol ()
#3  0x559a240027d9 in atoi (...) at readfuncs.c:629
#5  0x559a23fb03f0 in _readConst () at readfuncs.c:275
#6  parseNodeString () at ./readfuncs.switch.c:29
#7  0x559a23faa421 in nodeRead (
    token=0x7fee75cf3bd2 "{CONST :consttype 25 :consttypmod -1 :constcollid 100 :constlen -1 :constbyval false 
:constisnull false :location -1 :constvalue 44 [ 16 106 24 0 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48 48"...,

    tok_len=1) at read.c:338
#8  0x559a23faa916 in nodeRead (...) at read.c:452
#9  0x559a23fb3f34 in _readOpExpr () at ./readfuncs.funcs.c:279
#10 0x559a23fb0842 in parseNodeString () at ./readfuncs.switch.c:47
#11 0x559a23faa421 in nodeRead (...) at read.c:338
#12 0x559a23faa916 in nodeRead (...) at read.c:452
#13 0x559a23fefb74 in _readSeqScan () at ./readfuncs.funcs.c:3954
#14 0x559a23fb2b97 in parseNodeString () at ./readfuncs.switch.c:559
#15 0x559a23faa421 in nodeRead (...) at read.c:338
#16 0x559a23ffc033 in _readAgg () at ./readfuncs.funcs.c:4685
#17 0x559a23fb2dd3 in parseNodeString () at ./readfuncs.switch.c:611
#18 0x559a23faa421 in nodeRead (...) at read.c:338
#19 0x559a23feb340 in _readPlannedStmt () at ./readfuncs.funcs.c:3685
#20 0x559a23fb2ad1 in parseNodeString () at ./readfuncs.switch.c:541
#21 0x559a23faa421 in nodeRead (...) at read.c:338
#22 0x559a23fa99d8 in stringToNodeInternal (...) at read.c:92
#24 0x559a23d66609 in ExecParallelGetQueryDesc (...) at execParallel.c:1250
#25 ParallelQueryMain (...) at execParallel.c:1424
#26 0x559a238cfe13 in ParallelWorkerMain (...) at parallel.c:1516
#27 0x559a241e5b6a in BackgroundWorkerMain (...) at bgworker.c:848
#28 0x559a241ec254 in postmaster_child_launch (...) at launch_backend.c:265
#29 0x559a241f1c15 in do_start_bgworker (...) at postmaster.c:4270
#30 maybe_start_bgworkers () at postmaster.c:4501
#31 0x559a241f486e in process_pm_pmsignal () at postmaster.c:3774
#32 ServerLoop () at postmaster.c:1667
#33 0x559a241f0ed6 in PostmasterMain (...) at postmaster.c:1372
#34 0x559a23ebe16d in main (...) at main.c:197

The flamegraph (which shows that readDatum() -> __interceptor_strtol() ->
StrtolFixAndCheck() -> __sanitizer::internal_strlen () takes >99% of time)
is attached.
(I could not reproduce this behaviour with gcc's sanitizer.)
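For anyone who wants to poke at this outside the server, here is a
standalone sketch of the suspected pathology (the assumption, drawn from
the stack above, is that the sanitizer's strtol interceptor strlen()s its
whole input first, so each call costs O(remaining string)):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	size_t		len = 50 * 1000 * 1000; /* one huge token string */
	char	   *buf = malloc(len + 1);
	long		sum = 0;

	memset(buf, 'x', len);		/* no digits: each parse stops immediately */
	buf[len] = '\0';

	/*
	 * Call strtol() at many offsets into the same buffer, the way
	 * readDatum()/atoi() walk a serialized node tree.  Natively each call
	 * is O(1); if the interceptor strlen()s the remaining tail first, the
	 * loop turns quadratic.
	 */
	for (size_t off = 0; off < len; off += len / 2000)
		sum += strtol(buf + off, NULL, 10);

	printf("%ld\n", sum);
	free(buf);
	return 0;
}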

Moreover, this query cannot be canceled; you can only kill -SIGKILL the
parallel workers to interrupt it.

Best regards,
Alexander

Re: pg17 issues with not-null contraints

2024-04-18 Thread Justin Pryzby
On Tue, Apr 16, 2024 at 08:11:49PM +0200, Alvaro Herrera wrote:
> This is still missing some cleanup and additional tests, of course.
> Speaking of which, I wonder if I should modify pg16's tests so that they
> leave behind tables set up in this way, to immortalize pg_upgrade
> testing.

That seems like it could be important.  I considered, but never actually
tested, your patch by pg_upgrading across major versions.

BTW, this works up to v16 (although maybe it should not):

| CREATE TABLE ip(id int PRIMARY KEY); CREATE TABLE ic(id int) INHERITS (ip);
| ALTER TABLE ic ALTER id DROP NOT NULL;

Under v17, this fails. Maybe that's okay, but it should probably be
called out in the release notes.
| ERROR:  cannot drop inherited constraint "ic_id_not_null" of relation "ic"

That's the issue that I mentioned in the 6 year old thread.  In the
future (upgrading *from* v17) it won't be possible anymore, right?  It'd
still be nice to detect the issue in advance rather than failing halfway
through the upgrade.  I have a rebased patch while I'll send on that
thread.  I guess it's mostly unrelated to your patch but it'd be nice if
you could take a look.

-- 
Justin




Re: pg_combinebackup fails on file named INCREMENTAL.*

2024-04-18 Thread Robert Haas
On Wed, Apr 17, 2024 at 7:56 PM David Steele  wrote:
> Thanks! I've tested this and it works as advertised.
>
> Ideally I'd want an error on backup if there is a similar file in any
> data directories that would cause an error on combine, but I admit that
> it is vanishingly rare for users to put files anywhere but the root of
> PGDATA, so this approach seems OK to me.

OK, committed. Thanks for the review.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Transparent column encryption

2024-04-18 Thread Jelte Fennema-Nio
On Thu, 18 Apr 2024 at 13:25, Peter Eisentraut  wrote:
> Hopefully, the reason for key rotation is mainly that policies require
> key rotation, not that keys get compromised all the time.

These key rotation policies are generally in place to reduce the
impact of a key compromise by limiting the time a compromised key is
valid.

> This
> seems pretty standard to me.  For example, I can change the password on
> my laptop's file system encryption, which somehow wraps a lower-level
> key, but I can't reencrypt the actual file system in place.

I think the threat model for this proposal and a laptop's file system
encryption are different enough that the same choices/tradeoffs don't
automatically translate. Specifically in this proposal the unencrypted
CEK is present on all servers that need to read/write those encrypted
values. And a successful attacker would then be able to read the
encrypted values forever with this key, because it effectively cannot
be rotated. That is a much bigger attack surface and risk than a
laptop's disk encryption. So, I feel quite strongly that shipping the
proposed feature without being able to re-encrypt columns in an online
fashion would be a mistake.

> That's the
> reason for having this two-tier key system in the first place.

If we allow for online-rotation of the actual encryption key, then
maybe we don't even need this two-tier system ;)

Not having this two-tier system would have a few benefits in my opinion:
1. We wouldn't need to be sending encrypted key material from the
server to every client. Which seems nice from a security, bandwidth
and client implementation perspective.
2. Asymmetric encryption of columns is suddenly an option. Allowing
certain clients to enter encrypted data into the database but not read
it.


> Two problems here: One, for deterministic encryption, everyone needs to
> agree on the representation, otherwise equality comparisons won't work.
>   Two, if you give clients the option of storing text or binary, then
> clients also get back a mix of text or binary, and it will be a mess.
> Just giving the option of storing the payload in binary wouldn't be that
> hard, but it's not clear what you can sensibly do with that in the end.

How about defining at column creation time if the underlying value
should be binary or not? Something like:

CREATE TABLE t(
mytime timestamp ENCRYPTED WITH (column_encryption_key = cek1, binary=true)
);

> Even if the identifiers
> somehow were global (but OIDs can also change and are not guaranteed
> unique forever),

OIDs of existing rows can't just change while a connection is active,
right? (all I know is upgrades can change them but that seems fine)
Also they are unique within a catalog table, right?

> the state of which keys have already been sent is
> session state.

I agree that this is the case. But it's state that can be tracked
fairly easily by a transaction pooler. Similar to how prepared
statements can be tracked. And this is easier to do when the IDs of
the same keys are the same across each session to the server, because
if they differ then you need to do re-mapping of IDs.

> This is kind of like SASL or TLS can add new methods dynamically without
> requiring a new version.  I mean, as we are learning, making new
> protocol versions is kind of hard, so the point was to avoid it.

Fair enough

> I guess you could do that, but wouldn't that making the decoding of
> these messages much more complicated?  You would first have to read the
> "short" variant, decode the format, and then decide to read the rest.
> Seems weird.

I see your point. But with the current approach even for queries that
don't return any encrypted columns, these useless fields would be part
of the RowDescryption. It seems quite annoying to add extra network
and parsing overhead all of your queries even if only a small
percentage use the encryption feature. Maybe we should add a new
message type instead like EncryptedRowDescription, or add some flag
field at the start of RowDescription that can be used to indicate that
there is encryption info for some of the columns.

> Yes, that's what would happen, and that's the intention, so that for
> example you can use pg_dump to back up encrypted columns without having
> to decrypt them.

Okay, makes sense. But I think it would be good to document that.

> > A related question to this is that currently libpq throws an error if
> > e.g. a master key realm is not defined but another one is. Is that
> > really what we want? Is not having one of the realms really that
> > different from not providing any realms at all?
>
> Can you provide a more concrete example of what scenario you have a
> concern about?

A server has table A and B. A is encrypted with a master key realm X
and B is encrypted with master key realm Y. If libpq is only given a
key for realm X, and it then tries to read table B, an error is
thrown. While if you don't provide any realm at all, you can read from
table B just fine.

Re: improve performance of pg_dump --binary-upgrade

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 09:24:53AM +0200, Daniel Gustafsson wrote:
>> On 18 Apr 2024, at 06:17, Nathan Bossart  wrote:
> 
>> The attached work-in-progress patch speeds up 'pg_dump --binary-upgrade'
>> for this case.  Instead of executing the query in every call to the
>> function, we can execute it once during the first call and store all the
>> required information in a sorted array that we can bsearch() in future
>> calls.
> 
> That does indeed seem like a saner approach.  Since we look up the relkind we
> can also remove the is_index parameter to binary_upgrade_set_pg_class_oids
> since we already know that without the caller telling us?

Yeah.  It looks like that's been possible since commit 9a974cb, so I can
write a prerequisite patch for this.

>> One downside of this approach is the memory usage.
> 
> I'm not too worried about the worst-case performance of this.

Cool.  That seems to be the general sentiment.

>> This was more-or-less
>> the first approach that crossed my mind, so I wouldn't be surprised if
>> there's a better way.  I tried to keep the pg_dump output the same, but if
>> that isn't important, maybe we could dump all the pg_class OIDs at once
>> instead of calling binary_upgrade_set_pg_class_oids() for each one.
> 
> Without changing the backend handling of the Oid's we can't really do that
> AFAICT, the backend stores the Oid for the next call so it needs to be per
> relation like now?

Right, this would require additional changes.
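For what it's worth, a minimal sketch of the one-query-then-bsearch()
shape being discussed (struct layout and fetch_all_relations() are
hypothetical, not the actual patch):

#include <stdlib.h>

typedef unsigned int Oid;		/* as in postgres_ext.h */

typedef struct
{
	Oid			oid;			/* sort/search key: pg_class OID */
	char		relkind;
	Oid			relfilenode;
} RelCacheEntry;

/* hypothetical: runs the single catalog query and returns all entries */
extern RelCacheEntry *fetch_all_relations(size_t *nentries);

static RelCacheEntry *rel_cache = NULL;
static size_t rel_cache_size = 0;

static int
cmp_entry(const void *a, const void *b)
{
	Oid			aoid = ((const RelCacheEntry *) a)->oid;
	Oid			boid = ((const RelCacheEntry *) b)->oid;

	return (aoid > boid) - (aoid < boid);
}

static RelCacheEntry *
lookup_rel(Oid oid)
{
	RelCacheEntry key = {oid, 0, 0};

	if (rel_cache == NULL)
	{
		/* first call: one query, then sort once so bsearch() works */
		rel_cache = fetch_all_relations(&rel_cache_size);
		qsort(rel_cache, rel_cache_size, sizeof(RelCacheEntry), cmp_entry);
	}
	return bsearch(&key, rel_cache, rel_cache_size,
				   sizeof(RelCacheEntry), cmp_entry);
}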

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Trigger violates foreign key constraint

2024-04-18 Thread Tom Lane
Aleksander Alekseev  writes:
>> I agree with documenting this hazard, but I think it'd be better
>> to do so in the "Triggers" chapter.  There is no hazard unless
>> you are writing user-defined triggers, which is surely far fewer
>> people than use foreign keys.  So I suggest something like the
>> attached.

> I don't mind changing the chapter, but I prefer the wording chosen in
> v3. The explanation in v4 is somewhat hard to follow IMO.

Well, I didn't like v3 because I thought it was wrong, or at least
fairly misleading.  The fact that we use triggers rather than some
other mechanism to invoke referential updates isn't really relevant.
The issue is that the update actions fire user-defined triggers,
and it's those that are the (potential) problem.

Perhaps we should leave the system triggers out of the discussion
entirely?  More or less like:

If a foreign key constraint specifies referential actions (that
is, cascading updates or deletes), those actions are performed via
ordinary SQL update or delete commands on the referencing table.
In particular, any triggers that exist on the referencing table
will be fired for those changes.  If such a trigger modifies or
blocks the effect of one of these commands, the end result could
be to break referential integrity.  It is the trigger programmer's
responsibility to avoid that.
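To make the hazard concrete, a bare-bones sketch of such a trigger (a C
trigger purely for illustration; a PL/pgSQL trigger returning NULL
misbehaves the same way).  Attached as a BEFORE DELETE row trigger on a
referencing table, it silently suppresses the deletes issued by an
ON DELETE CASCADE action:

#include "postgres.h"
#include "commands/trigger.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(swallow_delete);

Datum
swallow_delete(PG_FUNCTION_ARGS)
{
	if (!CALLED_AS_TRIGGER(fcinfo))
		elog(ERROR, "swallow_delete: not called as trigger");

	/*
	 * Returning NULL from a BEFORE ROW trigger cancels the operation for
	 * this row -- including the ordinary SQL delete that a cascading
	 * foreign-key action performs on the referencing table, leaving
	 * orphaned references behind.
	 */
	return PointerGetDatum(NULL);
}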

regards, tom lane




Re: pgsql: Fix restore of not-null constraints with inheritance

2024-04-18 Thread Alvaro Herrera
On 2024-Apr-18, Alvaro Herrera wrote:

> On 2024-Apr-18, Alvaro Herrera wrote:
> 
> > Lastly, make two changes to pg_dump: 1) do not try to drop a not-null
> > constraint that's marked as inherited; this allows a dump to restore
> > with no errors if a table with a PK inherits from another which also has
> > a PK; 2) avoid giving inherited constraints throwaway names, for the
> > rare cases where such a constraint survives after the restore.
> 
> Hmm, this last bit broke pg_upgrade on crake.  I'll revert this part,
> meanwhile I'll be installing 9.2 to see if it can be fixed in a better way.

Eh, so:

1. running "make check" on pg_upgrade using an oldinstall pointing to
9.2 fails, because PostgreSQL::Test::Cluster doesn't support that
version -- it only goes back to 9.2.  How difficult was it to port it
back to all these old versions?

2. running "make check" with an oldinstall pointing to 10 fails, because
the "invalid database" checks fail:

not ok 7 - invalid database causes failure status (got 0 vs expected 1)

#   Failed test 'invalid database causes failure status (got 0 vs expected 1)'
#   at t/002_pg_upgrade.pl line 407.
not ok 8 - invalid database causes failure stdout /(?^:invalid)/


3. Lastly, even if I put back the code that causes the failures on crake
and restore from 10 (and ignore those two problems), I cannot reproduce
the issues it reported.  Is crake running some funky code that's not
what "make check" on pg_upgrade does, perchance?


I think we should SKIP the tests with invalid databases when running
with an oldinstall 10 and older, because that commit only patches back
to 11:

Author: Andres Freund 
Branch: master [c66a7d75e] 2023-07-13 13:03:28 -0700
Branch: REL_16_STABLE Release: REL_16_0 [a4b4cc1d6] 2023-07-13 13:03:30 -0700
Branch: REL_15_STABLE Release: REL_15_4 [f66403749] 2023-07-13 13:04:45 -0700
Branch: REL_14_STABLE Release: REL_14_9 [d11efe830] 2023-07-13 13:03:33 -0700
Branch: REL_13_STABLE Release: REL_13_12 [81ce6] 2023-07-13 13:03:34 -0700
Branch: REL_12_STABLE Release: REL_12_16 [034a9fcd2] 2023-07-13 13:03:36 -0700
Branch: REL_11_STABLE Release: REL_11_21 [1c38e7ae1] 2023-07-13 13:03:37 -0700

Handle DROP DATABASE getting interrupted

-- 
Álvaro Herrera         Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
"Cada quien es cada cual y baja las escaleras como quiere" (JMSerrat)




Re: Optimizing nbtree ScalarArrayOp execution, allowing multi-column ordered scans, skip scan

2024-04-18 Thread Peter Geoghegan
On Thu, Apr 18, 2024 at 2:13 AM Donghang Lin  wrote:
> It's triggered when a scankey's strategy is set to invalid, while for a
> descending ordered column the strategy needs to get fixed to its commuted
> strategy. That doesn't work if the strategy is invalid.

The problem is that _bt_fix_scankey_strategy shouldn't have been doing
anything with already-eliminated array scan keys in the first place
(whether or not they're on a DESC index column). I just pushed a fix
along those lines.

Thanks for the report!

-- 
Peter Geoghegan




PostgreSQL 17 Beta 1 release date

2024-04-18 Thread Jonathan S. Katz

Hi,

PostgreSQL 17 Beta 1 is planned to be released on May 23, 2024. Please 
continue your hard work on closing out open items[1] ahead of the 
release and have the fixes targeted for the release committed by May 18, 
2024.


Thanks - it's very exciting that we're at this point in the release cycle!

Jonathan

[1] https://wiki.postgresql.org/wiki/PostgreSQL_17_Open_Items


OpenPGP_signature.asc
Description: OpenPGP digital signature


Re: Add SPLIT PARTITION/MERGE PARTITIONS commands

2024-04-18 Thread Alexander Lakhin

Hi Alexander,

18.04.2024 13:35, Alexander Korotkov wrote:


The revised patchset is attached.
1) I've split the fix for the CommandCounterIncrement() issue and the
fix for relation persistence issue into a separate patch.
2) I've validated that the lock on the new partition is held in
createPartitionTable() after ProcessUtility() as pointed out by
Robert.  So, no need to place the lock again.
3) Added fix for problematic error message as a separate patch [1].
4) Added rename "salemans" => "salesmen" for tests as a separate patch.

I think these fixes are reaching committable shape, but I'd like
someone to check it before I push.


I think the feature implementation should also provide tab completion for
SPLIT/MERGE.
(ALTER TABLE t S
fills in only SET now.)

Also, the following MERGE operation:
CREATE TABLE t (i int, PRIMARY KEY(i)) PARTITION BY RANGE (i);
CREATE TABLE tp_0 PARTITION OF t FOR VALUES FROM (0) TO (1);
CREATE TABLE tp_1 PARTITION OF t FOR VALUES FROM (1) TO (2);
ALTER TABLE t MERGE PARTITIONS (tp_0, tp_1) INTO tp_0;

leaves a strange constraint:
\d+ t*
  Table "public.tp_0"
...
Not-null constraints:
    "merge-16385-26BCB0-tmp_i_not_null" NOT NULL "i"

Best regards,
Alexander




Re: ALTER TABLE SET ACCESS METHOD on partitioned tables

2024-04-18 Thread Alvaro Herrera
On 2024-Apr-18, Michael Paquier wrote:

> On Wed, Apr 17, 2024 at 10:31:52AM +0200, Alvaro Herrera wrote:

> > Hmm, maybe we should do a RESET of default_table_access_method before
> > printing the CREATE TABLE to avoid the confusion.
> 
> A hard reset would make the business around currTableAM that decides
> when to generate the SET default_table_access_method queries slightly
> more complicated, while increasing the number of queries run on the
> server.

Hmm, okay.  (I don't think we really care too much about the number of
queries, do we?)

> Fine by me to use two routines to generate the two different commands.
> I am finishing with the attached for now, making dumps, restores and
> upgrades work happily as far as I've tested.

Great.

> I was also worrying about a need to bump the protocol version to be
> able to track the relkind in the toc entries, but a45c78e3284b has
> already done one.  The difference in AM handling between relations
> without storage and relations with storage pushes the relkind logic
> more within the internals of pg_backup_archiver.c.

Hmm, does this mean that every dump taken since a45c78e3284b (April
1st) and before this commit will be unrestorable?  This doesn't worry me
too much, because we aren't even in beta yet ... and I think we don't
have a strict policy about it.

> --- a/src/bin/pg_dump/t/002_pg_dump.pl
> +++ b/src/bin/pg_dump/t/002_pg_dump.pl
> @@ -4591,11 +4591,9 @@ my %tests = (
>   CREATE TABLE dump_test.regress_pg_dump_table_am_child_2
> PARTITION OF 
> dump_test.regress_pg_dump_table_am_parent FOR VALUES IN (2);',
>   regexp => qr/^
> - \QSET default_table_access_method = regress_table_am;\E
> - (\n(?!SET[^;]+;)[^\n]*)*
> - \n\QCREATE TABLE 
> dump_test.regress_pg_dump_table_am_parent (\E
> - (.*\n)*
>   \QSET default_table_access_method = heap;\E
> + (.*\n)*
> + \QALTER TABLE dump_test.regress_pg_dump_table_am_parent 
> SET ACCESS METHOD regress_table_am;\E
>   (\n(?!SET[^;]+;)[^\n]*)*
>   \n\QCREATE TABLE 
> dump_test.regress_pg_dump_table_am_child_1 (\E
>   (.*\n)*

This looks strange -- why did you remove matching for the CREATE TABLE
of the parent table?  That line should appear shortly before the ALTER
TABLE SET ACCESS METHOD for the same table, shouldn't it?  Maybe your
intention was to remove only the SET default_table_access_method
= regress_table_am line ... but it's not clear to me why we have the
"SET default_table_access_method = heap" line before the ALTER TABLE SET
ACCESS METHOD.

-- 
Álvaro Herrera         Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
"If it is not right, do not do it.
If it is not true, do not say it." (Marcus Aurelius, Meditations)




Re: pg17 issues with not-null contraints

2024-04-18 Thread Alvaro Herrera
On 2024-Apr-18, Justin Pryzby wrote:

> That seems like it could be important.  I considered, but never actually
> tested, your patch by pg_upgrading across major versions.

It would be a welcome contribution for sure.  I've been doing it rather
haphazardly, which is not great.

> BTW, this works up to v16 (although maybe it should not):
> 
> | CREATE TABLE ip(id int PRIMARY KEY); CREATE TABLE ic(id int) INHERITS (ip);
> | ALTER TABLE ic ALTER id DROP NOT NULL;
> 
> Under v17, this fails. Maybe that's okay, but it should probably be
> called out in the release notes.

Sure, we should mention that.

> | ERROR:  cannot drop inherited constraint "ic_id_not_null" of relation "ic"
> 
> That's the issue that I mentioned in the 6 year old thread.  In the
> future (upgrading *from* v17) it won't be possible anymore, right?

Yeah, trying to drop the constraint in 17 fails as it should; it was one
of the goals of this whole thing in fact.

> It'd still be nice to detect the issue in advance rather than failing
> halfway through the upgrade.

Maybe we can have pg_upgrade --check look for cases we might have
trouble upgrading.  (I mean: such cases would fail if you have rows with
nulls in the affected columns, but the schema upgrade should be
successful.  Is that what you have in mind?)

> I have a rebased patch while I'll send on that thread.  I guess it's
> mostly unrelated to your patch but it'd be nice if you could take a
> look.

Okay.

-- 
Álvaro Herrera PostgreSQL Developer  —  https://www.EnterpriseDB.com/
"Industry suffers from the managerial dogma that for the sake of stability
and continuity, the company should be independent of the competence of
individual employees."  (E. Dijkstra)




Re: Speed up clean meson builds by ~25%

2024-04-18 Thread Andres Freund
On 2024-04-17 23:10:53 -0400, Tom Lane wrote:
> Jelte Fennema-Nio  writes:
> > As I expected this problem was indeed fairly easy to address by still
> > building "backend/parser" before "interfaces". See attached patch.
> 
> I think we should hold off on this.  I found a simpler way to address
> ecpg's problem than what I sketched upthread.  I have a not-ready-to-
> show-yet patch that allows the vast majority of ecpg's grammar
> productions to use the default semantic action.  Testing on my M1
> Macbook with clang 16.0.6 from MacPorts, I see the compile time for
> preproc.o in HEAD as about 1m44 sec; but with this patch, 0.6 sec.

That's pretty amazing.




Re: Transparent column encryption

2024-04-18 Thread Robert Haas
On Wed, Apr 10, 2024 at 6:13 AM Peter Eisentraut  wrote:
> Obviously, it's early days, so there will be plenty of time to have
> discussions on various other aspects of this patch.  I'm keeping a keen
> eye on the discussion of protocol extensions, for example.

I think the way that you handled that is clever, and along the lines
of what I had in mind when I invented the _pq_ stuff.

More specifically, the way that the ColumnEncryptionKey and
ColumnMasterKey messages are handled is exactly the way that I was
imagining things would work. The client uses _pq_.column_encryption to
signal that it can understand those messages, and the server responds
by including them. I assume that if the client doesn't signal
understanding, then the server simply omits sending those messages. (I
have not checked the code.)

I'm less certain about the changes to the ParameterDescription and
RowDescription messages. I see a couple of potential problems. One is
that, if you say you can understand column encryption messages, the
extra fields are included even for unencrypted columns. The client
must choose at connection startup whether it ever wishes to read any
encrypted data; if so, it pays a portion of that overhead all the
time. Another potential problem is with the scalability of this
design. Suppose that we could not only encrypt columns, but also
compress, fold, mutilate, and spindle them. Then there might end up
being a dizzying array of variation in the format of what is supposed
to be the same message. Perhaps it's not so bad: as long as the
documentation is clear about in which order the additional fields will
appear in the relevant messages when more than one relevant feature is
used, it's probably not too difficult for clients to cope. And it is
probably also true that the precise size of, say, a RowDescription
message will rarely be performance-critical. But another thought is
that we might try to redesign this so that we simply add more message
types rather than mutating message types i.e. after sending the
RowDescription message, if any columns are encrypted, we additionally
send a RowEncryptionDescription message. Then this treatment becomes
symmetric with the handling of ColumnEncryptionKey and ColumnMasterKey
messages, and there's no overhead when the feature is unused.

With regard to the Bind message, I suggest that we regard the protocol
change as reserving a currently-unused bit in the message to indicate
whether the value is pre-encrypted, without reference to the protocol
extension. It could be legal for a client that can't understand
encryption message from the server to supply an encrypted value to be
inserted into a column. And I don't think we would ever want the bit
that's being reserved here to be used by some other extension for some
other purpose, even when this extension isn't used. So I don't see a
need for this to be tied into the protocol extension.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: pg17 issues with not-null contraints

2024-04-18 Thread Justin Pryzby
On Thu, Apr 18, 2024 at 06:23:30PM +0200, Alvaro Herrera wrote:
> On 2024-Apr-18, Justin Pryzby wrote:
> 
> > BTW, this works up to v16 (although maybe it should not):
> > 
> > | CREATE TABLE ip(id int PRIMARY KEY); CREATE TABLE ic(id int) INHERITS (ip);
> > | ALTER TABLE ic ALTER id DROP NOT NULL;
> 
> > It'd still be nice to detect the issue in advance rather than failing
> > halfway through the upgrade.
> 
> Maybe we can have pg_upgrade --check look for cases we might have
> trouble upgrading.  (I mean: such cases would fail if you have rows with
> nulls in the affected columns, but the schema upgrade should be
> successful.  Is that what you have in mind?)

Before v16, pg_upgrade failed in the middle of restoring the schema,
without being caught during --check.  The patch to implement that was
forgotten and never progressed.

I'm not totally clear on what's intended in v17 - maybe it'd be dead
code, and maybe it shouldn't even be applied to master branch.  But I do
think it's worth patching earlier versions (even though it'll be less
useful than having done so 5 years ago).

-- 
Justin




Re: Add SPLIT PARTITION/MERGE PARTITIONS commands

2024-04-18 Thread Dagfinn Ilmari Mannsåker
Alexander Lakhin  writes:

> Hi Alexander,
>
> 18.04.2024 13:35, Alexander Korotkov wrote:
>>
>> The revised patchset is attached.
>> 1) I've split the fix for the CommandCounterIncrement() issue and the
>> fix for relation persistence issue into a separate patch.
>> 2) I've validated that the lock on the new partition is held in
>> createPartitionTable() after ProcessUtility() as pointed out by
>> Robert.  So, no need to place the lock again.
>> 3) Added fix for problematic error message as a separate patch [1].
>> 4) Added rename "salemans" => "salesmen" for tests as a separate patch.
>>
>> I think these fixes are reaching committable shape, but I'd like
>> someone to check it before I push.
>
> I think the feature implementation should also provide tab completion for
> SPLIT/MERGE.
> (ALTER TABLE t S
> fills in only SET now.)

Here's a patch for that.  One thing I noticed while testing it was that
the tab completion for partitions (Query_for_partition_of_table) shows
all the schemas in the DB, even ones that don't contain any partitions
of the table being altered.

- ilmari

>From 26db03b7a7675aa7dbff1f18ee084296caa1e181 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= 
Date: Thu, 18 Apr 2024 17:47:22 +0100
Subject: [PATCH] =?UTF-8?q?Add=20tab=20completion=20for=20ALTER=20TABLE=20?=
 =?UTF-8?q?=E2=80=A6=20SPLIT|MERGE=20PARTITION(S)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/bin/psql/tab-complete.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 6fee3160f0..97cd5d9f62 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2353,6 +2353,7 @@ psql_completion(const char *text, int start, int end)
 	  "OWNER TO", "SET", "VALIDATE CONSTRAINT",
 	  "REPLICA IDENTITY", "ATTACH PARTITION",
 	  "DETACH PARTITION", "FORCE ROW LEVEL SECURITY",
+	  "SPLIT PARTITION", "MERGE PARTITIONS (",
 	  "OF", "NOT OF");
 	/* ALTER TABLE xxx ADD */
 	else if (Matches("ALTER", "TABLE", MatchAny, "ADD"))
@@ -2609,10 +2610,10 @@ psql_completion(const char *text, int start, int end)
 		COMPLETE_WITH("FROM (", "IN (", "WITH (");
 
 	/*
-	 * If we have ALTER TABLE <foo> DETACH PARTITION, provide a list of
+	 * If we have ALTER TABLE <foo> DETACH|SPLIT PARTITION, provide a list of
	 * partitions of <foo>.
 	 */
-	else if (Matches("ALTER", "TABLE", MatchAny, "DETACH", "PARTITION"))
+	else if (Matches("ALTER", "TABLE", MatchAny, "DETACH|SPLIT", "PARTITION"))
 	{
 		set_completion_reference(prev3_wd);
 		COMPLETE_WITH_SCHEMA_QUERY(Query_for_partition_of_table);
@@ -2620,6 +2621,19 @@ psql_completion(const char *text, int start, int end)
 	else if (Matches("ALTER", "TABLE", MatchAny, "DETACH", "PARTITION", MatchAny))
 		COMPLETE_WITH("CONCURRENTLY", "FINALIZE");
 
+	/* ALTER TABLE <foo> SPLIT PARTITION <partition> */
+	else if (Matches("ALTER", "TABLE", MatchAny, "SPLIT", "PARTITION", MatchAny))
+		COMPLETE_WITH("INTO ( PARTITION");
+
+	/* ALTER TABLE <foo> MERGE PARTITIONS ( */
+	else if (Matches("ALTER", "TABLE", MatchAny, "MERGE", "PARTITIONS", "("))
+	{
+		set_completion_reference(prev4_wd);
+		COMPLETE_WITH_SCHEMA_QUERY(Query_for_partition_of_table);
+	}
+	else if (Matches("ALTER", "TABLE", MatchAny, "MERGE", "PARTITIONS", "(*)"))
+		COMPLETE_WITH("INTO");
+
	/* ALTER TABLE <foo> OF */
 	else if (Matches("ALTER", "TABLE", MatchAny, "OF"))
 		COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_composite_datatypes);
-- 
2.39.2



Re: Add notes to pg_combinebackup docs

2024-04-18 Thread Robert Haas
On Tue, Apr 9, 2024 at 4:00 AM Martín Marqués  wrote:
> I've attached two patches, the first one is just nit-picking things I
> found when I first read the docs.

I pushed a commit to remove the spurious "the" that you found, but I
don't really agree with the other changes. They all seem grammatically
correct, but I think they're grammatically correct now, too, and I
find it more readable the way it is.

I'll write a separate email about the checksum issue.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: plenty code is confused about function level static

2024-04-18 Thread Andres Freund
Hi,

On 2024-04-18 10:33:30 +0200, Peter Eisentraut wrote:
> > Attached are fixes for struct option and a few more occurrences I've found
> > with a bit of grepping.
>
> These look good to me.

Thoughts about when to apply these? Arguably they're fixing mildly broken
code, making it appropriate to fix in 17, but it's also something that we
could end up fixing for a while...


There are some variations of this that are a bit harder to fix, btw. We have

objdump -j .data -t src/backend/postgres|sort -k5
...
01474d00 g O .data  15f0  ConfigureNamesReal
01479a80 g O .data  1fb0  ConfigureNamesEnum
01476300 g O .data  3778  ConfigureNamesString
...
014682e0 g O .data  5848  ConfigureNamesBool
0146db40 g O .data  71c0  ConfigureNamesInt

Not that that's all *that* much these days, but it's still pretty silly to use
~80kB of memory in every postgres instance just because we didn't set
  conf->gen.vartype = PGC_BOOL;
etc at compile time.
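Schematically (hypothetical types, not the real GUC structs) the
difference is:

#include <stdbool.h>

typedef enum {VT_NONE, VT_BOOL, VT_INT} VarType;

typedef struct
{
	const char *name;
	VarType		vartype;
	bool		(*check_hook) (int newval);
} ConfigEntry;

/*
 * Roughly today's shape: vartype is filled in by a loop at startup, so
 * the array must stay writable and lands in .data.
 */
static ConfigEntry configure_names_bool[] = {
	{"enable_thing"},
	{"enable_other_thing"},
};

/*
 * With the tag baked into the initializer, the array can be const and go
 * to .rodata: shared between processes, and useless as a target for
 * overwriting function pointers.
 */
static const ConfigEntry configure_names_bool_ro[] = {
	{"enable_thing", VT_BOOL, NULL},
	{"enable_other_thing", VT_BOOL, NULL},
};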

Large modifiable arrays with callbacks are also quite useful for exploitation,
as one doesn't need to figure out precise addresses.

Greetings,

Andres Freund




Re: plenty code is confused about function level static

2024-04-18 Thread Andres Freund
Hi,

On 2024-04-18 09:07:43 -0300, Ranier Vilela wrote:
>  On 18/04/2024 00:39, Andres Freund wrote:
> >There are lots of places that could benefit from adding 'static
> >const'.
> 
> I found a few more places.

Good catches.


> Patch 004
> 
> The opposite would also help, adding static.
> In these places, I believe it is safe to add static,
> allowing the compiler to transform into read-only, definitively.

I don't think this would even compile? E.g. LockTagTypeNames, pg_wchar_table
are declared in a header and used across translation units.
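(They can still be made read-only without static, though: declare them
extern const in the header and define them const in one .c file. A sketch
with a hypothetical name:)

/* in a header, visible to all translation units */
extern const char *const ThingNames[];

/* in exactly one .c file: both the array and the strings it points to
 * are read-only, so the whole table can be placed in .rodata */
const char *const ThingNames[] = {
	"alpha",
	"beta",
	NULL
};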

Greetings,

Andres Freund




Re: plenty code is confused about function level static

2024-04-18 Thread Ranier Vilela
On Thu, Apr 18, 2024 at 14:16, Andres Freund wrote:

> Hi,
>
> On 2024-04-18 09:07:43 -0300, Ranier Vilela wrote:
> >  On 18/04/2024 00:39, Andres Freund wrote:
> > >There are lots of places that could benefit from adding 'static
> > >const'.
> >
> > I found a few more places.
>
> Good catches.
>
>
> > Patch 004
> >
> > The opposite would also help, adding static.
> > In these places, I believe it is safe to add static,
> > allowing the compiler to transform into read-only, definitively.
>
> I don't think this would even compile?

It compiles, at least with msvc 2022,
and passes the ninja tests.


> E.g. LockTagTypeNames, pg_wchar_table
> are declared in a header and used across translation units.
>
Sad.
There should be a way to export a read-only (static const) variable.
Better remove these.

v1-0005 attached.

best regards,
Ranier Vilela


v1-0005-const-convert-static-const.patch
Description: Binary data


Re: fix tablespace handling in pg_combinebackup

2024-04-18 Thread Andres Freund
Hi,

On 2024-04-18 09:03:21 -0400, Robert Haas wrote:
> On Wed, Apr 17, 2024 at 5:50 PM Andres Freund  wrote:
> > > +If there are tablespaces present in the backup, include tablespace_map as
> > > +a keyword parameter whose value is a hash. When tar_program is used, the
> > > +hash keys are tablespace OIDs; otherwise, they are the tablespace pathnames
> > > +used in the backup. In either case, the values are the tablespace pathnames
> > > +that should be used for the target cluster.
> >
> > Where would one get these oids?
> 
> You pretty much have to pick them out of the tar file names. It sucks,
> but it's not this patch's fault.

I was really just remarking on this from the angle of a test writer. I know
that our interfaces around this suck...

For tests, do we really need to set anything on a per-tablespace basis? Can't
we instead just reparent all of them to a new directory?


> I wonder if we (as a project) would consider a patch that redesigned
> this whole mechanism. Produce ${TABLESPACE_NAME}.tar in tar-format,
> instead of ${OID}.tar. In directory-format, relocate via
> -T${TABLESPACE_NAME}=${DIR} instead of -T${SERVERDIR}=${DIR}. That
> would be a significant compatibility break, and you'd somehow need to
> solve the problem of what to put in the tablespace_map file, which
> requires OIDs. But it seems like if you could finesse that issue in
> some elegant way, the result would just be a heck of a lot more usable
> than what we have today.

For some things that'd definitely be nicer - but not sure it'd work well for
everything. Consider cases where you actually have external directories on
different disks, and you want to restore a backup after some data loss. Now
you need to list all the tablespaces separately, to put them back into their
own location.

One thing I've been wondering about is an option to put the "new" tablespaces
into a location relative to each of the old ones.
  --tablespace-relative-location=../restore-2024-04-18
which would rewrite all the old tablespaces to that new location.


> > Could some of this be simplified by using allow_in_place_tablespaces 
> > instead?
> > Looks like it'd simplify at least the extended test somewhat?
> 
> I don't think we can afford to assume that allow_in_place_tablespaces
> doesn't change the behavior.

I think we can't assume that absolutely everywhere, but we don't need to test
it in a lot of places.


> I said (at least off-list) when that feature was introduced that there was
> no way it was going to remain an isolated development hack, and I think
> that's proved to be 100% correct.

Hm, I guess I kinda agree. But not because I think it wasn't good for
development, but because it'd often be much saner to use relative tablespaces
than absolute ones even for prod.

My only point here was that the test would be simpler if you
a) didn't need to create a temp directory for the tablespace, both for
   primary and standby
b) didn't need to "gin up" a tablespace map, because all the paths are
   relative


Just to be clear: I don't want the above to block merging your test. If you
think you want to do it the way you did, please do.

Greetings,

Andres Freund




Re: Add SPLIT PARTITION/MERGE PARTITIONS commands

2024-04-18 Thread Alvaro Herrera
On 2024-Apr-18, Alexander Lakhin wrote:

> I think the feature implementation should also provide tab completion
> for SPLIT/MERGE.

I don't think that we should be imposing on feature authors or
committers the task of filling in tab-completion for whatever features
they contribute.  I mean, if they want to add that, cool; but if not,
somebody else can do that, too.  It's not a critical piece.

Now, if we're talking about whether a patch to add tab-completion to a
feature post feature-freeze is acceptable, I think it absolutely is
(even though you could claim that it's a new psql feature).  But for
sure we shouldn't mandate that a feature be reverted just because it
lacks tab-completion -- such lack is not an open-item against the
feature in that sense.

-- 
Álvaro Herrera         Breisgau, Deutschland  —  https://www.EnterpriseDB.com/
"That sort of implies that there are Emacs keystrokes which aren't obscure.
I've been using it daily for 2 years now and have yet to discover any key
sequence which makes any sense."(Paul Thomas)




Re: Transparent column encryption

2024-04-18 Thread Jelte Fennema-Nio
On Thu, 18 Apr 2024 at 18:46, Robert Haas  wrote:
> With regard to the Bind message, I suggest that we regard the protocol
> change as reserving a currently-unused bit in the message to indicate
> whether the value is pre-encrypted, without reference to the protocol
> extension. It could be legal for a client that can't understand
> encryption message from the server to supply an encrypted value to be
> inserted into a column. And I don't think we would ever want the bit
> that's being reserved here to be used by some other extension for some
> other purpose, even when this extension isn't used. So I don't see a
> need for this to be tied into the protocol extension.

I think this is an interesting idea. I can indeed see use cases for
e.g. inserting a new row based on another row (where the secret is the
same).

IMHO that means that we should also bump the protocol version for this
change, because it's changing the wire protocol by adding a new
parameter format code. And it does so in a way that does not depend on
the new protocol extension.




Re: documentation structure

2024-04-18 Thread Corey Huinker
>
> I haven't dealt with variadic yet, since the two styles are visually
> different, not just markup (<optional> renders as [...]).
>
> The two styles for variadic are the what I call caller-style:
>
>concat ( val1 "any" [, val2 "any" [, ...] ] )
>format(formatstr text [, formatarg "any" [, ...] ])
>

While this style is obviously clumsier for us to compose, it does avoid
relying on the user understanding what the word variadic means. Searching
through online documentation of the python *args parameter, the word
variadic never comes up, the closest they get is "variable length
argument". I realize that python is not SQL, but I think it's a good point
of reference for what concepts the average reader is likely to know.

Looking at the patch, I think it is good, though I'd consider doing some
indentation for the nested <optional>s to allow the author to do more
visual tag-matching. The ']'s were sufficiently visually distinct that we
didn't really need or want nesting, but <optional> is just another tag to
my eyes in a sea of tags.


Re: plenty code is confused about function level static

2024-04-18 Thread Peter Eisentraut

On 18.04.24 19:11, Andres Freund wrote:

Thoughts about when to apply these? Arguably they're fixing mildly broken
code, making it appropriate to fix in 17, but it's also something that we
could end up fixing for a while...


Yeah, let's keep these for later.  They are not regressions, and there 
is no end in sight yet.  I have some other related stuff queued up, so 
if we're going to start adjusting these kinds of things now, it would 
open a can of worms.





Re: AIX support

2024-04-18 Thread Andres Freund
Hi,

On 2024-04-18 11:15:43 +, Sriram RK wrote:
> We (IBM-AIX team) looked into this issue
>
> https://www.postgresql.org/message-id/20240225194322...@rfd.leadboat.com
>
> This is related to the compiler issue. The compilers xlc(13.1) and gcc(8.0)
> have issues. But we verified that this issue is resolved with the newer
> compiler versions openXL(xlc17.1) and gcc(12.0) onwards.

The reason we used these compilers was that those were the only ones we had
kinda somewhat reasonable access to, via the gcc projects'
"compile farm" https://portal.cfarm.net/
We have to rely on whatever the aix machines there provide. They're not
particularly plentiful resource-wise either.


This is generally one of the big issues with AIX support. There are other
niche-y OSs that don't have a lot of users, e.g. openbsd, but in contrast to
AIX I can just start an openbsd VM within a few minutes and iron out whatever
portability issue I'm dealing with.

Not being AIX customers we also can't raise bugs about compiler bugs, so we're
stuck doing bad workarounds.


> Also as part of the support, we will help in fixing all the issues related
> to AIX and continue to support AIX for Postgres. If we need another CI
> environment we can work to make one available. But for the time being can we
> start reverting the patch that has removed AIX support.

The state the code was in when it was removed is not one that I am OK with
adding back.


> We want to make a note that postgres is used extensively in our IBM product
> and is being exploited by multiple customers.

To be blunt: Then it'd have been nice to see some investment in that before
now. Both on the code level and the infrastructure level (i.e. access to
machines etc).

Greetings,

Andres Freund




Re: Add notes to pg_combinebackup docs

2024-04-18 Thread Robert Haas
On Tue, Apr 9, 2024 at 5:46 AM Tomas Vondra
 wrote:
> What we could do is detect this in pg_combinebackup, and either just
> disable checksums with a warning and hint to maybe enable them again. Or
> maybe just print that the user needs to disable them.
>
> I was thinking maybe we could detect this while taking the backups, and
> force taking a full backup if checksums got enabled since the last
> backup. But we can't do that because we only have the manifest from the
> last backup, and the manifest does not include info about checksums.

So, as I see it, we have at least five options here:

1. Document that this doesn't work. By "this doesn't work" I think
what we mean is: (A) If any of the backups you'd need to combine were
taken with checksums off but the last one was taken with checksums on,
then you might get checksum failures on the cluster written by
pg_combinebackup. (B) Therefore, after enabling checksums, you should
take a new full backup and make future incrementals dependent
thereupon. (C) But if you don't, then you can (I presume) run recovery
with ignore_checksum_failure=true to bring the cluster to a consistent
state, stop the database, shut checksums off, optionally also turn
them on again, and restart the database. Then presumably everything
should be OK, except that you'll have wiped out any real checksum
failures that weren't artifacts of the reconstruction process along
with the fake ones.

2. As (1), but make check_control_files() emit a warning message when
the problem case is detected.

3. As (2), but also add a command-line option to pg_combinebackup to
flip the checksum flag to false in the control file. Then, if you have
the problem case, instead of following the procedure described above,
you can just use this option, and enable checksums afterward if you
want. It still has the same disadvantage as the procedure described
above: any "real" checksum failures will be suppressed, too. From a
design perspective, this feels like kind of an ugly wart to me: hey,
in this one scenario, you have to add --do-something-random or it
doesn't work! But I see it's got some votes already, so maybe it's the
right answer.

4. Add the checksum state to the backup manifest. Then, if someone
tries to take an incremental backup with checksums on and the
precursor backup had checksums off, we could fail. A strength of this
proposal is that it actually stops the problem from happening at
backup time, which in general is a whole lot nicer than not noticing a
problem until restore time. A possible weakness is that it stops you
from doing something that is ... actually sort of OK. I mean, in the
strict sense, the incremental backup isn't valid, because it's going
to cause checksum failures after reconstruction, but it's valid apart
from the checksums, and those are fixable. I wonder whether users who
encounter this error message will say "oh, I'm glad PostgreSQL
prevented me from doing that" or "oh, I'm annoyed that PostgreSQL
prevented me from doing that."

5. At reconstruction time, notice which backups have checksums
enabled. If the final backup in the chain has them enabled, then
whenever we take a block from an earlier backup with checksums
disabled, re-checksum the block. As opposed to any of the previous
options, this creates a fully correct result, so there's no real need
to document any restrictions on what you're allowed to do. We might
need to document the performance consequences, though: fast file copy
methods will have to be disabled, and we'll have to go block by block
while paying the cost of checksum calculation for each one. You might
be sad to find out that your reconstruction is a lot slower than you
were expecting.
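For (5), the per-block fix-up itself is small -- conceptually something
like this for each block taken from a checksum-less backup (a sketch
using the existing pg_checksum_page(), not actual pg_combinebackup code):

#include "postgres.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"

/*
 * Sketch: stamp a fresh checksum on a block copied out of a backup that
 * was taken with checksums disabled, so the reconstructed cluster (which
 * has them enabled) verifies cleanly.  The real cost is that the fast
 * whole-file copy paths can't be used anymore, not this function.
 */
static void
restamp_block(char *page, BlockNumber blkno)
{
	if (!PageIsNew((Page) page))
		((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);
}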

While I'm probably willing to implement any of these, I have some
reservations about attempting (4) or especially (5) after feature
freeze. I think there's a pretty decent chance that those fixes will
turn out to have issues of their own which we'll then need to fix in
turn. We could perhaps consider doing (2) for now and (5) for a future
release, or something like that.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




RE: Popcount optimization using AVX512

2024-04-18 Thread Shankaran, Akash
> It was brought to my attention [0] that we probably should be checking for 
> the OSXSAVE bit instead of the XSAVE bit when determining whether there's 
> support for the XGETBV instruction.  IIUC that should indicate that both the 
> OS and the processor have XGETBV support (not just the processor).
> I've attached a one-line patch to fix this.

> [0] https://github.com/pgvector/pgvector/pull/519#issuecomment-2062804463

Good find. I confirmed after speaking with an intel expert, and from the intel
AVX-512 manual [0], section 14.3, which recommends checking bit 27. From the
manual:

"Prior to using Intel AVX, the application must identify that the operating 
system supports the XGETBV instruction,
the YMM register state, in addition to processor's support for YMM state 
management using XSAVE/XRSTOR and
AVX instructions. The following simplified sequence accomplishes both and is 
strongly recommended.
1) Detect CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application use).
2) Issue XGETBV and verify that XCR0[2:1] = '11b' (XMM state and YMM state are 
enabled by OS).
3) detect CPUID.1:ECX.AVX[bit 28] = 1 (AVX instructions supported).
(Step 3 can be done in any order relative to 1 and 2.)"

It also seems that step 1 and step 2 need to be done prior to the CPUID OSXSAVE 
check in the popcount code.

[0]: https://cdrdv2.intel.com/v1/dl/getContent/671200
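
For reference, the quoted sequence translates to roughly the following C
sketch (GCC builtins assumed; _xgetbv() needs -mxsave; this illustrates the
manual's recommendation, not the patch itself):

#include <cpuid.h>
#include <immintrin.h>
#include <stdbool.h>

static bool
os_supports_avx(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	if ((ecx & (1 << 27)) == 0)		/* step 1: OSXSAVE */
		return false;
	if ((_xgetbv(0) & 0x6) != 0x6)	/* step 2: XCR0[2:1] = '11b' */
		return false;
	return (ecx & (1 << 28)) != 0;	/* step 3: AVX instructions supported */
}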

- Akash Shankaran





Re: Oom on temp (un-analyzed table caused by JIT) V16.1 [ NOT Fixed ]

2024-04-18 Thread Kirk Wolak
On Thu, Feb 22, 2024 at 4:49 PM Michael Banck  wrote:

> Hi,
>
> On Wed, Jan 24, 2024 at 02:50:52PM -0500, Kirk Wolak wrote:
> > On Mon, Jan 22, 2024 at 1:30 AM Kirk Wolak  wrote:
> > > On Fri, Jan 19, 2024 at 7:03 PM Daniel Gustafsson 
> wrote:
> > >> > On 19 Jan 2024, at 23:09, Kirk Wolak  wrote:
> > > Thank you, that made it possible to build and run...
> > > UNFORTUNATELY this has a CLEAR memory leak (visible in htop)
> > > I am watching it already consuming 6% of my system memory.
> ...
> I had a look at this (and blogged about it here[1]) and was also
> wondering what was going on with 17devel and the recent back-branch
> releases, cause I could also reproduce those continuing memory leaks.
>
> Adding some debug logging when llvm_inline_reset_caches() is called
> solves the mystery: as you are calling a function, the fix that is in
> 17devel and the back-branch releases does not apply; llvm_inline_reset_caches()
> is only called after the function returns (presumably because
> llvm_jit_context_in_use_count is greater than zero until then, so the
> call site of llvm_inline_reset_caches() is never reached).
>
> If you instead run your SQL in a DO-loop (as in the blog post) and not
> as a PL/PgSQL function, you should see that it no longer leaks. This
> might be obvious to some (and Andres mentioned it in
>
> https://www.postgresql.org/message-id/20210421002056.gjd6rpe6toumiqd6%40alap3.anarazel.de
> )
> but it took me a while to figure out/find.
>
Thanks for confirming.  Running it inside a routine is required, but disabling
JIT was good enough for us.

I'm curious whether there is another way to end up calling the cleanup. It had
that "feel" that the cleanup context was being lost between iterations of the
loop?
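
For anyone following along, a simplified sketch of the mechanism Michael
describes (names taken from llvmjit.c, body heavily abbreviated): the inline
cache is only reset once no JIT context remains in use, and a PL/pgSQL
function keeps one alive until it returns.

static void
llvm_release_context(JitContext *context)
{
	/* ... release the resources owned by this context ... */

	llvm_jit_context_in_use_count--;

	/*
	 * Inside a function, the surrounding context keeps the count above
	 * zero, so the reset below is never reached until the function returns.
	 */
	if (llvm_jit_context_in_use_count == 0)
		llvm_inline_reset_caches();
}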


Re: Idea Feedback: psql \h misses -> Offers Links?

2024-04-18 Thread Peter Eisentraut

On 17.04.24 19:47, Kirk Wolak wrote:

*Example:*
\h current_setting
No help available for "current_setting".
Try \h with no arguments to see available help.

https://www.postgresql.org/search/?u=%2Fdocs%2F16%2F&q=current_setting 



One problem is that this search URL does not actually produce any useful 
information about current_setting.






Re: fix tablespace handling in pg_combinebackup

2024-04-18 Thread Robert Haas
On Thu, Apr 18, 2024 at 1:45 PM Andres Freund  wrote:
> I was really just remarking on this from the angle of a test writer. I know
> that our interfaces around this suck...
>
> For tests, do we really need to set anything on a per-tablespace basis? Can't
> we instead just reparent all of them to a new directory?

I don't know what you mean by this.

> > I wonder if we (as a project) would consider a patch that redesigned
> > this whole mechanism. Produce ${TABLESPACE_NAME}.tar in tar-format,
> > instead of ${OID}.tar. In directory-format, relocate via
> > -T${TABLESPACE_NAME}=${DIR} instead of -T${SERVERDIR}=${DIR}. That
> > would be a significant compatibility break, and you'd somehow need to
> > solve the problem of what to put in the tablespace_map file, which
> > requires OIDs. But it seems like if you could finesse that issue in
> > some elegant way, the result would just be a heck of a lot more usable
> > than what we have today.
>
> For some things that'd definitely be nicer - but not sure it work well for
> everything. Consider cases where you actually have external directories on
> different disks, and you want to restore a backup after some data loss. Now
> you need to list all the tablespaces separately, to put them back into their
> own location.

I mean, don't you need to do that anyway, just in a more awkward way?
I don't understand who ever wants to keep track of their tablespaces
by either (a) source pathname or (b) OID rather than (c) user-visible
tablespace name.

> One thing I've been wondering about is an option to put the "new" tablespaces
> into a location relative to each of the old ones.
>   --tablespace-relative-location=../restore-2024-04-18
> which would rewrite all the old tablespaces to that new location.

I think this would probably get limited use outside of testing scenarios.

> > I said (at least off-list) when that feature was introduced that there was
> > no way it was going to remain an isolated development hack, and I think
> > that's proved to be 100% correct.
>
> Hm, I guess I kinda agree. But not because I think it wasn't good for
> development, but because it'd often be much saner to use relative tablespaces
> than absolute ones even for prod.
>
> My only point here was that the test would be simpler if you
> a) didn't need to create a temp directory for the tablespace, both for
>primary and standby
> b) didn't need to "gin up" a tablespace map, because all the paths are
>relative
>
> Just to be clear: I don't want the above to block merging your test. If you
> think you want to do it the way you did, please do.

I think I will go ahead and do that soon, then. I'm totally fine with
it if somebody wants to do the testing in some other way, but this was
what made sense to me as the easiest way to adapt what's already
there. I think it's lame that init_from_backup() disclaims support for
tablespaces, as if tablespaces weren't a case that needs to be tested
heavily with respect to backups. And I think it's better to get
something merged that fixes the bug ASAP, and adds some test coverage,
and then if there's a better way to do that test coverage down the
road, well and good.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Can't find not null constraint, but \d+ shows that

2024-04-18 Thread Alvaro Herrera
On 2024-Apr-13, jian he wrote:

> I wonder is there any incompatibility issue, or do we need to say something
> about the new behavior when dropping a key column?

Umm, yeah, maybe we should document it in ALTER TABLE DROP PRIMARY KEY
and in the release notes to note the different behavior.

> only minor cosmetic issue:
> + if (unconstrained_cols)
> i would like change it to
> + if (unconstrained_cols != NIL)
> 
> + foreach(lc, unconstrained_cols)
> we can change to
> + foreach_int(attnum, unconstrained_cols)
> per commit
> https://git.postgresql.org/cgit/postgresql.git/commit/?id=14dd0f27d7cd56ffae9ecdbe324965073d01a9ff

Ah, yeah.  I did that, rewrote some comments and refined the tests a
little bit to ensure the pg_upgrade behavior is sane.  I intend to get
this pushed tomorrow, if nothing ugly comes up.

CI run: https://cirrus-ci.com/build/5471117953990656

-- 
Álvaro Herrera PostgreSQL Developer  —  https://www.EnterpriseDB.com/
"La gente vulgar sólo piensa en pasar el tiempo;
el que tiene talento, en aprovecharlo"
>From 28393502b3d6fcf430264c10f931e948bedc Mon Sep 17 00:00:00 2001
From: Alvaro Herrera 
Date: Thu, 11 Apr 2024 12:29:34 +0200
Subject: [PATCH v2] Better handle indirect constraint drops

In certain cases it is possible to remove not-null constraints without
maintaining attnotnull in its correct state; for example, if you drop a column
that's part of the primary key and the other columns of the PK don't have
not-null constraints, then we should reset the attnotnull flags for those
other columns; up to this commit, we didn't.  Handle those cases better by
doing the attnotnull reset in RemoveConstraintById() instead of in
dropconstraint_internal().

However, there are some cases where we must not do so.  For example, if those
other columns are in replica identity indexes or are generated identity
columns, we must keep attnotnull set, even though that results in a catalog
inconsistency: attnotnull is set with no not-null constraint to support it.

Because the attnotnull reset now happens in more places than before, for
instance when a column of the primary key changes type, we need an
additional trick to reinstate it as necessary.  Introduce a new
alter-table pass that does this, which simply needs to reschedule some
AT_SetAttNotNull subcommands that were already being generated and
ignored.

Because of the exceptions noted above in which attnotnull is not reset,
we also include a pg_dump hack to include a not-null constraint when the
attnotnull flag is set even if no pg_constraint row exists.  This part
is undesirable but necessary, because failing to handle the case can
result in unrestorable dumps.

Reported-by: Tender Wang 
Co-authored-by: Tender Wang 
Reviewed-by: jian he 
Discussion: https://postgr.es/m/CAHewXN=hMbNa3d43NOR=ocgdgptt18s-1fmuecoegesyek4...@mail.gmail.com
---
 src/backend/catalog/pg_constraint.c   | 116 ++-
 src/backend/commands/tablecmds.c  | 135 +++---
 src/bin/pg_dump/pg_dump.c |  30 +++--
 src/test/regress/expected/constraints.out |  78 +
 src/test/regress/sql/constraints.sql  |  39 +++
 5 files changed, 322 insertions(+), 76 deletions(-)

diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 778b7c381d..56c083fe21 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -19,6 +19,7 @@
 #include "access/htup_details.h"
 #include "access/sysattr.h"
 #include "access/table.h"
+#include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/heap.h"
@@ -933,6 +934,8 @@ RemoveConstraintById(Oid conId)
 	Relation	conDesc;
 	HeapTuple	tup;
 	Form_pg_constraint con;
+	bool		dropping_pk = false;
+	List	   *unconstrained_cols = NIL;
 
 	conDesc = table_open(ConstraintRelationId, RowExclusiveLock);
 
@@ -957,7 +960,9 @@ RemoveConstraintById(Oid conId)
 		/*
 		 * We need to update the relchecks count if it is a check constraint
 		 * being dropped.  This update will force backends to rebuild relcache
-		 * entries when we commit.
+		 * entries when we commit.  For not-null and primary key constraints,
+		 * obtain the list of columns affected, so that we can reset their
+		 * attnotnull flags below.
 		 */
 		if (con->contype == CONSTRAINT_CHECK)
 		{
@@ -984,6 +989,36 @@ RemoveConstraintById(Oid conId)
 
 			table_close(pgrel, RowExclusiveLock);
 		}
+		else if (con->contype == CONSTRAINT_NOTNULL)
+		{
+			unconstrained_cols = list_make1_int(extractNotNullColumn(tup));
+		}
+		else if (con->contype == CONSTRAINT_PRIMARY)
+		{
+			Datum		adatum;
+			ArrayType  *arr;
+			int			numkeys;
+			bool		isNull;
+			int16	   *attnums;
+
+			dropping_pk = true;
+
+			adatum = heap_getattr(tup, Anum_pg_constraint_conkey,
+  RelationGetDescr(conDesc), &isNull);
+			if (isNull)
+elog(ERROR, "null conkey for constraint %u", con->oid);
+			arr = DatumGetArrayTypeP(adatum);	/* ensure not toasted 

Re: Transparent column encryption

2024-04-18 Thread Robert Haas
On Thu, Apr 18, 2024 at 1:49 PM Jelte Fennema-Nio  wrote:
> I think this is an interesting idea. I can indeed see use cases for
> e.g. inserting a new row based on another row (where the secret is the
> same).
>
> IMHO that means that we should also bump the protocol version for this
> change, because it's changing the wire protocol by adding a new
> parameter format code. And it does so in a way that does not depend on
> the new protocol extension.

I think we're more or less covering the same ground we did on the
other thread here -- in theory I don't love the fact that we never
bump the protocol version when we change stuff, but in practice if we
start bumping it every time we do anything I think it's going to just
break a bunch of stuff without any real benefit.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: pgsql: Fix restore of not-null constraints with inheritance

2024-04-18 Thread Andrew Dunstan


On 2024-04-18 Th 11:39, Alvaro Herrera wrote:

On 2024-Apr-18, Alvaro Herrera wrote:


On 2024-Apr-18, Alvaro Herrera wrote:


Lastly, make two changes to pg_dump: 1) do not try to drop a not-null
constraint that's marked as inherited; this allows a dump to restore
with no errors if a table with a PK inherits from another which also has
a PK; 2) avoid giving inherited constraints throwaway names, for the
rare cases where such a constraint survives after the restore.

Hmm, this last bit broke pg_upgrade on crake.  I'll revert this part,
meanwhile I'll be installing 9.2 to see if it can be fixed in a better way.

Eh, so:

1. running "make check" on pg_upgrade using an oldinstall pointing to
9.2 fails, because PostgreSQL::Test::Cluster doesn't support that
version -- it only goes back to 12.  How difficult was it to port it
back to all these old versions?



It's not that hard to make it go back to 9.2. Here's a version that's a 
couple of years old, but it supports versions all the way back to 7.2 :-)


If there's interest I'll work on supporting our official "old" versions
(i.e. 9.2 and up).





2. running "make check" with an oldinstall pointing to 10 fails, because
the "invalid database" checks fail:

not ok 7 - invalid database causes failure status (got 0 vs expected 1)

#   Failed test 'invalid database causes failure status (got 0 vs expected 1)'
#   at t/002_pg_upgrade.pl line 407.
not ok 8 - invalid database causes failure stdout /(?^:invalid)/







3. Lastly, even if I put back the code that causes the failures on crake
and restore from 10 (and ignore those two problems), I cannot reproduce
the issues it reported.  Is crake running some funky code that's not
what "make check" on pg_upgrade does, perchance?



It's running the buildfarm cross version upgrade module
(TestUpgradeXversion.pm in the buildfarm client code).



It's choking on the change in constraint names between the dump of the 
pre-upgrade database and the dump of the post-upgrade database, e.g.



 CREATE TABLE public.rule_and_refint_t2 (
-    id2a integer CONSTRAINT pgdump_throwaway_notnull_0 NOT NULL NO INHERIT,
-    id2c integer CONSTRAINT pgdump_throwaway_notnull_1 NOT NULL NO INHERIT
+    id2a integer CONSTRAINT rule_and_refint_t2_id2a_not_null NOT NULL NO INHERIT,
+    id2c integer CONSTRAINT rule_and_refint_t2_id2c_not_null NOT NULL NO INHERIT
 );


look at the dumpdiff-REL9_2_STABLE file for the full list.

I assume that means pg_dump is generating names that pg_upgrade throws 
away? That seems ... unfortunate.


There is a perl module at 
src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm. This is used to adjust 
the dump files before we diff them. Maybe you can remedy the problem by 
adding some code in there.




cheers


andrew

--
Andrew Dunstan
EDB:https://www.enterprisedb.com


Cluster.pm
Description: Perl program


Re: [18] clarify the difference between pg_wchar, wchar_t, and Unicode code points

2024-04-18 Thread Peter Eisentraut

On 16.04.24 01:40, Jeff Davis wrote:

I'm not sure I understand all of the history behind pg_wchar, but it
seems to be some blend of:

   (a) Postgres's own internal representation of a decoded character
   (b) libc's wchar_t
   (c) Unicode code point

For example, Postgres has its own encoding/decoding routines, so (a) is
the most obvious definition.


(a) is the correct definition, I think.  The other ones are just 
occasional conveniences, and occasionally wrong.



When using ICU, we also pass a pg_wchar directly to ICU routines, which
depends on definition (c), and can lead to problems like:

https://www.postgresql.org/message-id/e7b67d24288f811aebada7c33f9ae629dde0def5.ca...@j-davis.com


That's just a plain bug, I think.  It's missing the encoding check that 
for example pg_strncoll_icu() does.



The comment at the top of pg_regc_locale.c explains some of the above,
but not all. I'd like to organize this a bit better:

   * a new typedef for a Unicode code point ("codepoint"? "uchar"?)
   * a no-op conversion routine from pg_wchar to a codepoint that would
assert that the server encoding is UTF-8 (#ifndef FRONTEND, of course)
   * a no-op conversion routine from pg_wchar to wchar_t that would be a
good place for a comment describing that it's a "best effort" and may
not be correct in all cases


I guess sometimes you really want to just store an array of Unicode code 
points.  But I'm not sure how this would actually address coding 
mistakes like the one above.  You still need to check the server 
encoding and do encoding conversion when necessary.
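
For concreteness, a minimal sketch of the routines being proposed; the
typedef and function names here are hypothetical, not existing PostgreSQL
APIs:

#include <wchar.h>

#include "postgres.h"
#include "mb/pg_wchar.h"

typedef uint32 pg_codepoint;	/* hypothetical name for the new typedef */

static inline pg_codepoint
pg_wchar_to_codepoint(pg_wchar c)
{
#ifndef FRONTEND
	/* a no-op only when the server encoding really is UTF-8 */
	Assert(GetDatabaseEncoding() == PG_UTF8);
#endif
	return (pg_codepoint) c;
}

static inline wchar_t
pg_wchar_to_wchar_t(pg_wchar c)
{
	/*
	 * Best effort: correct only where wchar_t holds Unicode code points and
	 * is wide enough, which is not guaranteed on all platforms.
	 */
	return (wchar_t) c;
}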






Re: Add notes to pg_combinebackup docs

2024-04-18 Thread Daniel Gustafsson
> On 18 Apr 2024, at 20:11, Robert Haas  wrote:

> 2. As (1), but make check_control_files() emit a warning message when
> the problem case is detected.

Being in the post-freeze part of the cycle, this seems like the best option.

> 3. As (2), but also add a command-line option to pg_combinebackup to
> flip the checksum flag to false in the control file.

I don't think this is the way to go; such an option will be plastered onto
helpful tutorials which users will copy/paste from even when they don't need
the option at all, only to disable checksums when there was no reason for
doing so.

> We could perhaps consider doing (2) for now and (5) for a future
> release, or something like that.

+1

--
Daniel Gustafsson





Re: Add notes to pg_combinebackup docs

2024-04-18 Thread Robert Haas
On Thu, Apr 18, 2024 at 3:25 PM Daniel Gustafsson  wrote:
> I don't think this is the way to go; such an option will be plastered onto
> helpful tutorials which users will copy/paste from even when they don't need
> the option at all, only to disable checksums when there was no reason for
> doing so.

That's actually a really good point. I mean, it's such an obscure
scenario, maybe it wouldn't make it into the tutorials, but if it
does, it'll never get taken out.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Add new protocol message to change GUCs for usage with future protocol-only GUCs

2024-04-18 Thread Jelte Fennema-Nio
On Mon, 15 Apr 2024 at 21:47, Dave Cramer  wrote:
>> On Mon, 15 Apr 2024 at 19:52, Robert Haas  wrote:
>> > surely it can't be right to use protocol
>> > version 3.0 to refer to a bunch of different things. But at the same
>> > time, surely we don't want clients to start panicking and bailing out
>> > when everything would have been fine.
>>
>> I feel like the NegotiateProtocolVersion message should make sure people
>> don't panic and bail out. And if not, then feature flags won't help
>> with this either. Because clients would then still bail out if some
>> feature is not supported.
>
> I don't think a client should ever bail out. They may not support something 
> but IMO bailing out is not an option.


On Thu, 18 Apr 2024 at 21:01, Robert Haas  wrote:
> On Thu, Apr 18, 2024 at 1:49 PM Jelte Fennema-Nio  wrote:
> > IMHO that means that we should also bump the protocol version for this
> > change, because it's changing the wire protocol by adding a new
> > parameter format code. And it does so in a way that does not depend on
> > the new protocol extension.
>
> I think we're more or less covering the same ground we did on the
> other thread here -- in theory I don't love the fact that we never
> bump the protocol version when we change stuff, but in practice if we
> start bumping it every time we do anything I think it's going to just
> break a bunch of stuff without any real benefit.

(the second quoted message comes from Peter's column encryption thread, but
I'm responding here to keep this discussion in one place)

I really don't understand what exactly you're worried about. What do
you expect will break when bumping the protocol version? As Dave said,
clients should never bail out due to protocol version differences.

When the server supports a lower version than the client, the client
should disable certain features because it gets the
ProtocolVersionNegotiation message. This is also true if we don't bump
the version. Negotiating a lower version actually makes it clearer for
the client what features to disable. Using the reported postgres
version for this might not, because a connection pooler in the middle
might not support the features that the client wants and thus throw an
error (e.g. due to the client sending unknown messages) even if the
backing Postgres server would support these features. Not to mention
non-postgresql servers that implement the PostgreSQL protocol (of
which there are more and more).
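
To make the fallback concrete, here is a client-side sketch
(NegotiateProtocolVersion is the real server message; the struct and flag
names below are made up for illustration):

#include <stdbool.h>

typedef struct ConnSketch		/* hypothetical, not libpq's PGconn */
{
	int		requested_minor;	/* minor version sent in the startup packet */
	int		effective_minor;	/* what we ended up with */
	bool	use_new_parameter_format;	/* hypothetical feature flag */
} ConnSketch;

/* Called when the server answers the startup packet with NegotiateProtocolVersion. */
static void
handle_negotiate_protocol_version(ConnSketch *conn, int server_minor)
{
	if (server_minor < conn->requested_minor)
	{
		/* Keep the connection; just switch off the newer features. */
		conn->effective_minor = server_minor;
		conn->use_new_parameter_format = false;
	}
	else
		conn->effective_minor = conn->requested_minor;
	/* No reason to panic or bail out in either case. */
}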

When the server supports a higher version, the client never even
notices this because the server will silently accept and only enable
the features of the lower version. So this could never cause breakage,
as from the client's perspective the server didn't bump their protocol
version.

So, I don't understand why you seem to view bumping the protocol
version with so much negativity. We're also bumping PG versions every
year. Afaik people only like that, partially because it's immediately
clear that certain features (e.g. MERGE) are not supported when
connecting to older servers. To me the same is true for bumping the
protocol version. There are no downsides to bumping it, only upsides.




Re: Popcount optimization using AVX512

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 06:12:22PM +, Shankaran, Akash wrote:
> Good find. I confirmed this after speaking with an Intel expert and checking
> the Intel AVX-512 manual [0], section 14.3, which recommends checking bit 27.
> From the manual:
> 
> "Prior to using Intel AVX, the application must identify that the operating 
> system supports the XGETBV instruction,
> the YMM register state, in addition to processor's support for YMM state 
> management using XSAVE/XRSTOR and
> AVX instructions. The following simplified sequence accomplishes both and is 
> strongly recommended.
> 1) Detect CPUID.1:ECX.OSXSAVE[bit 27] = 1 (XGETBV enabled for application 
> use1).
> 2) Issue XGETBV and verify that XCR0[2:1] = '11b' (XMM state and YMM state 
> are enabled by OS).
> 3) detect CPUID.1:ECX.AVX[bit 28] = 1 (AVX instructions supported).
> (Step 3 can be done in any order relative to 1 and 2.)"

Thanks for confirming.  IIUC my patch should be sufficient, then.

> It also seems that step 1 and step 2 need to be done prior to the CPUID 
> OSXSAVE check in the popcount code.

This seems to contradict the note about doing step 3 at any point, and
given step 1 is the OSXSAVE check, I'm not following what this means,
anyway.

I'm also wondering if we need to check that (_xgetbv(0) & 0xe6) == 0xe6
instead of just (_xgetbv(0) & 0xe0) != 0, as the status of the lower half
of some of the ZMM registers is stored in the SSE and AVX state [0].  I
don't know how likely it is that 0xe0 would succeed but 0xe6 wouldn't, but
we might as well make it correct.

[0] https://en.wikipedia.org/wiki/Control_register#cite_ref-23

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com




Re: Add SPLIT PARTITION/MERGE PARTITIONS commands

2024-04-18 Thread Robert Haas
On Thu, Apr 18, 2024 at 6:35 AM Alexander Korotkov  wrote:
> The revised patchset is attached.
> 1) I've split the fix for the CommandCounterIncrement() issue and the
> fix for relation persistence issue into a separate patch.
> 2) I've validated that the lock on the new partition is held in
> createPartitionTable() after ProcessUtility() as pointed out by
> Robert.  So, no need to place the lock again.
> 3) Added fix for problematic error message as a separate patch [1].
> 4) Added rename "salemans" => "salesmen" for tests as a separate patch.
>
> I think these fixes are reaching committable shape, but I'd like
> someone to check it before I push.

Reviewing 0001:

- Seems mostly fine. I think the comment /* Unlock and drop merged
partitions. */ is wrong. I think it should say something like /* Drop
the current partitions before adding the new one. */ because (a) it
doesn't unlock anything, and there's another comment saying that and
(b) we now know that the drop vs. add order matters.

Reviewing 0002:

- Commit message typos: behavious, corresponsing

- Given the change to the header comment of createPartitionTable, it's
rather surprising to me that this patch doesn't touch the
documentation. Isn't that a big change in semantics?

- My previous review comment was really about the code comment, I
believe, rather than the use of AccessExclusiveLock. NoLock is
probably fine, but if it were me I'd be tempted to write
AccessExclusiveLock and just make the comment say something like /* We
should already have the lock, but do it this way just to be certain
*/. But what you have is probably fine, too. Mostly, I want to clarify
the intent of my previous comment.

- Do we, or can we, have a test that if you split a partition that's
not in the search path, the resulting partitions end up in your
creation namespace? And similarly for merge? And maybe also that
schema-qualification works properly?

I haven't exhaustively verified the patch, but these are some things I
noticed when scrolling through it.

Reviewing 0003:

- Are you sure this can't dereference datum when datum is NULL, in
either the upper or lower half? It sure looks strange to have code
that looks like it can make datum a null pointer, and then an
unconditional deference just after.

- In general I think the wording changes are improvements. I'm
slightly suspicious that there might be an even better way to word it,
but I can't think of it right at this very moment.

- I'm kind of unhappy (but not totally unhappy) with the semantics.
Suppose I have a partition that allows values from 0 to 1000, but
actually only contains values that are either between 0 and 99 or
between 901 and 1000. If I try to split the partition into one that
allows 0..100 and a second that allows 900..1000, it will fail. Maybe
that's good, because that means that if a failure is going to happen,
it will happen right at the beginning, rather than maybe after doing a
lot of work. But on the other hand, it also kind of stinks, because it
feels like I'm being told I can't do something that I know is
perfectly fine.

Reviewing 0004:

- Obviously this is quite trivial and there's no real problem with it,
but if we're changing it anyway, how about a gender-neutral term
(salesperson/salespeople)?

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: cataloguing NOT NULL constraints

2024-04-18 Thread Alexander Lakhin

Hello Alvaro,

18.04.2024 16:39, Alvaro Herrera wrote:

I have pushed a fix which should hopefully fix this problem
(d9f686a72e).  Please give this a look.  Thanks for reporting the issue.


Please look at an assertion failure, introduced with d9f686a72:
CREATE TABLE t(a int, NOT NULL a NO INHERIT);
CREATE TABLE t2() INHERITS (t);

ALTER TABLE t ADD CONSTRAINT nna NOT NULL a;
TRAP: failed Assert("lockmode != NoLock || IsBootstrapProcessingMode() || CheckRelationLockedByMe(r, AccessShareLock, true)"), File: "relation.c", Line: 67, PID: 2980258


On d9f686a72~1 this script results in:
ERROR:  cannot change NO INHERIT status of inherited NOT NULL constraint "t_a_not_null" on relation "t"

Best regards,
Alexander




Re: [HACKERS] pg_upgrade failed with error - ERROR: column "a" in child table must be marked NOT NULL

2024-04-18 Thread Justin Pryzby
On Thu, Dec 14, 2017 at 08:51:06AM +0700, Ali Akbar wrote:
> Patch for adding check in pg_upgrade.

[...]

On Fri, Sep 29, 2023 at 11:36:42AM -0500, Justin Pryzby wrote:
> On Fri, Sep 29, 2023 at 09:16:35AM +0900, Michael Paquier wrote:
> > You mean when upgrading from an instance of 9.6 or older as c30f177 is
> > not there, right?
> 
> No - while upgrading from v15 to v16.  I'm not clear on how we upgraded
> *to* v15 without hitting the issue, nor how the "not null" got
> dropped...
> 
> > Anyway, it seems like the patch from [1] has no need to run this check
> > when the old cluster's version is 10 or newer.  And perhaps it should
> > mention that this check could be removed from pg_upgrade once v10
> > support is out of scope, in the shape of a comment.
> 
> You're still thinking of PRIMARY KEY as the only way to hit this, right?
> But Ali Akbar already pointed out how to reproduce the problem with DROP
> NOT NULL - which still applies to both v16 and v17.

Rebased and amended the forgotten patch.
See also ZiE3NoY6DdvlvFl9@pryzbyj2023 et seq.
>From f804bec39acac0b53e29563be9ef7a10237ba717 Mon Sep 17 00:00:00 2001
From: Ali Akbar 
Date: Thu, 14 Dec 2017 19:18:59 +0700
Subject: [PATCH] pg_upgrade: check for inconsistent inherited not null columns

Otherwise, the upgrade can fail halfway through with an error:
CREATE TABLE ip(id int PRIMARY KEY); CREATE TABLE ic(id int) INHERITS (ip); ALTER TABLE ic ALTER id DROP NOT NULL;
ERROR: column "id" in child table must be marked NOT NULL

JTP: updated to handle contype='n' - otherwise, the cnn_grandchild2
added at b0e96f311 triggers the issue this patch tries to avoid ...

Should be backpatched to all versions.
---
 src/bin/pg_upgrade/check.c | 101 +
 1 file changed, 101 insertions(+)

diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index ad1f8e3bcb1..79671a9cdb4 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -31,6 +31,7 @@ static void check_new_cluster_logical_replication_slots(void);
 static void check_new_cluster_subscription_configuration(void);
 static void check_old_cluster_for_valid_slots(bool live_check);
 static void check_old_cluster_subscription_state(void);
+static void check_for_not_null_inheritance(ClusterInfo *cluster);
 
 /*
  * DataTypesUsageChecks - definitions of data type checks for the old cluster
@@ -646,6 +647,8 @@ check_and_dump_old_cluster(bool live_check)
 	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
 		check_for_tables_with_oids(&old_cluster);
 
+	check_for_not_null_inheritance(&old_cluster);
+
 	/*
 	 * Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged
 	 * hash indexes
@@ -1832,6 +1835,104 @@ check_new_cluster_subscription_configuration(void)
 	check_ok();
 }
 
+/*
+ * check_for_not_null_inheritance()
+ *
+ *  Currently pg_upgrade will fail if there are any inconsistencies in NOT NULL
+ *  constraint inheritance: in PostgreSQL, a column can be NOT NULL in the
+ *  parent but nullable in its children, so check for inconsistent NOT NULL
+ *  constraints in the inheritance tree.
+ */
+static void
+check_for_not_null_inheritance(ClusterInfo *cluster)
+{
+	int			dbnum;
+	FILE	   *script = NULL;
+	char		output_path[MAXPGPATH];
+
+	prep_status("Checking for NOT NULL inconsistencies in inheritance");
+
+	snprintf(output_path, sizeof(output_path), "not_null_inconsistent_columns.txt");
+
+	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+	{
+		PGresult   *res;
+		bool		db_used = false;
+		int			ntups;
+		int			rowno;
+		int			i_nspname,
+	i_relname,
+	i_attname;
+		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
+		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
+
+		res = executeQueryOrDie(conn,
+"WITH RECURSIVE parents AS ( "
+"	SELECT	i.inhrelid, i.inhparent "
+"	FROM	pg_catalog.pg_inherits i "
+"	UNION ALL "
+"	SELECT	p.inhrelid, i.inhparent "
+"   FROM	parents p "
+"	JOIN	pg_catalog.pg_inherits i "
+"		ON	i.inhrelid = p.inhparent "
+") "
+"SELECT	n.nspname, c.relname, ac.attname  "
+"FROM	parents p, "
+"		pg_catalog.pg_attribute ac, "
+"		pg_catalog.pg_attribute ap, "
+"		pg_catalog.pg_constraint cc, "
+"		pg_catalog.pg_class c, "
+"		pg_catalog.pg_namespace n "
+"WHERE	NOT ac.attnotnull AND "
+"		ac.attrelid = p.inhrelid AND "
+"		ap.attrelid = p.inhparent AND "
+"		ac.attname = ap.attname AND "
+"		ac.attrelid = cc.conrelid AND ac.attnum = ANY(cc.conkey) AND contype='n' AND NOT connoinherit AND"
+"		ap.attnotnull AND "
+"		c.oid = ac.attrelid AND "
+"		c.relnamespace = n.oid");
+
+		ntups = PQntuples(res);
+		i_nspname = PQfnumber(res, "nspname");
+		i_relname = PQfnumber(res, "relname");
+		i_attname = PQfnumber(res, "attname");
+		for (rowno = 0; rowno < ntups; rowno++)
+		{
+			if (script == N

Re: Add new protocol message to change GUCs for usage with future protocol-only GUCs

2024-04-18 Thread Robert Haas
On Thu, Apr 18, 2024 at 3:34 PM Jelte Fennema-Nio  wrote:
> I really don't understand what exactly you're worried about. What do
> you expect will break when bumping the protocol version? As Dave said,
> clients should never bail out due to protocol version differences.

Sure, and I should never forget to take out the trash or mow the lawn.

> So, I don't understand why you seem to view bumping the protocol
> version with so much negativity. We're also bumping PG versions every
> year. Afaik people only like that, partially because it's immediately
> clear that certain features (e.g. MERGE) are not supported when
> connecting to older servers. To me the same is true for bumping the
> protocol version. There are no downsides to bumping it, only upsides.

I see it the exact opposite way around.

If we go with Peter's approach, every driver that supports his feature
will work perfectly, and every driver that doesn't will work exactly
as it does today. The risk of breaking anything is as near to zero as
human developers can reasonably hope to achieve. Nobody who doesn't
care about the feature will have to lift a single finger, today,
tomorrow, or ever. That's absolutely brilliant.

If we instead go with your approach, then anyone who wants to support
3.2 when it materializes will have to also support 3.1, which means
they have to support this feature. That's not a terrible burden, but
it's not a necessary one either. Also, even just 3.1 is going to break
something for somebody. There's just no way that we've left the
protocol version unchanged for this long and the first change we make
doesn't cause some collateral damage.

Sure, those are minor downsides in the grand scheme of things. But
AFAICS the only downside of Peter's approach that you've alleged is
that it doesn't involve bumping the version number. Of course, if bumping
the version number is an intrinsic good, then no further justification
is required, but I don't buy that. I do not believe that users or
maintainers will throw us a pizza party when they find out that we've
changed the version number. There's no reason for anyone to be happy
about that for its own sake.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Cutting support for OpenSSL 1.0.1 and 1.0.2 in 17~?

2024-04-18 Thread Daniel Gustafsson
> On 18 Apr 2024, at 12:53, Peter Eisentraut  wrote:

> Review of the latest batch:

Thanks for reviewing!

> * v9-0002-Remove-support-for-OpenSSL-1.0.2.patch
> 
> Ok, but maybe make the punctuation consistent here:

Fixed.

> * v9-0004-Support-SSL_R_VERSION_TOO_LOW-on-LibreSSL.patch
> 
> Seems ok, but the reason isn't clear to me.  Are there LibreSSL versions that 
> have SSL_R_VERSION_TOO_LOW but not SSL_R_VERSION_TOO_HIGH?  Maybe this could 
> be explained better.

LibreSSL doesn't support SSL_R_VERSION_TOO_HIGH at all, they only support
_TOO_LOW starting with the OpenBSD 7.2 release.  I've expanded the commit
message to document this.

> Also, "OpenSSL 7.2" in the commit message probably meant "OpenBSD"?

Ah yes, fixed.

> * v9-0005-Remove-pg_strong_random-initialization.patch
> 
> I don't understand the reason for this phrase in the commit message: "1.1.1 
> is being increasingly phased out from production use".  Did you mean 1.1.0 
> there?

Correct, I got lost among the version numbers it seems. Fixed.

> Conditionally sticking the RAND_poll() into pg_strong_random(), does that 
> have the effect we want?  It wouldn't reinitialize after a fork, AFAICT.

No, I think you're right: my previous version would have worked (but was ugly),
but this doesn't guarantee that.  Thinking more about it, maybe it's best to
just keep the init function and have a version check for 1.1.0 there, making it
an empty no-op for all other cases.  When we move past 1.1.0 due to a new API
requirement, we can blow it all away.
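
Something along these lines, assuming 0x10101000L (1.1.1) as the boundary
where OpenSSL started reseeding automatically after fork():

#include <openssl/rand.h>

void
pg_strong_random_init(void)
{
#if OPENSSL_VERSION_NUMBER < 0x10101000L
	/*
	 * OpenSSL before 1.1.1 doesn't reseed the CSPRNG in a forked child,
	 * so force a reseed in this process.
	 */
	RAND_poll();
#endif
}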

> If everything is addressed, I agree that 0001, 0003, and 0004 can go into 
> PG17, the rest later.

Agreed, 0002 and 0005 are clearly for the v18 cycle.

--
Daniel Gustafsson



v10-0001-Doc-Use-past-tense-for-things-which-happened-in-.patch
Description: Binary data


v10-0002-Remove-support-for-OpenSSL-1.0.2.patch
Description: Binary data


v10-0003-Support-disallowing-SSL-renegotiation-in-LibreSS.patch
Description: Binary data


v10-0004-Support-SSL_R_VERSION_TOO_LOW-on-LibreSSL.patch
Description: Binary data


v10-0005-Remove-pg_strong_random-initialization.patch
Description: Binary data


Re: improve performance of pg_dump --binary-upgrade

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 10:23:01AM -0500, Nathan Bossart wrote:
> On Thu, Apr 18, 2024 at 09:24:53AM +0200, Daniel Gustafsson wrote:
>> That does indeed seem like a saner approach.  Since we look up the relkind we
>> can also remove the is_index parameter to binary_upgrade_set_pg_class_oids
>> since we already know that without the caller telling us?
> 
> Yeah.  It looks like that's been possible since commit 9a974cb, so I can
> write a prerequisite patch for this.

Here's a new patch set with this change.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From d74692fe52d3c376d4b60323dd367a36593cd31b Mon Sep 17 00:00:00 2001
From: Nathan Bossart 
Date: Thu, 18 Apr 2024 15:15:19 -0500
Subject: [PATCH v2 1/2] Remove is_index parameter from
 binary_upgrade_set_pg_class_oids().

---
 src/bin/pg_dump/pg_dump.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 6d2f3fdef3..58c77a5e2b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -324,7 +324,7 @@ static void binary_upgrade_set_type_oids_by_rel(Archive *fout,
 const TableInfo *tbinfo);
 static void binary_upgrade_set_pg_class_oids(Archive *fout,
 			 PQExpBuffer upgrade_buffer,
-			 Oid pg_class_oid, bool is_index);
+			 Oid pg_class_oid);
 static void binary_upgrade_extension_member(PQExpBuffer upgrade_buffer,
 			const DumpableObject *dobj,
 			const char *objtype,
@@ -5394,8 +5394,7 @@ binary_upgrade_set_type_oids_by_rel(Archive *fout,
 
 static void
 binary_upgrade_set_pg_class_oids(Archive *fout,
- PQExpBuffer upgrade_buffer, Oid pg_class_oid,
- bool is_index)
+ PQExpBuffer upgrade_buffer, Oid pg_class_oid)
 {
 	PQExpBuffer upgrade_query = createPQExpBuffer();
 	PGresult   *upgrade_res;
@@ -5444,7 +5443,8 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
 	appendPQExpBufferStr(upgrade_buffer,
 		 "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n");
 
-	if (!is_index)
+	if (relkind != RELKIND_INDEX &&
+		relkind != RELKIND_PARTITIONED_INDEX)
 	{
 		appendPQExpBuffer(upgrade_buffer,
 		  "SELECT pg_catalog.binary_upgrade_set_next_heap_pg_class_oid('%u'::pg_catalog.oid);\n",
@@ -11864,7 +11864,7 @@ dumpCompositeType(Archive *fout, const TypeInfo *tyinfo)
 		binary_upgrade_set_type_oids_by_type_oid(fout, q,
  tyinfo->dobj.catId.oid,
  false, false);
-		binary_upgrade_set_pg_class_oids(fout, q, tyinfo->typrelid, false);
+		binary_upgrade_set_pg_class_oids(fout, q, tyinfo->typrelid);
 	}
 
 	qtypname = pg_strdup(fmtId(tyinfo->dobj.name));
@@ -15998,7 +15998,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 tbinfo->dobj.catId.oid, false);
+			 tbinfo->dobj.catId.oid);
 
 		appendPQExpBuffer(q, "CREATE VIEW %s", qualrelname);
 
@@ -16100,7 +16100,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 tbinfo->dobj.catId.oid, false);
+			 tbinfo->dobj.catId.oid);
 
 		appendPQExpBuffer(q, "CREATE %s%s %s",
 		  tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED ?
@@ -16990,7 +16990,7 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 indxinfo->dobj.catId.oid, true);
+			 indxinfo->dobj.catId.oid);
 
 		/* Plain secondary index */
 		appendPQExpBuffer(q, "%s;\n", indxinfo->indexdef);
@@ -17244,7 +17244,7 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 indxinfo->dobj.catId.oid, true);
+			 indxinfo->dobj.catId.oid);
 
 		appendPQExpBuffer(q, "ALTER %sTABLE ONLY %s\n", foreign,
 		  fmtQualifiedDumpable(tbinfo));
@@ -17653,7 +17653,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 	if (dopt->binary_upgrade)
 	{
 		binary_upgrade_set_pg_class_oids(fout, query,
-		 tbinfo->dobj.catId.oid, false);
+		 tbinfo->dobj.catId.oid);
 
 		/*
 		 * In older PG versions a sequence will have a pg_type entry, but v14
-- 
2.25.1

>From 1580a7b9896b727925cab364ae9fdc7107d791d4 Mon Sep 17 00:00:00 2001
From: Nathan Bossart 
Date: Wed, 17 Apr 2024 22:55:27 -0500
Subject: [PATCH v2 2/2] Improve performance of pg_dump --binary-upgrade.

---
 src/bin/pg_dump/pg_dump.c| 117 +++
 src/tools/pgindent/typedefs.list |   1 +
 2 files changed, 72 insertions(+), 46 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 58c77a5e2b..062638ad34 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -55,6 +55,7 @@
 #include "catalog/pg_trigger_d.h"
 #include "catalog/pg_type_d.h"
 #include "common/co

Re: Cannot find a working 64-bit integer type on Illumos

2024-04-18 Thread Thomas Munro
On Thu, Apr 18, 2024 at 8:47 PM Peter Eisentraut  wrote:
> I'm not sure I understand the problem here.  Do you mean that in theory
> a platform's PRId64 could be something other than "l" or "ll"?

Yes.  I don't know why anyone would do that, and the systems I checked
all have the obvious definitions, e.g. "ld", "lld", etc.  Perhaps it's an
acceptable risk?  It certainly gives us a tidier result.

For discussion, here is a variant that fully embraces <stdint.h> and
the PRI*64 macros.
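
As a self-contained illustration of that approach (PRId64 expands to the
platform's correct length modifier, so one format string works whether
int64_t is long or long long):

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	int64_t		v = INT64_C(9223372036854775807);

	/* PRId64 is "ld" or "lld" (etc.) depending on the platform */
	printf("max int64_t = %" PRId64 "\n", v);
	return 0;
}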


v2-0001-Use-int64_t-support-from-stdint.h-and-inttypes.h.patch
Description: Binary data


v2-0002-Remove-traces-of-BeOS.patch
Description: Binary data


Re: improve performance of pg_dump --binary-upgrade

2024-04-18 Thread Daniel Gustafsson
> On 18 Apr 2024, at 22:28, Nathan Bossart  wrote:
> 
> On Thu, Apr 18, 2024 at 10:23:01AM -0500, Nathan Bossart wrote:
>> On Thu, Apr 18, 2024 at 09:24:53AM +0200, Daniel Gustafsson wrote:
>>> That does indeed seem like a saner approach.  Since we look up the relkind 
>>> we
>>> can also remove the is_index parameter to binary_upgrade_set_pg_class_oids
>>> since we already know that without the caller telling us?
>> 
>> Yeah.  It looks like that's been possible since commit 9a974cb, so I can
>> write a prerequisite patch for this.
> 
> Here's a new patch set with this change.

From a read-through they look good, a very nice performance improvement in an
important path.  I think it would be nice with some comments on the
BinaryUpgradeClassOids struct (since the code using it is thousands of lines
away), and a comment on the if (oids == NULL) block explaining the caching.
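
For readers skimming the thread, a rough sketch of the caching scheme under
review (the struct name comes from the patch; its fields and the comparator
below are illustrative guesses):

typedef struct BinaryUpgradeClassOids
{
	Oid			oid;		/* pg_class OID of the relation; sort key */
	/* ... plus the pg_class/pg_index values the dump statements need ... */
} BinaryUpgradeClassOids;

static int
compare_class_oids(const void *a, const void *b)
{
	Oid			oa = ((const BinaryUpgradeClassOids *) a)->oid;
	Oid			ob = ((const BinaryUpgradeClassOids *) b)->oid;

	return pg_cmp_u32(oa, ob);	/* from common/int.h, which the patch includes */
}

/*
 * In binary_upgrade_set_pg_class_oids(): on the first call (oids == NULL),
 * run a single query fetching these values for all relations and sort the
 * result; every later call bsearch()es the array instead of issuing its
 * own query.
 */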

--
Daniel Gustafsson





Re: Add SPLIT PARTITION/MERGE PARTITIONS commands

2024-04-18 Thread Justin Pryzby
Here are some additional fixes to docs.
>From 6da8beaa5a2b78e785e5b6519894f8357002d916 Mon Sep 17 00:00:00 2001
From: Justin Pryzby 
Date: Thu, 18 Apr 2024 15:40:44 -0500
Subject: [PATCH] doc review for ALTER TABLE ... SPLIT/MERGE PARTITION

---
 doc/src/sgml/ddl.sgml |  4 ++--
 doc/src/sgml/ref/alter_table.sgml | 22 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 026bfff70f3..01277b1d327 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -4384,7 +4384,7 @@ ALTER INDEX measurement_city_id_logdate_key
 
 
  There is also an option for merging multiple table partitions into
- a single partition using the
+ a single partition using
  ALTER TABLE ... MERGE PARTITIONS.
  This feature simplifies the management of partitioned tables by allowing
  users to combine partitions that are no longer needed as
@@ -4403,7 +4403,7 @@ ALTER TABLE measurement
 
 
  Similarly to merging multiple table partitions, there is an option for
- splitting a single partition into multiple using the
+ splitting a single partition into multiple partitions using
  ALTER TABLE ... SPLIT PARTITION.
  This feature could come in handy when one partition grows too big
  and needs to be split into multiple.  It's important to note that
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index fe36ff82e52..e52cfee840c 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1136,16 +1136,16 @@ WITH ( MODULUS numeric_literal, REM
   If the split partition is a DEFAULT partition, one of the new partitions must be DEFAULT.
   In case one of the new partitions or one of existing partitions is DEFAULT,
   new partitions partition_name1,
-  partition_name2, ... can have spaces
+  partition_name2, ... can have gaps
   between partitions bounds.  If the partitioned table does not have a DEFAULT
   partition, the DEFAULT partition can be defined as one of the new partitions.
  
  
   In case new partitions do not contain a DEFAULT partition and the partitioned table
-  does not have a DEFAULT partition, the following must be true: sum bounds of
+  does not have a DEFAULT partition, the following must be true: the sum bounds of
   new partitions partition_name1,
   partition_name2, ... should be
-  equal to bound of split partition partition_name.
+  equal to the bounds of split partition partition_name.
   One of the new partitions partition_name1,
   partition_name2, ... can have
   the same name as split partition partition_name
@@ -1168,24 +1168,24 @@ WITH ( MODULUS numeric_literal, REM
 
 
  
-  This form merges several partitions into the one partition of the target table.
-  Hash-partitioning is not supported.  If DEFAULT partition is not in the
+  This form merges several partitions of the target table into a single partition.
+  Hash-partitioning is not supported.  If a DEFAULT partition is not in the
   list of partitions partition_name1,
   partition_name2 [, ...]:
   

 
- For range-partitioned tables it is necessary that the ranges
+ For range-partitioned tables, it is necessary that the ranges
  of the partitions partition_name1,
  partition_name2 [, ...] can
- be merged into one range without spaces and overlaps (otherwise an error
+ be merged into one range with neither gaps nor overlaps (otherwise an error
  will be generated).  The combined range will be the range for the partition
  partition_name.
 


 
- For list-partitioned tables the value lists of all partitions
+ For list-partitioned tables, the value lists of all partitions
  partition_name1,
  partition_name2 [, ...] are
  combined and form the list of values of partition
@@ -1193,7 +1193,7 @@ WITH ( MODULUS numeric_literal, REM
 

   
-  If DEFAULT partition is in the list of partitions partition_name1,
+  If a DEFAULT partition is in the list of partitions partition_name1,
   partition_name2 [, ...]:
   

@@ -1204,8 +1204,8 @@ WITH ( MODULUS numeric_literal, REM


 
- For range- and list-partitioned tables the ranges and lists of values
- of the merged partitions can be any.
+ For range- and list-partitioned tables, the ranges and lists of values
+ of the merged partitions can be anything.
 

   
-- 
2.42.0



Re: Cannot find a working 64-bit integer type on Illumos

2024-04-18 Thread Thomas Munro
On Thu, Apr 18, 2024 at 8:47 PM Peter Eisentraut  wrote:
> Maybe this means something like our int64 is long long int but the
> system's int64_t is long int underneath, but I don't see how that would
> matter for the limit macros.

Agreed, so I don't think it's long vs long long (when they have the same width).

I wonder if this comment is a clue:

static char *
inet_net_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size)
{
/*
 * Note that int32_t and int16_t need only be "at least" large enough to
 * contain a value of the specified size.  On some systems, like Crays,
 * there is no such thing as an integer variable with 16 bits. Keep this
 * in mind if you think this function should have been coded to use
 * pointer overlays.  All the world's not a VAX.
 */

I'd seen that claim somewhere else before, but I can't recall where.
So there were systems using those names in an ad hoc, unspecified way
before C99 nailed this stuff down?  In modern C, int32_t is definitely
an exact-width type (but there are other standardised variants like
int_fast32_t to allow for Cray-like systems that would prefer to use a
wider type, i.e. "at least" 32 bits wide, so I guess that's what
happened to that idea?).

Or perhaps it's referring to worries about the width of char, short,
and int, or the assumption of two's-complement.  I think if any of that
stuff weren't as assumed, we'd have many problems in many places, so
I'm not seeing a problem.  (FTR, C23 finally nailed down
two's-complement as a requirement; and although C might not say so,
POSIX says that char is a byte, and our assumption that int = int32_t
is pretty deeply baked into PostgreSQL, so it's almost impossible to
imagine that short has a size other than 16 bits.  But these are all
assumptions made by the OLD coding, not by the patch I posted.)  In
short, I guess that isn't what was meant.




Re: Popcount optimization using AVX512

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 08:24:03PM +, Devulapalli, Raghuveer wrote:
>> This seems to contradict the note about doing step 3 at any point, and
>> given step 1 is the OSXSAVE check, I'm not following what this means,
>> anyway.
> 
> It is recommended that you run the xgetbv code before you check for cpu
> features avx512-popcnt and avx512-bw. The way it is written now is the
> opposite order. I would also recommend splitting the cpuid feature check
> for avx512popcnt/avx512bw and xgetbv section into separate functions to
> make them modular. Something like:
> 
> static inline
> int check_os_avx512_support(void)
> {
> // (1) run cpuid leaf 1 to check for xgetbv instruction support:
> unsigned int exx[4] = {0, 0, 0, 0};
> __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
> if ((exx[2] & (1 << 27)) == 0)  /* xsave */
> return false;
> 
> /* Does XGETBV say the ZMM/YMM/XMM registers are enabled? */
> return (_xgetbv(0) & 0xe0) == 0xe0;
> }
> 
>> I'm also wondering if we need to check that (_xgetbv(0) & 0xe6) == 0xe6
>> instead of just (_xgetbv(0) & 0xe0) != 0, as the status of the lower
>> half of some of the ZMM registers is stored in the SSE and AVX state
>> [0].  I don't know how likely it is that 0xe0 would succeed but 0xe6
>> wouldn't, but we might as well make it correct.
> 
> This is correct. It needs to check all 3 bits (XMM, YMM, and ZMM). The way
> it is written now is incorrect.

Thanks for the feedback.  I've attached an updated patch.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From d20b19804a17d9f6eab1d40de7e9fb10488ac6b0 Mon Sep 17 00:00:00 2001
From: Nathan Bossart 
Date: Thu, 18 Apr 2024 15:57:56 -0500
Subject: [PATCH v2 1/1] osxsave

---
 src/port/pg_popcount_avx512_choose.c | 89 +++-
 1 file changed, 60 insertions(+), 29 deletions(-)

diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
index ae3fa3d306..009f94909a 100644
--- a/src/port/pg_popcount_avx512_choose.c
+++ b/src/port/pg_popcount_avx512_choose.c
@@ -34,27 +34,47 @@
 #ifdef TRY_POPCNT_FAST
 
 /*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
+ * Does CPUID say there's support for XSAVE instructions?
  */
-bool
-pg_popcount_avx512_available(void)
+static inline bool
+xsave_available(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
-	/* Does CPUID say there's support for AVX-512 popcount instructions? */
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[2] & (1 << 14)) == 0)	/* avx512-vpopcntdq */
-		return false;
+	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+static inline bool
+zmm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+	return (_xgetbv(0) & 0xe6) == 0xe6;
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
 
-	/* Does CPUID say there's support for AVX-512 byte and word instructions? */
-	memset(exx, 0, sizeof(exx));
 #if defined(HAVE__GET_CPUID_COUNT)
 	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUIDEX)
@@ -62,27 +82,38 @@ pg_popcount_avx512_available(void)
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[1] & (1 << 30)) == 0)	/* avx512-bw */
-		return false;
+	return (exx[2] & (1 << 14)) != 0;	/* avx512-vpopcntdq */
+}
 
-	/* Does CPUID say there's support for XSAVE instructions? */
-	memset(exx, 0, sizeof(exx));
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
+/*
+ * Does CPUID say there's support for AVX-512 byte and word instructions?
+ */
+static inline bool
+avx512_bw_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, 7, 0);
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[2] & (1 << 26)) == 0)	/* xsave */
-		return false;
+	return (exx[1] & (1 << 30)) != 0;	/* avx512-bw */
+}
 
-	/* Does XGETBV say the ZMM registers are enabled? */
-#ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe0) != 0;
-#else
-	return false;
-#endif
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_avx5

Re: improve performance of pg_dump --binary-upgrade

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 10:33:08PM +0200, Daniel Gustafsson wrote:
> From a read-through they look good, a very nice performance improvement in an
> important path.  I think it would be nice with some comments on the
> BinaryUpgradeClassOids struct (since the code using it is thousands of lines
> away), and a comment on the if (oids == NULL) block explaining the caching.

Added.  Thanks for reviewing!  Unfortunately, this one will have to sit for
a couple months...

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 6797e4e1fc1a63f710ed0a409b1337880eb91ad4 Mon Sep 17 00:00:00 2001
From: Nathan Bossart 
Date: Thu, 18 Apr 2024 15:15:19 -0500
Subject: [PATCH v3 1/2] Remove is_index parameter from
 binary_upgrade_set_pg_class_oids().

---
 src/bin/pg_dump/pg_dump.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 6d2f3fdef3..58c77a5e2b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -324,7 +324,7 @@ static void binary_upgrade_set_type_oids_by_rel(Archive *fout,
 const TableInfo *tbinfo);
 static void binary_upgrade_set_pg_class_oids(Archive *fout,
 			 PQExpBuffer upgrade_buffer,
-			 Oid pg_class_oid, bool is_index);
+			 Oid pg_class_oid);
 static void binary_upgrade_extension_member(PQExpBuffer upgrade_buffer,
 			const DumpableObject *dobj,
 			const char *objtype,
@@ -5394,8 +5394,7 @@ binary_upgrade_set_type_oids_by_rel(Archive *fout,
 
 static void
 binary_upgrade_set_pg_class_oids(Archive *fout,
- PQExpBuffer upgrade_buffer, Oid pg_class_oid,
- bool is_index)
+ PQExpBuffer upgrade_buffer, Oid pg_class_oid)
 {
 	PQExpBuffer upgrade_query = createPQExpBuffer();
 	PGresult   *upgrade_res;
@@ -5444,7 +5443,8 @@ binary_upgrade_set_pg_class_oids(Archive *fout,
 	appendPQExpBufferStr(upgrade_buffer,
 		 "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n");
 
-	if (!is_index)
+	if (relkind != RELKIND_INDEX &&
+		relkind != RELKIND_PARTITIONED_INDEX)
 	{
 		appendPQExpBuffer(upgrade_buffer,
 		  "SELECT pg_catalog.binary_upgrade_set_next_heap_pg_class_oid('%u'::pg_catalog.oid);\n",
@@ -11864,7 +11864,7 @@ dumpCompositeType(Archive *fout, const TypeInfo *tyinfo)
 		binary_upgrade_set_type_oids_by_type_oid(fout, q,
  tyinfo->dobj.catId.oid,
  false, false);
-		binary_upgrade_set_pg_class_oids(fout, q, tyinfo->typrelid, false);
+		binary_upgrade_set_pg_class_oids(fout, q, tyinfo->typrelid);
 	}
 
 	qtypname = pg_strdup(fmtId(tyinfo->dobj.name));
@@ -15998,7 +15998,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 tbinfo->dobj.catId.oid, false);
+			 tbinfo->dobj.catId.oid);
 
 		appendPQExpBuffer(q, "CREATE VIEW %s", qualrelname);
 
@@ -16100,7 +16100,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 tbinfo->dobj.catId.oid, false);
+			 tbinfo->dobj.catId.oid);
 
 		appendPQExpBuffer(q, "CREATE %s%s %s",
 		  tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED ?
@@ -16990,7 +16990,7 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 indxinfo->dobj.catId.oid, true);
+			 indxinfo->dobj.catId.oid);
 
 		/* Plain secondary index */
 		appendPQExpBuffer(q, "%s;\n", indxinfo->indexdef);
@@ -17244,7 +17244,7 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo)
 
 		if (dopt->binary_upgrade)
 			binary_upgrade_set_pg_class_oids(fout, q,
-			 indxinfo->dobj.catId.oid, true);
+			 indxinfo->dobj.catId.oid);
 
 		appendPQExpBuffer(q, "ALTER %sTABLE ONLY %s\n", foreign,
 		  fmtQualifiedDumpable(tbinfo));
@@ -17653,7 +17653,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
 	if (dopt->binary_upgrade)
 	{
 		binary_upgrade_set_pg_class_oids(fout, query,
-		 tbinfo->dobj.catId.oid, false);
+		 tbinfo->dobj.catId.oid);
 
 		/*
 		 * In older PG versions a sequence will have a pg_type entry, but v14
-- 
2.25.1

>From b3ae9df69fdbf386d09250f1c225ecd8665141a7 Mon Sep 17 00:00:00 2001
From: Nathan Bossart 
Date: Wed, 17 Apr 2024 22:55:27 -0500
Subject: [PATCH v3 2/2] Improve performance of pg_dump --binary-upgrade.

---
 src/bin/pg_dump/pg_dump.c| 126 ---
 src/tools/pgindent/typedefs.list |   1 +
 2 files changed, 81 insertions(+), 46 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 58c77a5e2b..b1f06a199b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -55,6 +55,7 @@
 #include "catalog/pg_trigger_d.h"
 #include "catalog/pg_type_d.h"
 #include "common/connect.h"
+#include "common/int.h"
 #includ
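
In outline, the caching scheme under review works roughly like the sketch
below (the field list and the stub query are assumptions; only the struct
name, the single catalog query, and the "if (oids == NULL)" check come
from the thread):

#include <stdlib.h>

typedef unsigned int Oid;		/* as in postgres_ext.h */

typedef struct BinaryUpgradeClassOids
{
	Oid			oid;			/* pg_class OID, the sort key */
	Oid			relfilenumber;	/* assumed field */
	char		relkind;		/* assumed field */
} BinaryUpgradeClassOids;

static BinaryUpgradeClassOids *oids = NULL;
static int	noids = 0;

static int
class_oid_cmp(const void *a, const void *b)
{
	Oid			x = ((const BinaryUpgradeClassOids *) a)->oid;
	Oid			y = ((const BinaryUpgradeClassOids *) b)->oid;

	return (x > y) - (x < y);
}

/* Stand-in for the single catalog query issued on first use. */
static void
collect_class_oids(void)
{
	static BinaryUpgradeClassOids all[] = {
		{16384, 16384, 'r'},
		{16390, 16390, 'i'},
	};

	oids = all;
	noids = 2;
	qsort(oids, noids, sizeof(BinaryUpgradeClassOids), class_oid_cmp);
}

static BinaryUpgradeClassOids *
lookup_class_oid(Oid reloid)
{
	BinaryUpgradeClassOids key = {reloid, 0, 0};

	if (oids == NULL)			/* fill the cache once, reuse afterwards */
		collect_class_oids();
	return bsearch(&key, oids, noids,
				   sizeof(BinaryUpgradeClassOids), class_oid_cmp);
}

The point is to replace one server round-trip per relation with a single
query up front plus an O(log n) in-memory lookup per relation.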

Re: Cannot find a working 64-bit integer type on Illumos

2024-04-18 Thread Thomas Munro
On Thu, Apr 18, 2024 at 6:09 PM Japin Li  wrote:
> /home/japin/postgres/build/../src/common/config_info.c:198:11: error: 
> comparison of integer expressions of different signedness: 'int' and 'size_t' 
> {aka 'long unsigned int'} [-Werror=sign-compare]
>   198 |  Assert(i == *configdata_len);

Right, PostgreSQL doesn't compile cleanly with the "sign-compare"
warning.  There have been a few threads about that, and someone might
want to think harder about it, but it's a different topic unrelated to
<stdint.h>.
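
For what it's worth, the two usual remedies for that particular warning
look like this (an illustrative sketch, not the fix used in the tree):
give the counter the same signedness as the length, or cast one side at
the comparison.

#include <assert.h>
#include <stddef.h>

static void
fill_configdata(size_t configdata_len)
{
	size_t		i;				/* was int, hence the mismatch */

	for (i = 0; i < configdata_len; i++)
	{
		/* ... populate configdata[i] ... */
	}

	assert(i == configdata_len);	/* both sides now size_t */
	/* alternative, keeping "int i": assert((size_t) i == configdata_len); */
}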

> /home/japin/postgres/build/../src/include/lib/simplehash.h:1138:9: error: 
> format '%llu' expects argument of type 'long long unsigned int', but argument 
> 4 has type 'uint64' {aka 'long unsigned int'} [-Werror=format=]

It seems my v1 patch's configure probe for INT64_FORMAT was broken.
In the v2 patch I tried not doing that probe at all, and instead
inviting <inttypes.h> into our world (that's the standardised way to
produce format strings, which has the slight complication that we are
intercepting printf calls...).  I suspect that'll work better for you.
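
For anyone unfamiliar with that header, a minimal standard-C example of
what <inttypes.h> provides (nothing PostgreSQL-specific here):

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	int64_t		v = INT64_C(9007199254740993);

	/*
	 * PRId64 expands to the right conversion specifier for int64_t on
	 * each platform ("ld" vs. "lld"), which is exactly what a
	 * configure-time INT64_FORMAT probe otherwise has to guess.
	 */
	printf("v = %" PRId64 "\n", v);
	return 0;
}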




Re: Idea Feedback: psql \h misses -> Offers Links?

2024-04-18 Thread Kirk Wolak
On Thu, Apr 18, 2024 at 2:37 PM Peter Eisentraut 
wrote:

> On 17.04.24 19:47, Kirk Wolak wrote:
> > *Example:*
> > \h current_setting
> > No help available for "current_setting".
> > Try \h with no arguments to see available help.
> >
> > https://www.postgresql.org/search/?u=%2Fdocs%2F16%2F&q=current_setting
> > 
>
> One problem is that this search URL does not actually produce any useful
> information about current_setting.
>
I see what you mean, but doesn't that imply our web search feature is
weak?  That's the full name of an existing function, and it's in the index.
But it cannot be found if searched from the website?
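
As a sketch of the idea (the function name is invented, and a real
version would have to percent-encode the topic), the fallback could be
as small as:

#include <stdio.h>

static void
print_search_hint(const char *topic, int server_version_num)
{
	/* server_version_num is e.g. 160002; the docs URL wants "16" */
	printf("No help available for \"%s\".\n"
		   "Try https://www.postgresql.org/search/?u=%%2Fdocs%%2F%d%%2F&q=%s\n",
		   topic, server_version_num / 10000, topic);
}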


RE: Popcount optimization using AVX512

2024-04-18 Thread Devulapalli, Raghuveer
> Thanks for the feedback.  I've attached an updated patch.

(1) Shouldn't it be: return (_xgetbv(0) & 0xe6) == 0xe6; ? Otherwise 
zmm_regs_available() will return false. 
(2) Nitpick: avx512_popcnt_available() and avx512_bw_available() run the same 
cpuid leaf. You could combine them into one to avoid running cpuid twice. My 
apologies, I should have mentioned this before. 
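
For context, here is why the mask is 0xe6 rather than a simple non-zero
test, as a sketch (the bit assignments come from the XCR0 register
layout, not from the thread): bits 1 (SSE) and 2 (AVX) plus bits 5-7
(opmask, ZMM_Hi256, Hi16_ZMM) must all be enabled by the OS before the
full ZMM state is usable, so an OS that enables only AVX still yields a
non-zero _xgetbv(0).

#include <stdbool.h>
#include <stdint.h>
#ifdef HAVE_XSAVE_INTRINSICS
#include <immintrin.h>
#endif

static bool
zmm_state_enabled(void)
{
#ifdef HAVE_XSAVE_INTRINSICS
	uint64_t	xcr0 = _xgetbv(0);

	return (xcr0 & 0xe6) == 0xe6;	/* SSE + AVX + opmask + both ZMM halves */
#else
	return false;
#endif
}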




Re: documentation structure

2024-04-18 Thread Dagfinn Ilmari Mannsåker
Corey Huinker  writes:

>>
>> I haven't dealt with variadic yet, since the two styles are visually
>> different, not just markup (<optional> renders as [...]).
>>
>> The two styles for variadic are the what I call caller-style:
>>
>>concat ( val1 "any" [, val2 "any" [, ...] ] )
>>format(formatstr text [, formatarg "any" [, ...] ])
>>
>
> While this style is obviously clumsier for us to compose, it does avoid
> relying on the user understanding what the word variadic means. Searching
> through online documentation of the Python *args parameter, the word
> variadic never comes up; the closest they get is "variable length
> argument". I realize that Python is not SQL, but I think it's a good point
> of reference for what concepts the average reader is likely to know.

Yeah, we can't expect everyone wanting to call a built-in function to
know how they would define an equivalent one themselves. In that case I
propose marking it up like this:

<function>format</function> (
    <parameter>formatstr</parameter> <type>text</type>
    <optional>, <parameter>formatarg</parameter> <type>"any"</type>
    <optional>, ... </optional></optional> )
<returnvalue>text</returnvalue>


> Looking at the patch, I think it is good, though I'd consider doing some
> indentation for the nested <optional>s to allow the author to do more
> visual tag-matching. The ']'s were sufficiently visually distinct that we
> didn't really need or want nesting, but </optional> is just another tag to
> my eyes in a sea of tags.

The requisite nesting when there are multiple optional parameters makes
it annoying to wrap and indent it "properly" per XML convention, but how
about something like this, with each parameter on a line of its own, and
all the closing </optional> tags on one line?

<function>regexp_substr</function> (
    <parameter>string</parameter> <type>text</type>,
    <parameter>pattern</parameter> <type>text</type>
    <optional>, <parameter>start</parameter> <type>integer</type>
    <optional>, <parameter>N</parameter> <type>integer</type>
    <optional>, <parameter>flags</parameter> <type>text</type>
    <optional>, <parameter>subexpr</parameter> <type>integer</type>
    </optional></optional></optional></optional> )
<returnvalue>text</returnvalue>

A lot of functions mostly follow this style, except they tend to put the
first parameter on the same line as the function name, even when that
makes the line overly long. I propose going the other way: each
parameter on a line of its own, even if the first one would fit after
the function name, unless the whole parameter list fits after the
function name.

Also, when there's only one optional argument, or they're independently
optional, not nested, the <optional> tag should go on the same line as
the parameter.

<function>substring</function> (
    <parameter>bits</parameter> <type>bit</type>
    <optional> <literal>FROM</literal> <parameter>start</parameter>
    <type>integer</type> </optional>
    <optional> <literal>FOR</literal> <parameter>count</parameter>
    <type>integer</type> </optional> )
<returnvalue>bit</returnvalue>


I'm not quite sure what to do with things like json_object, which have
even more complex nesting of optional parameters, but I do think the
current 200+ character lines are too long.

- ilmari




Re: Add new protocol message to change GUCs for usage with future protocol-only GUCs

2024-04-18 Thread Jelte Fennema-Nio
On Thu, 18 Apr 2024 at 22:17, Robert Haas  wrote:
> If we go with Peter's approach, every driver that supports his feature
> will work perfectly, and every driver that doesn't will work exactly
> as it does today. The risk of breaking anything is as near to zero as
> human developers can reasonably hope to achieve. Nobody who doesn't
> care about the feature will have to lift a single finger, today,
> tomorrow, or ever. That's absolutely brilliant.
>
> If we instead go with your approach, then anyone who wants to support
> 3.2 when it materializes will have to also support 3.1, which means
> they have to support this feature.

To clarify: My proposed approach is to use a protocol extension
parameter to enable the new messages that the server can send
(like Peter is doing now). And **in addition to that** gate the new
Bind format type behind a feature switch. There is literally nothing
clients will have to do to "support" that feature (except for
requesting a higher version protocol). Your suggestion of not bumping
the version but still allowing the new format type on version 3.0
doesn't have any advantage afaict, except secretly hiding from any
pooler in the middle that such a format type might be sent.

> Also, even just 3.1 is going to break
> something for somebody. There's just no way that we've left the
> protocol version unchanged for this long and the first change we make
> doesn't cause some collateral damage.

Sure, but the exact same argument holds for protocol extension
parameters. We've never set them, so they are bound to break something
the first time. My whole point is that once we bite that bullet, the
next protocol parameters and protocol version bumps won't cause such
breakage.

> Sure, those are minor downsides in the grand scheme of things. But
> AFAICS the only downside of Peter's approach that you've alleged is
> that doesn't involve bumping the version number. Of course, if bumping
> the version number is an intrinsic good, then no further justification
> is required, but I don't buy that. I do not believe that users or
> maintainers will throw us a pizza party when they find out that we've
> changed the version number. There's no reason for anyone to be happy
> about that for its own sake.

As a connection pooler maintainer I would definitely love it if every
protocol change required either a protocol extension parameter or a
protocol version bump. That way I can easily check at every release
whether the protocol changed by looking at two things, instead of
diffing the protocol docs to see whether some tiny "supposedly
irrelevant" change was made.
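
To illustrate those "two things" with a sketch (the gate itself and the
extension-parameter name are hypothetical; the version macros match the
ones in src/include/libpq/pqcomm.h):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define PG_PROTOCOL(m, n)		(((m) << 16) | (n))
#define PG_PROTOCOL_MAJOR(v)	((v) >> 16)
#define PG_PROTOCOL_MINOR(v)	((v) & 0x0000ffff)

static bool
pooler_understands(uint32_t requested_version, const char *startup_param)
{
	/* Thing one: the protocol version requested in the startup packet. */
	if (PG_PROTOCOL_MAJOR(requested_version) != 3 ||
		PG_PROTOCOL_MINOR(requested_version) > 1)	/* newest we implement */
		return false;

	/* Thing two: any _pq_.* protocol extension parameters. */
	if (strncmp(startup_param, "_pq_.", 5) == 0 &&
		strcmp(startup_param, "_pq_.example_extension") != 0)
		return false;			/* unknown extension: refuse or strip it */

	return true;
}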




Re: Idea Feedback: psql \h misses -> Offers Links?

2024-04-18 Thread Dagfinn Ilmari Mannsåker
Kirk Wolak  writes:

> On Thu, Apr 18, 2024 at 2:37 PM Peter Eisentraut 
> wrote:
>
>> On 17.04.24 19:47, Kirk Wolak wrote:
>> > *Example:*
>> > \h current_setting
>> > No help available for "current_setting".
>> > Try \h with no arguments to see available help.
>> >
>> > https://www.postgresql.org/search/?u=%2Fdocs%2F16%2F&q=current_setting
>> > 
>>
>> One problem is that this search URL does not actually produce any useful
>> information about current_setting.
>
> I see what you mean, but doesn't that imply our web search feature is
> weak?  That's the full name of an existing function, and it's in the index.
> But it cannot be found if searched from the website?

While I do think we could do a better job of providing links directly to
the documentation of functions and config parameters, I wouldn't say
that the search result is _completely_ useless in this case.  The first
hit is https://www.postgresql.org/docs/16/functions-admin.html, which is
where current_setting() is documented (it's even the first function on
that page, but that's just luck in this case).

- ilmari




Re: Add new protocol message to change GUCs for usage with future protocol-only GUCs

2024-04-18 Thread Jelte Fennema-Nio
On Thu, 18 Apr 2024 at 23:36, Jelte Fennema-Nio  wrote:
> To clarify: My proposed approach is to use a protocol extension
> parameter for to enable the new messages that the server can send
> (like Peter is doing now). And **in addition to that** gate the new
> Bind format type behind a feature switch.

ugh, correction: gate the new Bind format type behind a **protocol bump**




Re: Popcount optimization using AVX512

2024-04-18 Thread Nathan Bossart
On Thu, Apr 18, 2024 at 09:29:55PM +, Devulapalli, Raghuveer wrote:
> (1) Shouldn't it be: return (_xgetbv(0) & 0xe6) == 0xe6; ? Otherwise
> zmm_regs_available() will return false.

Yes, that's a mistake.  I fixed that in v3.

> (2) Nitpick: avx512_popcnt_available() and avx512_bw_available() run the
> same cpuid leaf. You could combine them into one to avoid running cpuid
> twice. My apologies, I should have mentioned this before.

Good call.  The byte-and-word instructions were a late addition to the
patch, so I missed this originally.

On that note, is it necessary to also check for avx512f?  At the moment, we
are assuming that's supported if the other AVX-512 instructions are
available.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From e04c348eb389c6aa1597ac35d57b5e7ae7075381 Mon Sep 17 00:00:00 2001
From: Nathan Bossart 
Date: Thu, 18 Apr 2024 15:57:56 -0500
Subject: [PATCH v3 1/1] osxsave

---
 src/port/pg_popcount_avx512_choose.c | 80 
 1 file changed, 47 insertions(+), 33 deletions(-)

diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
index ae3fa3d306..b37107803a 100644
--- a/src/port/pg_popcount_avx512_choose.c
+++ b/src/port/pg_popcount_avx512_choose.c
@@ -34,39 +34,13 @@
 #ifdef TRY_POPCNT_FAST
 
 /*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
+ * Does CPUID say there's support for XSAVE instructions?
  */
-bool
-pg_popcount_avx512_available(void)
+static inline bool
+xsave_available(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
-	/* Does CPUID say there's support for AVX-512 popcount instructions? */
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
-	if ((exx[2] & (1 << 14)) == 0)	/* avx512-vpopcntdq */
-		return false;
-
-	/* Does CPUID say there's support for AVX-512 byte and word instructions? */
-	memset(exx, 0, sizeof(exx));
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
-	if ((exx[1] & (1 << 30)) == 0)	/* avx512-bw */
-		return false;
-
-	/* Does CPUID say there's support for XSAVE instructions? */
-	memset(exx, 0, sizeof(exx));
 #if defined(HAVE__GET_CPUID)
 	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUID)
@@ -74,15 +48,55 @@ pg_popcount_avx512_available(void)
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[2] & (1 << 26)) == 0)	/* xsave */
-		return false;
+	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
+}
 
-	/* Does XGETBV say the ZMM registers are enabled? */
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+static inline bool
+zmm_regs_available(void)
+{
 #ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe0) != 0;
+	return (_xgetbv(0) & 0xe6) == 0xe6;
 #else
 	return false;
 #endif
 }
 
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+	return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
+		(exx[1] & (1 << 30)) != 0;	/* avx512-bw */
+}
+
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_avx512_available(void)
+{
+	return xsave_available() &&
+		zmm_regs_available() &&
+		avx512_popcnt_available();
+}
+
 #endif			/* TRY_POPCNT_FAST */
-- 
2.25.1



RE: Popcount optimization using AVX512

2024-04-18 Thread Devulapalli, Raghuveer
> On that note, is it necessary to also check for avx512f?  At the moment, we 
> are assuming that's supported if the other AVX-512 instructions are available.

No, it's not needed. There are no CPUs with avx512bw/avx512popcnt without 
avx512f.  Unfortunately though, avx512popcnt does not mean avx512bw (I think 
the deprecated Xeon Phi processors fall in this category), which is why we 
need both. 
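
If an explicit check were wanted anyway, it could be folded into the same
leaf-7/subleaf-0 probe; per the CPUID documentation, AVX512F is EBX bit
16, alongside the two bits the v3 patch already tests. An untested
sketch, assuming the same surroundings as pg_popcount_avx512_choose.c:

static inline bool
avx512_f_bw_popcnt_available(void)
{
	unsigned int exx[4] = {0, 0, 0, 0};

#if defined(HAVE__GET_CPUID_COUNT)
	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUIDEX)
	__cpuidex(exx, 7, 0);
#else
#error cpuid instruction not available
#endif
	return (exx[1] & (1 << 16)) != 0 &&	/* avx512-f */
		(exx[1] & (1 << 30)) != 0 &&	/* avx512-bw */
		(exx[2] & (1 << 14)) != 0;	/* avx512-vpopcntdq */
}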


