One of the things I notice which comes up in a profile of a pgbench -S
test is the generation of the command completion tag.

At the moment, this is done by a snprintf() call, namely:

snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
tag == CMDTAG_INSERT ?
"%s 0 " UINT64_FORMAT : "%s " UINT64_FORMAT,
tagname, qc->nprocessed);

Ever since aa2387e2f, and the relevant discussion in [1], it's been
clear that the sprintf() functions are not the fastest. I think
there's just some unavoidable overhead to __VA_ARGS__.

The generation of the completion tag is not hugely dominant in the
profiles, but it does appear:

   0.36%  postgres          [.] dopr.constprop.0

In the attached, there are a few things done to make the generation of
completion tags faster:

Namely:
1. Store the tag length in struct CommandTagBehavior so that we can
memcpy() a fixed length rather than having to copy byte-by-byte
looking for the \0.
2. Use pg_ulltoa_n to write the number of rows affected by the command tag.
3. Have the function that builds the tag return its length so save
from having to do a strlen before writing the tag in pq_putmessage().

It's difficult to measure the performance of something that takes
0.36% of execution. I have previously seen the tag generation take
over 1% of execution time.

One thing that's changed in the patch vs master is that if the
snprintf's buffer, for some reason had not been long enough to store
the entire completion tag, it would have truncated it and perhaps sent
a truncated version of the row count to the client.  For this to
happen, we'd have to have some excessively long command name.  Since
these cannot be added by users, I've opted to just add an Assert that
will trigger if we're ever asked to write a command tag that won't
fit.  Assuming we test our new excessively-long-named-command, we'll
trigger that Assert and realise long before it's a problem.

Both Andres and I seemed to have independently written almost exactly
the same patch for this. The attached is mine but with the
GetCommandTagNameAndLen function from his. I'd written
GetCommandTagLen() for mine, which required a couple of function calls
instead of Andres' 1 function call to get the name and length in one
go.

Does anyone object to this?

David

[1] 
https://www.postgresql.org/message-id/cakjs1f8oew8zekqd4x3e+tfwzt+mwv6o-tf8mbpdo4xnnar...@mail.gmail.com
From 5900b1662aea3d82c4aad1f8a94d9f9242067e9c Mon Sep 17 00:00:00 2001
From: David Rowley <dgrow...@gmail.com>
Date: Sat, 10 Dec 2022 20:27:01 +1300
Subject: [PATCH v1] Speed up creation of command completion tags

Author: David Rowley, Andres Freund
---
 src/backend/tcop/cmdtag.c | 10 +++++-
 src/backend/tcop/dest.c   | 72 +++++++++++++++++++++++++--------------
 src/include/tcop/cmdtag.h |  1 +
 3 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/src/backend/tcop/cmdtag.c b/src/backend/tcop/cmdtag.c
index 262484f561..36f7c37ec4 100644
--- a/src/backend/tcop/cmdtag.c
+++ b/src/backend/tcop/cmdtag.c
@@ -20,13 +20,14 @@
 typedef struct CommandTagBehavior
 {
        const char *name;
+       const uint8 namelen;
        const bool      event_trigger_ok;
        const bool      table_rewrite_ok;
        const bool      display_rowcount;
 } CommandTagBehavior;
 
 #define PG_CMDTAG(tag, name, evtrgok, rwrok, rowcnt) \
-       { name, evtrgok, rwrok, rowcnt },
+       { name, (uint8) (sizeof(name) - 1), evtrgok, rwrok, rowcnt },
 
 static const CommandTagBehavior tag_behavior[COMMAND_TAG_NEXTTAG] = {
 #include "tcop/cmdtaglist.h"
@@ -47,6 +48,13 @@ GetCommandTagName(CommandTag commandTag)
        return tag_behavior[commandTag].name;
 }
 
+const char *
+GetCommandTagNameAndLen(CommandTag commandTag, Size *len)
+{
+       *len = (Size) tag_behavior[commandTag].namelen;
+       return tag_behavior[commandTag].name;
+}
+
 bool
 command_tag_display_rowcount(CommandTag commandTag)
 {
diff --git a/src/backend/tcop/dest.c b/src/backend/tcop/dest.c
index 6afaa153ca..e80a501dce 100644
--- a/src/backend/tcop/dest.c
+++ b/src/backend/tcop/dest.c
@@ -39,6 +39,7 @@
 #include "executor/tstoreReceiver.h"
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
+#include "utils/builtins.h"
 #include "utils/portal.h"
 
 
@@ -158,6 +159,49 @@ CreateDestReceiver(CommandDest dest)
        pg_unreachable();
 }
 
+static inline Size
+make_completion_tag(char *buff, const QueryCompletion *qc,
+                                       bool force_undecorated_output)
+{
+       CommandTag      tag = qc->commandTag;
+       Size            taglen;
+       const char *tagname = GetCommandTagNameAndLen(tag, &taglen);
+       char       *bufp;
+
+       /*
+        * We assume the tagname is plain ASCII and therefore requires no 
encoding
+        * conversion.
+        */
+       memcpy(buff, tagname, taglen + 1);
+       bufp = buff + taglen;
+
+       /* ensure that the tagname isn't long enough to overrun the buffer */
+       Assert(taglen <= COMPLETION_TAG_BUFSIZE - MAXINT8LEN - 4);
+
+       /*
+        * In PostgreSQL versions 11 and earlier, it was possible to create a
+        * table WITH OIDS.  When inserting into such a table, INSERT used to
+        * include the Oid of the inserted record in the completion tag.  To
+        * maintain compatibility in the wire protocol, we now write a "0" (for
+        * InvalidOid) in the location where we once wrote the new record's Oid.
+        */
+       if (command_tag_display_rowcount(tag) && !force_undecorated_output)
+       {
+               if (tag == CMDTAG_INSERT)
+               {
+                       *bufp++ = ' ';
+                       *bufp++ = '0';
+               }
+               *bufp++ = ' ';
+               bufp += pg_ulltoa_n(qc->nprocessed, bufp);
+               *bufp = '\0';
+       }
+
+       Assert((bufp - buff) == strlen(buff));
+
+       return bufp - buff;
+}
+
 /* ----------------
  *             EndCommand - clean up the destination at end of command
  * ----------------
@@ -166,8 +210,7 @@ void
 EndCommand(const QueryCompletion *qc, CommandDest dest, bool 
force_undecorated_output)
 {
        char            completionTag[COMPLETION_TAG_BUFSIZE];
-       CommandTag      tag;
-       const char *tagname;
+       Size            len;
 
        switch (dest)
        {
@@ -175,29 +218,8 @@ EndCommand(const QueryCompletion *qc, CommandDest dest, 
bool force_undecorated_o
                case DestRemoteExecute:
                case DestRemoteSimple:
 
-                       /*
-                        * We assume the tagname is plain ASCII and therefore 
requires no
-                        * encoding conversion.
-                        */
-                       tag = qc->commandTag;
-                       tagname = GetCommandTagName(tag);
-
-                       /*
-                        * In PostgreSQL versions 11 and earlier, it was 
possible to
-                        * create a table WITH OIDS.  When inserting into such 
a table,
-                        * INSERT used to include the Oid of the inserted 
record in the
-                        * completion tag.  To maintain compatibility in the 
wire
-                        * protocol, we now write a "0" (for InvalidOid) in the 
location
-                        * where we once wrote the new record's Oid.
-                        */
-                       if (command_tag_display_rowcount(tag) && 
!force_undecorated_output)
-                               snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
-                                                tag == CMDTAG_INSERT ?
-                                                "%s 0 " UINT64_FORMAT : "%s " 
UINT64_FORMAT,
-                                                tagname, qc->nprocessed);
-                       else
-                               snprintf(completionTag, COMPLETION_TAG_BUFSIZE, 
"%s", tagname);
-                       pq_putmessage('C', completionTag, strlen(completionTag) 
+ 1);
+                       len = make_completion_tag(completionTag, qc, 
force_undecorated_output);
+                       pq_putmessage('C', completionTag, len + 1);
 
                case DestNone:
                case DestDebug:
diff --git a/src/include/tcop/cmdtag.h b/src/include/tcop/cmdtag.h
index 60e3179c74..ca1008db25 100644
--- a/src/include/tcop/cmdtag.h
+++ b/src/include/tcop/cmdtag.h
@@ -50,6 +50,7 @@ CopyQueryCompletion(QueryCompletion *dst, const 
QueryCompletion *src)
 
 extern void InitializeQueryCompletion(QueryCompletion *qc);
 extern const char *GetCommandTagName(CommandTag commandTag);
+extern const char *GetCommandTagNameAndLen(CommandTag commandTag, Size *len);
 extern bool command_tag_display_rowcount(CommandTag commandTag);
 extern bool command_tag_event_trigger_ok(CommandTag commandTag);
 extern bool command_tag_table_rewrite_ok(CommandTag commandTag);
-- 
2.35.1.windows.2

Reply via email to