One of the things I notice which comes up in a profile of a pgbench -S test is the generation of the command completion tag.
At the moment, this is done by a snprintf() call, namely: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, tag == CMDTAG_INSERT ? "%s 0 " UINT64_FORMAT : "%s " UINT64_FORMAT, tagname, qc->nprocessed); Ever since aa2387e2f, and the relevant discussion in [1], it's been clear that the sprintf() functions are not the fastest. I think there's just some unavoidable overhead to __VA_ARGS__. The generation of the completion tag is not hugely dominant in the profiles, but it does appear: 0.36% postgres [.] dopr.constprop.0 In the attached, there are a few things done to make the generation of completion tags faster: Namely: 1. Store the tag length in struct CommandTagBehavior so that we can memcpy() a fixed length rather than having to copy byte-by-byte looking for the \0. 2. Use pg_ulltoa_n to write the number of rows affected by the command tag. 3. Have the function that builds the tag return its length so save from having to do a strlen before writing the tag in pq_putmessage(). It's difficult to measure the performance of something that takes 0.36% of execution. I have previously seen the tag generation take over 1% of execution time. One thing that's changed in the patch vs master is that if the snprintf's buffer, for some reason had not been long enough to store the entire completion tag, it would have truncated it and perhaps sent a truncated version of the row count to the client. For this to happen, we'd have to have some excessively long command name. Since these cannot be added by users, I've opted to just add an Assert that will trigger if we're ever asked to write a command tag that won't fit. Assuming we test our new excessively-long-named-command, we'll trigger that Assert and realise long before it's a problem. Both Andres and I seemed to have independently written almost exactly the same patch for this. The attached is mine but with the GetCommandTagNameAndLen function from his. I'd written GetCommandTagLen() for mine, which required a couple of function calls instead of Andres' 1 function call to get the name and length in one go. Does anyone object to this? David [1] https://www.postgresql.org/message-id/cakjs1f8oew8zekqd4x3e+tfwzt+mwv6o-tf8mbpdo4xnnar...@mail.gmail.com
From 5900b1662aea3d82c4aad1f8a94d9f9242067e9c Mon Sep 17 00:00:00 2001 From: David Rowley <dgrow...@gmail.com> Date: Sat, 10 Dec 2022 20:27:01 +1300 Subject: [PATCH v1] Speed up creation of command completion tags Author: David Rowley, Andres Freund --- src/backend/tcop/cmdtag.c | 10 +++++- src/backend/tcop/dest.c | 72 +++++++++++++++++++++++++-------------- src/include/tcop/cmdtag.h | 1 + 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/src/backend/tcop/cmdtag.c b/src/backend/tcop/cmdtag.c index 262484f561..36f7c37ec4 100644 --- a/src/backend/tcop/cmdtag.c +++ b/src/backend/tcop/cmdtag.c @@ -20,13 +20,14 @@ typedef struct CommandTagBehavior { const char *name; + const uint8 namelen; const bool event_trigger_ok; const bool table_rewrite_ok; const bool display_rowcount; } CommandTagBehavior; #define PG_CMDTAG(tag, name, evtrgok, rwrok, rowcnt) \ - { name, evtrgok, rwrok, rowcnt }, + { name, (uint8) (sizeof(name) - 1), evtrgok, rwrok, rowcnt }, static const CommandTagBehavior tag_behavior[COMMAND_TAG_NEXTTAG] = { #include "tcop/cmdtaglist.h" @@ -47,6 +48,13 @@ GetCommandTagName(CommandTag commandTag) return tag_behavior[commandTag].name; } +const char * +GetCommandTagNameAndLen(CommandTag commandTag, Size *len) +{ + *len = (Size) tag_behavior[commandTag].namelen; + return tag_behavior[commandTag].name; +} + bool command_tag_display_rowcount(CommandTag commandTag) { diff --git a/src/backend/tcop/dest.c b/src/backend/tcop/dest.c index 6afaa153ca..e80a501dce 100644 --- a/src/backend/tcop/dest.c +++ b/src/backend/tcop/dest.c @@ -39,6 +39,7 @@ #include "executor/tstoreReceiver.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" +#include "utils/builtins.h" #include "utils/portal.h" @@ -158,6 +159,49 @@ CreateDestReceiver(CommandDest dest) pg_unreachable(); } +static inline Size +make_completion_tag(char *buff, const QueryCompletion *qc, + bool force_undecorated_output) +{ + CommandTag tag = qc->commandTag; + Size taglen; + const char *tagname = GetCommandTagNameAndLen(tag, &taglen); + char *bufp; + + /* + * We assume the tagname is plain ASCII and therefore requires no encoding + * conversion. + */ + memcpy(buff, tagname, taglen + 1); + bufp = buff + taglen; + + /* ensure that the tagname isn't long enough to overrun the buffer */ + Assert(taglen <= COMPLETION_TAG_BUFSIZE - MAXINT8LEN - 4); + + /* + * In PostgreSQL versions 11 and earlier, it was possible to create a + * table WITH OIDS. When inserting into such a table, INSERT used to + * include the Oid of the inserted record in the completion tag. To + * maintain compatibility in the wire protocol, we now write a "0" (for + * InvalidOid) in the location where we once wrote the new record's Oid. + */ + if (command_tag_display_rowcount(tag) && !force_undecorated_output) + { + if (tag == CMDTAG_INSERT) + { + *bufp++ = ' '; + *bufp++ = '0'; + } + *bufp++ = ' '; + bufp += pg_ulltoa_n(qc->nprocessed, bufp); + *bufp = '\0'; + } + + Assert((bufp - buff) == strlen(buff)); + + return bufp - buff; +} + /* ---------------- * EndCommand - clean up the destination at end of command * ---------------- @@ -166,8 +210,7 @@ void EndCommand(const QueryCompletion *qc, CommandDest dest, bool force_undecorated_output) { char completionTag[COMPLETION_TAG_BUFSIZE]; - CommandTag tag; - const char *tagname; + Size len; switch (dest) { @@ -175,29 +218,8 @@ EndCommand(const QueryCompletion *qc, CommandDest dest, bool force_undecorated_o case DestRemoteExecute: case DestRemoteSimple: - /* - * We assume the tagname is plain ASCII and therefore requires no - * encoding conversion. - */ - tag = qc->commandTag; - tagname = GetCommandTagName(tag); - - /* - * In PostgreSQL versions 11 and earlier, it was possible to - * create a table WITH OIDS. When inserting into such a table, - * INSERT used to include the Oid of the inserted record in the - * completion tag. To maintain compatibility in the wire - * protocol, we now write a "0" (for InvalidOid) in the location - * where we once wrote the new record's Oid. - */ - if (command_tag_display_rowcount(tag) && !force_undecorated_output) - snprintf(completionTag, COMPLETION_TAG_BUFSIZE, - tag == CMDTAG_INSERT ? - "%s 0 " UINT64_FORMAT : "%s " UINT64_FORMAT, - tagname, qc->nprocessed); - else - snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "%s", tagname); - pq_putmessage('C', completionTag, strlen(completionTag) + 1); + len = make_completion_tag(completionTag, qc, force_undecorated_output); + pq_putmessage('C', completionTag, len + 1); case DestNone: case DestDebug: diff --git a/src/include/tcop/cmdtag.h b/src/include/tcop/cmdtag.h index 60e3179c74..ca1008db25 100644 --- a/src/include/tcop/cmdtag.h +++ b/src/include/tcop/cmdtag.h @@ -50,6 +50,7 @@ CopyQueryCompletion(QueryCompletion *dst, const QueryCompletion *src) extern void InitializeQueryCompletion(QueryCompletion *qc); extern const char *GetCommandTagName(CommandTag commandTag); +extern const char *GetCommandTagNameAndLen(CommandTag commandTag, Size *len); extern bool command_tag_display_rowcount(CommandTag commandTag); extern bool command_tag_event_trigger_ok(CommandTag commandTag); extern bool command_tag_table_rewrite_ok(CommandTag commandTag); -- 2.35.1.windows.2