Andy Fan <[email protected]> writes: Hi,
> Andres Freund <[email protected]> writes: >> Here's a very rough prototype for how it could look like. This clearly needs >> more helpers that I introduced, but I thought this should be enough to show >> the idea. > > Yes, so optional context is really elegant. Thanks for sharing! I have continued to use the optional context as the basis for this optimization. Here are the results: port 7432 is the optimized version and port 7433 is master. We can see a noticeable improvement. (feature_data-va1)> PGBENCH_RESULT_FORMAT=binary ./run.sh 7432 7433 transactions=1000 database=postgres clients=1 jobs=1 protocol=extended result_format=binary pgbench=./src/bin/pgbench/pgbench script port latency_ms tps lat_ratio tps_ratio int2_bench.sql 7432 3.419 292.476681 - - int2_bench.sql 7433 4.399 227.311939 1.287 0.777 int4_bench.sql 7432 3.519 284.153068 - - int4_bench.sql 7433 4.487 222.853492 1.275 0.784 int8_bench.sql 7432 4.525 220.970547 - - int8_bench.sql 7433 4.574 218.632615 1.011 0.989 float4_bench.sql 7432 3.663 273.008770 - - float4_bench.sql 7433 4.660 214.610005 1.272 0.786 float8_bench.sql 7432 3.775 264.907820 - - float8_bench.sql 7433 4.798 208.409012 1.271 0.787 numeric_bench.sql 7432 5.715 174.968484 - - numeric_bench.sql 7433 6.628 150.879014 1.160 0.862 text_bench.sql 7432 4.249 235.374976 - - text_bench.sql 7433 5.275 189.576982 1.241 0.805 date_bench.sql 7432 4.870 205.342604 - - date_bench.sql 7433 5.010 199.599045 1.029 0.972 time_bench.sql 7432 4.018 248.861954 - - time_bench.sql 7433 5.114 195.555072 1.273 0.786 timestamp_bench.sql 7432 4.179 239.273546 - - timestamp_bench.sql 7433 5.194 192.526840 1.243 0.805 timestamptz_bench.sql 7432 4.285 233.351970 - - timestamptz_bench.sql 7433 5.264 189.958959 1.228 0.814 (feature_data-va1)> PGBENCH_RESULT_FORMAT=text ./run.sh 7432 7433 transactions=1000 database=postgres clients=1 jobs=1 protocol=extended result_format=text pgbench=./src/bin/pgbench/pgbench script port latency_ms tps lat_ratio tps_ratio int2_bench.sql 7432 
3.643 274.498587 - - int2_bench.sql 7433 4.461 224.154031 1.225 0.817 int4_bench.sql 7432 3.684 271.442388 - - int4_bench.sql 7433 4.482 223.111396 1.217 0.822 int8_bench.sql 7432 3.839 260.464961 - - int8_bench.sql 7433 4.878 204.981207 1.271 0.787 float4_bench.sql 7432 5.482 182.425027 - - float4_bench.sql 7433 5.977 167.320695 1.090 0.917 float8_bench.sql 7432 6.596 151.607586 - - float8_bench.sql 7433 7.116 140.535931 1.079 0.927 numeric_bench.sql 7432 6.762 147.878199 - - numeric_bench.sql 7433 6.830 146.411705 1.010 0.990 text_bench.sql 7432 4.271 234.110510 - - text_bench.sql 7433 4.904 203.915171 1.148 0.871 date_bench.sql 7432 5.473 182.707235 - - date_bench.sql 7433 6.397 156.323668 1.169 0.856 time_bench.sql 7432 4.903 203.953267 - - time_bench.sql 7433 5.939 168.389232 1.211 0.826 timestamp_bench.sql 7432 6.300 158.732981 - - timestamp_bench.sql 7433 7.244 138.038000 1.150 0.870 timestamptz_bench.sql 7432 8.464 118.152459 - - timestamptz_bench.sql 7433 9.370 106.724987 1.107 0.903 Patches and test scripts are attached. Patches 001 and 002 come from Andres; patch 003 uses the same approach to optimize more data types, and adds some helper functions and a bugfix for the binary format. Patch 004 makes pgbench support the binary result format, for testing purposes only. I have also attached the test scripts I used, in test.tar.gz. -- Best Regards Andy Fan
>From 50f8528698156aa25911ab1fa98418e94a5eb92f Mon Sep 17 00:00:00 2001 From: Andres Freund <[email protected]> Date: Wed, 6 May 2026 11:41:36 -0400 Subject: [PATCH v2 1/4] WIP: Use permanent FunctionCallInfo in printtup Should probably be done similarly for COPY --- src/backend/access/common/printtup.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 616bdafd395..6fa93a6798a 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -50,6 +50,8 @@ typedef struct bool typisvarlena; /* is it varlena (ie possibly toastable)? */ int16 format; /* format code for this column */ FmgrInfo finfo; /* Precomputed call info for output fn */ + /* XXX: Would probably be faster to allocate "inline" */ + FunctionCallInfo outstate; /* Prepared FCI for slightly faster calls */ } PrinttupAttrInfo; typedef struct @@ -291,6 +293,10 @@ printtup_prepare_info(DR_printtup *myState, TupleDesc typeinfo, int numAttrs) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unsupported format code: %d", format))); + + /* both out and send funcs have one argument */ + thisState->outstate = palloc0(SizeForFunctionCallInfo(1)); + thisState->outstate->flinfo = &thisState->finfo; } } @@ -353,12 +359,16 @@ printtup(TupleTableSlot *slot, DestReceiver *self) VALGRIND_CHECK_MEM_IS_DEFINED(DatumGetPointer(attr), VARSIZE_ANY(DatumGetPointer(attr))); + /* fill in argument for output / send function */ + thisState->outstate->args[0].value = attr; + if (thisState->format == 0) { /* Text output */ char *outputstr; - outputstr = OutputFunctionCall(&thisState->finfo, attr); + outputstr = DatumGetCString(FunctionCallInvoke(thisState->outstate)); + Assert(!thisState->outstate->isnull); pq_sendcountedtext(buf, outputstr, strlen(outputstr)); } else @@ -366,7 +376,8 @@ printtup(TupleTableSlot *slot, DestReceiver *self) /* Binary output */ bytea 
*outputbytes; - outputbytes = SendFunctionCall(&thisState->finfo, attr); + outputbytes = DatumGetByteaP(FunctionCallInvoke(thisState->outstate)); + Assert(!thisState->outstate->isnull); pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ); pq_sendbytes(buf, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); -- 2.43.0
>From f64830f1b2e4ee409f565fcd5636b4ecefcc8907 Mon Sep 17 00:00:00 2001 From: Andres Freund <[email protected]> Date: Wed, 6 May 2026 11:44:26 -0400 Subject: [PATCH v2 2/4] Mega-WIP: Optimized out/send path for printtup Discussion: https://postgr.es/m/[email protected] --- src/backend/access/common/printtup.c | 35 +++++++++++-- src/backend/utils/adt/int.c | 73 +++++++++++++++++++++++++--- src/backend/utils/adt/varlena.c | 40 ++++++++++++++- src/include/nodes/miscnodes.h | 12 +++++ 4 files changed, 148 insertions(+), 12 deletions(-) diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 6fa93a6798a..2e3eb8f56d3 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -63,6 +63,7 @@ typedef struct int nattrs; PrinttupAttrInfo *myinfo; /* Cached info about each attr */ StringInfoData buf; /* output buffer (*not* in tmpcontext) */ + InOutContext inout; /* FunctionCallInfo->context data */ MemoryContext tmpcontext; /* Memory context for per-row workspace */ } DR_printtup; @@ -142,6 +143,9 @@ printtup_startup(DestReceiver *self, int operation, TupleDesc typeinfo) FetchPortalTargetList(portal), portal->formats); + myState->inout.type = T_InOutContext; + myState->inout.buf = &myState->buf; + /* ---------------- * We could set up the derived attr info at this time, but we postpone it * until the first call of printtup, for 2 reasons: @@ -297,6 +301,15 @@ printtup_prepare_info(DR_printtup *myState, TupleDesc typeinfo, int numAttrs) /* both out and send funcs have one argument */ thisState->outstate = palloc0(SizeForFunctionCallInfo(1)); thisState->outstate->flinfo = &thisState->finfo; + + /* + * The idea here is that output functions can optionally use more + * efficient paths if they see that the context is InOutContext, by + * directly appending correctly formatted output into the output + * buffer. 
+ */ + thisState->outstate->context = (Node *) &myState->inout; + thisState->outstate->nargs = 1; } } @@ -369,7 +382,13 @@ printtup(TupleTableSlot *slot, DestReceiver *self) outputstr = DatumGetCString(FunctionCallInvoke(thisState->outstate)); Assert(!thisState->outstate->isnull); - pq_sendcountedtext(buf, outputstr, strlen(outputstr)); + + /* + * If outputstr == NULL, the output function directly appended a + * correctly formatted message. + */ + if (outputstr) + pq_sendcountedtext(buf, outputstr, strlen(outputstr)); } else { @@ -378,9 +397,17 @@ printtup(TupleTableSlot *slot, DestReceiver *self) outputbytes = DatumGetByteaP(FunctionCallInvoke(thisState->outstate)); Assert(!thisState->outstate->isnull); - pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ); - pq_sendbytes(buf, VARDATA(outputbytes), - VARSIZE(outputbytes) - VARHDRSZ); + + /* + * If outputbytes == NULL, the send function directly appended a + * correctly formatted message. + */ + if (outputbytes) + { + pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ); + pq_sendbytes(buf, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + } } } diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 01608d8ca42..de54a43c98d 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -327,10 +327,54 @@ Datum int4out(PG_FUNCTION_ARGS) { int32 arg1 = PG_GETARG_INT32(0); - char *result = (char *) palloc(12); /* sign, 10 digits, '\0' */ + int maxlen = 12; /* sign, 10 digits, '\0' */ - pg_ltoa(arg1, result); - PG_RETURN_CSTRING(result); + if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + { + /* + * Optimized path for output functions called as part of a larger + * ouput. + * + * FIXME: A good chunk of this should obviously be in helper + * functions. 
+ */ + InOutContext *inout = castNode(InOutContext, fcinfo->context); + StringInfo buf = inout->buf; + int prev_buflen; + int len; + uint32 len_net; + + /* reserve space for length and the max string length */ + enlargeStringInfo(buf, sizeof(uint32) + maxlen); + + /* reserve space for length, to be filled out later */ + prev_buflen = buf->len; + buf->len += sizeof(uint32); + + /* + * Construct string directly in buffer, we don't have to care about + * encoding conversions, because we assume that every encoding + * embodies ascii (XXX: Is that actually true with client encodings?). + */ + len = pg_ltoa(arg1, buf->data + buf->len); + buf->len += len; + + /* update the previously reserved length */ + len_net = pg_hton32(len); + memcpy(&buf->data[prev_buflen], &len_net, sizeof(uint32)); + + PG_RETURN_VOID(); + } + else + { + /* + * Fallback path called in any other context. + */ + char *result = (char *) palloc(maxlen); + + pg_ltoa(arg1, result); + PG_RETURN_CSTRING(result); + } } /* @@ -351,11 +395,26 @@ Datum int4send(PG_FUNCTION_ARGS) { int32 arg1 = PG_GETARG_INT32(0); - StringInfoData buf; - pq_begintypsend(&buf); - pq_sendint32(&buf, arg1); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + { + InOutContext *inout = castNode(InOutContext, fcinfo->context); + + /* length of data */ + pq_sendint32(inout->buf, 4); + /* data itself */ + pq_sendint32(inout->buf, arg1); + + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint32(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 0c6d3ba4d22..4948ced7dec 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -290,7 +290,45 @@ textout(PG_FUNCTION_ARGS) { Datum txt = PG_GETARG_DATUM(0); - PG_RETURN_CSTRING(TextDatumGetCString(txt)); + if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + { + 
StringInfo buf = castNode(InOutContext, fcinfo->context)->buf; + text *tunpacked = pg_detoast_datum_packed(DatumGetPointer(txt)); + int len = VARSIZE_ANY_EXHDR(tunpacked); + char *data = VARDATA_ANY(tunpacked); + char *data_converted; + size_t data_len; + + /* + * Convert text output to the right encoding. For efficiency, this + * should really happen directly into buf. For that we would have to + * reserve space for the length first and fill it out after + * conversion. + * + * FIXME: Obviously we would need helpers for this too. + */ + data_converted = pg_server_to_client(data, len); + + if (data == data_converted) + data_len = len; + else + data_len = strlen(data_converted); + + /* length */ + pq_sendint32(buf, data_len); + + /* actual data */ + appendBinaryStringInfoNT(buf, data_converted, data_len); + + if (tunpacked != DatumGetPointer(txt)) + pfree(tunpacked); + + PG_RETURN_VOID(); + } + else + { + PG_RETURN_CSTRING(TextDatumGetCString(txt)); + } } /* diff --git a/src/include/nodes/miscnodes.h b/src/include/nodes/miscnodes.h index ec833001ab0..b3c189a5c0c 100644 --- a/src/include/nodes/miscnodes.h +++ b/src/include/nodes/miscnodes.h @@ -54,4 +54,16 @@ typedef struct ErrorSaveContext ((escontext) != NULL && IsA(escontext, ErrorSaveContext) && \ ((ErrorSaveContext *) (escontext))->error_occurred) + +/* + * Type optionally passed to input/receive/output/send functions that allows + * those functions to opt into more efficient ways of performing their work + * (mainly reducing allocations & copies). + */ +typedef struct InOutContext +{ + NodeTag type; + StringInfo buf; +} InOutContext; + #endif /* MISCNODES_H */ -- 2.43.0
>From 4cae157489e3c923a874f37aec07099d75f0cefe Mon Sep 17 00:00:00 2001 From: Andy Fan <[email protected]> Date: Tue, 2 Jun 2026 17:37:29 +0800 Subject: [PATCH v2 3/4] Optimize more data type for less memory copy with optional context. --- src/backend/access/common/printtup.c | 8 ++- src/backend/utils/adt/date.c | 32 +++++++---- src/backend/utils/adt/float.c | 62 ++++++++++++++++----- src/backend/utils/adt/int.c | 82 +++++++++++++++------------- src/backend/utils/adt/int8.c | 40 ++++++++++---- src/backend/utils/adt/numeric.c | 46 ++++++++++++---- src/backend/utils/adt/timestamp.c | 50 ++++++++++++----- src/backend/utils/adt/varlena.c | 52 ++++++++---------- src/include/libpq/pqformat.h | 61 +++++++++++++++++++++ src/include/utils/builtins.h | 24 ++++++++ 10 files changed, 325 insertions(+), 132 deletions(-) diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 2e3eb8f56d3..ab625736ac1 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -393,17 +393,19 @@ printtup(TupleTableSlot *slot, DestReceiver *self) else { /* Binary output */ + Datum outputdatum; bytea *outputbytes; - outputbytes = DatumGetByteaP(FunctionCallInvoke(thisState->outstate)); + outputdatum = FunctionCallInvoke(thisState->outstate); Assert(!thisState->outstate->isnull); /* - * If outputbytes == NULL, the send function directly appended a + * If outputdatum == 0, the send function directly appended a * correctly formatted message. 
*/ - if (outputbytes) + if (outputdatum) { + outputbytes = DatumGetByteaP(outputdatum); pq_sendint32(buf, VARSIZE(outputbytes) - VARHDRSZ); pq_sendbytes(buf, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ); diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 7f746dd84c9..43284d43fda 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -180,7 +180,6 @@ Datum date_out(PG_FUNCTION_ARGS) { DateADT date = PG_GETARG_DATEADT(0); - char *result; struct pg_tm tt, *tm = &tt; char buf[MAXDATELEN + 1]; @@ -194,8 +193,10 @@ date_out(PG_FUNCTION_ARGS) EncodeDateOnly(tm, DateStyle, buf); } - result = pstrdup(buf); - PG_RETURN_CSTRING(result); + if (pg_send_inout_text(fcinfo, buf, strlen(buf))) + PG_RETURN_VOID(); + else + PG_RETURN_CSTRING(pstrdup(buf)); } /* @@ -1606,7 +1607,6 @@ Datum time_out(PG_FUNCTION_ARGS) { TimeADT time = PG_GETARG_TIMEADT(0); - char *result; struct pg_tm tt, *tm = &tt; fsec_t fsec; @@ -1615,8 +1615,10 @@ time_out(PG_FUNCTION_ARGS) time2tm(time, tm, &fsec); EncodeTimeOnly(tm, fsec, false, 0, DateStyle, buf); - result = pstrdup(buf); - PG_RETURN_CSTRING(result); + if (pg_send_inout_text(fcinfo, buf, strlen(buf))) + PG_RETURN_VOID(); + else + PG_RETURN_CSTRING(pstrdup(buf)); } /* @@ -1652,11 +1654,21 @@ Datum time_send(PG_FUNCTION_ARGS) { TimeADT time = PG_GETARG_TIMEADT(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendint64(&buf, time); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + pq_sendint64_field(buf, time); + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, time); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } Datum diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 362c29ab803..161d5593f5f 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -24,6 +24,7 @@ #include "common/shortest_dec.h" #include 
"libpq/pqformat.h" #include "utils/array.h" +#include "utils/builtins.h" #include "utils/float.h" #include "utils/fmgrprotos.h" #include "utils/sortsupport.h" @@ -360,17 +361,18 @@ Datum float4out(PG_FUNCTION_ARGS) { float4 num = PG_GETARG_FLOAT4(0); - char *ascii = (char *) palloc(32); + char ascii[32]; int ndig = FLT_DIG + extra_float_digits; if (extra_float_digits > 0) - { float_to_shortest_decimal_buf(num, ascii); - PG_RETURN_CSTRING(ascii); - } + else + (void) pg_strfromd(ascii, 32, ndig, num); - (void) pg_strfromd(ascii, 32, ndig, num); - PG_RETURN_CSTRING(ascii); + if (pg_send_inout_text(fcinfo, ascii, strlen(ascii))) + PG_RETURN_VOID(); + else + PG_RETURN_CSTRING(pstrdup(ascii)); } /* @@ -391,11 +393,21 @@ Datum float4send(PG_FUNCTION_ARGS) { float4 num = PG_GETARG_FLOAT4(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendfloat4(&buf, num); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + pq_sendfloat4_field(buf, num); + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat4(&buf, num); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } /* @@ -563,8 +575,18 @@ Datum float8out(PG_FUNCTION_ARGS) { float8 num = PG_GETARG_FLOAT8(0); + char ascii[32]; + int ndig = DBL_DIG + extra_float_digits; + + if (extra_float_digits > 0) + double_to_shortest_decimal_buf(num, ascii); + else + (void) pg_strfromd(ascii, 32, ndig, num); - PG_RETURN_CSTRING(float8out_internal(num)); + if (pg_send_inout_text(fcinfo, ascii, strlen(ascii))) + PG_RETURN_VOID(); + else + PG_RETURN_CSTRING(pstrdup(ascii)); } /* @@ -608,11 +630,21 @@ Datum float8send(PG_FUNCTION_ARGS) { float8 num = PG_GETARG_FLOAT8(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendfloat8(&buf, num); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + pq_sendfloat8_field(buf, num); + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + 
pq_begintypsend(&buf); + pq_sendfloat8(&buf, num); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index de54a43c98d..9a3fda0403a 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -74,10 +74,27 @@ Datum int2out(PG_FUNCTION_ARGS) { int16 arg1 = PG_GETARG_INT16(0); - char *result = (char *) palloc(7); /* sign, 5 digits, '\0' */ + int maxlen = 7; /* sign, 5 digits, '\0' */ + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pg_itoa(arg1, result); - PG_RETURN_CSTRING(result); + if (buf) + { + int offset; + int len; + + len = pg_itoa(arg1, pq_begincountedfield(buf, maxlen, &offset)); + buf->len += len; + pq_endcountedfield(buf, offset); + + PG_RETURN_VOID(); + } + else + { + char *result = (char *) palloc(maxlen); + + pg_itoa(arg1, result); + PG_RETURN_CSTRING(result); + } } /* @@ -98,11 +115,21 @@ Datum int2send(PG_FUNCTION_ARGS) { int16 arg1 = PG_GETARG_INT16(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendint16(&buf, arg1); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + pq_sendint16_field(buf, arg1); + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint16(&buf, arg1); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } /* @@ -328,40 +355,22 @@ int4out(PG_FUNCTION_ARGS) { int32 arg1 = PG_GETARG_INT32(0); int maxlen = 12; /* sign, 10 digits, '\0' */ + StringInfo buf = pg_get_inout_context_buf(fcinfo); - if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + if (buf) { - /* - * Optimized path for output functions called as part of a larger - * ouput. - * - * FIXME: A good chunk of this should obviously be in helper - * functions. - */ - InOutContext *inout = castNode(InOutContext, fcinfo->context); - StringInfo buf = inout->buf; - int prev_buflen; + /* Optimized path for output functions called as part of a larger output. 
*/ + int offset; int len; - uint32 len_net; - - /* reserve space for length and the max string length */ - enlargeStringInfo(buf, sizeof(uint32) + maxlen); - - /* reserve space for length, to be filled out later */ - prev_buflen = buf->len; - buf->len += sizeof(uint32); /* * Construct string directly in buffer, we don't have to care about * encoding conversions, because we assume that every encoding * embodies ascii (XXX: Is that actually true with client encodings?). */ - len = pg_ltoa(arg1, buf->data + buf->len); + len = pg_ltoa(arg1, pq_begincountedfield(buf, maxlen, &offset)); buf->len += len; - - /* update the previously reserved length */ - len_net = pg_hton32(len); - memcpy(&buf->data[prev_buflen], &len_net, sizeof(uint32)); + pq_endcountedfield(buf, offset); PG_RETURN_VOID(); } @@ -395,16 +404,11 @@ Datum int4send(PG_FUNCTION_ARGS) { int32 arg1 = PG_GETARG_INT32(0); + StringInfo buf = pg_get_inout_context_buf(fcinfo); - if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + if (buf) { - InOutContext *inout = castNode(InOutContext, fcinfo->context); - - /* length of data */ - pq_sendint32(inout->buf, 4); - /* data itself */ - pq_sendint32(inout->buf, arg1); - + pq_sendint32_field(buf, arg1); PG_RETURN_VOID(); } else diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 9b429da86d9..184927f4438 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -63,19 +63,37 @@ Datum int8out(PG_FUNCTION_ARGS) { int64 val = PG_GETARG_INT64(0); - char buf[MAXINT8LEN + 1]; - char *result; - int len; + int maxlen = MAXINT8LEN + 1; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - len = pg_lltoa(val, buf) + 1; + if (buf) + { + int offset; + int len; - /* - * Since the length is already known, we do a manual palloc() and memcpy() - * to avoid the strlen() call that would otherwise be done in pstrdup(). 
- */ - result = palloc(len); - memcpy(result, buf, len); - PG_RETURN_CSTRING(result); + len = pg_lltoa(val, pq_begincountedfield(buf, maxlen, &offset)); + buf->len += len; + pq_endcountedfield(buf, offset); + + PG_RETURN_VOID(); + } + else + { + char buf[MAXINT8LEN + 1]; + char *result; + int len; + + len = pg_lltoa(val, buf) + 1; + + /* + * Since the length is already known, we do a manual palloc() and + * memcpy() to avoid the strlen() call that would otherwise be done in + * pstrdup(). + */ + result = palloc(len); + memcpy(result, buf, len); + PG_RETURN_CSTRING(result); + } } /* diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index cb23dfe9b95..1264a0bb0ff 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -808,11 +808,16 @@ numeric_out(PG_FUNCTION_ARGS) if (NUMERIC_IS_SPECIAL(num)) { if (NUMERIC_IS_PINF(num)) - PG_RETURN_CSTRING(pstrdup("Infinity")); + str = "Infinity"; else if (NUMERIC_IS_NINF(num)) - PG_RETURN_CSTRING(pstrdup("-Infinity")); + str = "-Infinity"; else - PG_RETURN_CSTRING(pstrdup("NaN")); + str = "NaN"; + + if (pg_send_inout_text(fcinfo, str, strlen(str))) + PG_RETURN_VOID(); + + PG_RETURN_CSTRING(pstrdup(str)); } /* @@ -822,6 +827,9 @@ numeric_out(PG_FUNCTION_ARGS) str = get_str_from_var(&x); + if (pg_send_inout_text(fcinfo, str, strlen(str))) + PG_RETURN_VOID(); + PG_RETURN_CSTRING(str); } @@ -1147,21 +1155,37 @@ numeric_send(PG_FUNCTION_ARGS) { Numeric num = PG_GETARG_NUMERIC(0); NumericVar x; - StringInfoData buf; + StringInfo buf; + StringInfoData localbuf; + bool inout; int i; init_var_from_num(num, &x); - pq_begintypsend(&buf); + buf = pg_get_inout_context_buf(fcinfo); + inout = buf != NULL; + if (inout) + { + /* length of data */ + pq_sendint32(buf, (4 + x.ndigits) * sizeof(int16)); + } + else + { + pq_begintypsend(&localbuf); + buf = &localbuf; + } - pq_sendint16(&buf, x.ndigits); - pq_sendint16(&buf, x.weight); - pq_sendint16(&buf, x.sign); - pq_sendint16(&buf, 
x.dscale); + pq_sendint16(buf, x.ndigits); + pq_sendint16(buf, x.weight); + pq_sendint16(buf, x.sign); + pq_sendint16(buf, x.dscale); for (i = 0; i < x.ndigits; i++) - pq_sendint16(&buf, x.digits[i]); + pq_sendint16(buf, x.digits[i]); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (inout) + PG_RETURN_VOID(); + else + PG_RETURN_BYTEA_P(pq_endtypsend(&localbuf)); } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index a20e7ea1d11..0c0b7af700a 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -227,7 +227,6 @@ Datum timestamp_out(PG_FUNCTION_ARGS) { Timestamp timestamp = PG_GETARG_TIMESTAMP(0); - char *result; struct pg_tm tt, *tm = &tt; fsec_t fsec; @@ -242,8 +241,10 @@ timestamp_out(PG_FUNCTION_ARGS) (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); - result = pstrdup(buf); - PG_RETURN_CSTRING(result); + if (pg_send_inout_text(fcinfo, buf, strlen(buf))) + PG_RETURN_VOID(); + else + PG_RETURN_CSTRING(pstrdup(buf)); } /* @@ -286,11 +287,21 @@ Datum timestamp_send(PG_FUNCTION_ARGS) { Timestamp timestamp = PG_GETARG_TIMESTAMP(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendint64(&buf, timestamp); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + pq_sendint64_field(buf, timestamp); + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, timestamp); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } Datum @@ -773,7 +784,6 @@ Datum timestamptz_out(PG_FUNCTION_ARGS) { TimestampTz dt = PG_GETARG_TIMESTAMPTZ(0); - char *result; int tz; struct pg_tm tt, *tm = &tt; @@ -790,8 +800,10 @@ timestamptz_out(PG_FUNCTION_ARGS) (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); - result = pstrdup(buf); - PG_RETURN_CSTRING(result); + if (pg_send_inout_text(fcinfo, buf, strlen(buf))) + PG_RETURN_VOID(); + else + 
PG_RETURN_CSTRING(pstrdup(buf)); } /* @@ -835,11 +847,21 @@ Datum timestamptz_send(PG_FUNCTION_ARGS) { TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendint64(&buf, timestamp); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + pq_sendint64_field(buf, timestamp); + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendint64(&buf, timestamp); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } Datum diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 4948ced7dec..5dac5b39f11 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -289,37 +289,15 @@ Datum textout(PG_FUNCTION_ARGS) { Datum txt = PG_GETARG_DATUM(0); + StringInfo buf = pg_get_inout_context_buf(fcinfo); - if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + if (buf) { - StringInfo buf = castNode(InOutContext, fcinfo->context)->buf; text *tunpacked = pg_detoast_datum_packed(DatumGetPointer(txt)); int len = VARSIZE_ANY_EXHDR(tunpacked); char *data = VARDATA_ANY(tunpacked); - char *data_converted; - size_t data_len; - - /* - * Convert text output to the right encoding. For efficiency, this - * should really happen directly into buf. For that we would have to - * reserve space for the length first and fill it out after - * conversion. - * - * FIXME: Obviously we would need helpers for this too. 
- */ - data_converted = pg_server_to_client(data, len); - - if (data == data_converted) - data_len = len; - else - data_len = strlen(data_converted); - - /* length */ - pq_sendint32(buf, data_len); - - /* actual data */ - appendBinaryStringInfoNT(buf, data_converted, data_len); + pq_sendcountedtext(buf, data, len); if (tunpacked != DatumGetPointer(txt)) pfree(tunpacked); @@ -356,11 +334,27 @@ Datum textsend(PG_FUNCTION_ARGS) { text *t = PG_GETARG_TEXT_PP(0); - StringInfoData buf; + StringInfo buf = pg_get_inout_context_buf(fcinfo); - pq_begintypsend(&buf); - pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); - PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + if (buf) + { + int offset; + + /* reserve space for length, to be filled out after conversion */ + (void) pq_begincountedfield(buf, 0, &offset); + pq_sendtext(buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); + pq_endcountedfield(buf, offset); + + PG_RETURN_VOID(); + } + else + { + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); + } } diff --git a/src/include/libpq/pqformat.h b/src/include/libpq/pqformat.h index bc4ab1381a9..d40f36b5fbc 100644 --- a/src/include/libpq/pqformat.h +++ b/src/include/libpq/pqformat.h @@ -94,6 +94,32 @@ pq_writeint64(StringInfoData *pg_restrict buf, uint64 i) buf->len += sizeof(uint64); } +/* + * Reserve a length-prefixed field in a StringInfo buffer, returning the + * location at which the payload should be written. The payload length is + * filled in by pq_endcountedfield(). + */ +static inline char * +pq_begincountedfield(StringInfo buf, int maxlen, int *offset) +{ + enlargeStringInfo(buf, sizeof(uint32) + maxlen); + + *offset = buf->len; + buf->len += sizeof(uint32); + + return buf->data + buf->len; +} + +/* Fill in the length of a field started by pq_begincountedfield(). 
*/ +static inline void +pq_endcountedfield(StringInfo buf, int offset) +{ + int len = buf->len - offset - sizeof(uint32); + uint32 len_net = pg_hton32(len); + + memcpy(&buf->data[offset], &len_net, sizeof(uint32)); +} + /* * Append a null-terminated text string (with conversion) to a buffer with * preallocated space. @@ -187,6 +213,41 @@ pq_sendint(StringInfo buf, uint32 i, int b) } } +/* append length-prefixed binary fields to a StringInfo buffer */ +static inline void +pq_sendint16_field(StringInfo buf, uint16 i) +{ + pq_sendint32(buf, sizeof(uint16)); + pq_sendint16(buf, i); +} + +static inline void +pq_sendint32_field(StringInfo buf, uint32 i) +{ + pq_sendint32(buf, sizeof(uint32)); + pq_sendint32(buf, i); +} + +static inline void +pq_sendint64_field(StringInfo buf, uint64 i) +{ + pq_sendint32(buf, sizeof(uint64)); + pq_sendint64(buf, i); +} + +static inline void +pq_sendfloat4_field(StringInfo buf, float4 f) +{ + pq_sendint32(buf, sizeof(float4)); + pq_sendfloat4(buf, f); +} + +static inline void +pq_sendfloat8_field(StringInfo buf, float8 f) +{ + pq_sendint32(buf, sizeof(float8)); + pq_sendfloat8(buf, f); +} extern void pq_begintypsend(StringInfo buf); extern bytea *pq_endtypsend(StringInfo buf); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index b6a11bfa288..0bf228a746b 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -15,12 +15,36 @@ #define BUILTINS_H #include "fmgr.h" +#include "libpq/pqformat.h" +#include "nodes/miscnodes.h" #include "nodes/nodes.h" #include "utils/fmgrprotos.h" /* Sign + the most decimal digits an 8-byte number could have */ #define MAXINT8LEN 20 +/* Helpers for output/send functions using InOutContext. 
*/ +static inline StringInfo +pg_get_inout_context_buf(FunctionCallInfo fcinfo) +{ + if (fcinfo->context && IsA(fcinfo->context, InOutContext)) + return castNode(InOutContext, fcinfo->context)->buf; + + return NULL; +} + +static inline bool +pg_send_inout_text(FunctionCallInfo fcinfo, const char *str, int slen) +{ + StringInfo buf = pg_get_inout_context_buf(fcinfo); + + if (!buf) + return false; + + pq_sendcountedtext(buf, str, slen); + return true; +} + /* bool.c */ extern bool parse_bool(const char *value, bool *result); extern bool parse_bool_with_len(const char *value, size_t len, bool *result); -- 2.43.0
>From 4be7ac289e263a14b82009ff5adeba3a82d46174 Mon Sep 17 00:00:00 2001 From: Andy Fan <[email protected]> Date: Tue, 2 Jun 2026 18:08:13 +0800 Subject: [PATCH v2 4/4] Let pgbench support binary format. --- src/bin/pgbench/pgbench.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 0b2bb9340b5..533a1732158 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -722,6 +722,16 @@ typedef enum QueryMode static QueryMode querymode = QUERY_SIMPLE; static const char *const QUERYMODE[] = {"simple", "extended", "prepared"}; +typedef enum ResultFormat +{ + RESULT_FORMAT_TEXT, + RESULT_FORMAT_BINARY, + NUM_RESULT_FORMAT +} ResultFormat; + +static ResultFormat result_format = RESULT_FORMAT_TEXT; +static const char *const RESULT_FORMAT[] = {"text", "binary"}; + /* * struct Command represents one command in a script. * @@ -965,6 +975,8 @@ usage(void) " --max-tries=NUM max number of tries to run transaction (default: 1)\n" " --progress-timestamp use Unix epoch timestamps for progress\n" " --random-seed=SEED set random seed (\"time\", \"rand\", integer)\n" + " --result-format=text|binary\n" + " result format for extended/prepared protocol (default: text)\n" " --sampling-rate=NUM fraction of transactions to log (e.g., 0.01 for 1%%)\n" " --show-script=NAME show builtin script code, then exit\n" " --verbose-errors print messages of all errors\n" @@ -3173,7 +3185,7 @@ sendCommand(CState *st, Command *command) pg_log_debug("client %d sending %s", st->id, sql); r = PQsendQueryParams(st->con, sql, command->argc - 1, - NULL, params, NULL, NULL, 0); + NULL, params, NULL, NULL, result_format); } else if (querymode == QUERY_PREPARED) { @@ -3184,7 +3196,7 @@ sendCommand(CState *st, Command *command) pg_log_debug("client %d sending %s", st->id, command->prepname); r = PQsendQueryPrepared(st->con, command->prepname, command->argc - 1, - params, NULL, NULL, 0); + params, 
NULL, NULL, result_format); } else /* unknown sql mode */ r = 0; @@ -6470,6 +6482,7 @@ printResults(StatsData *total, printf("partition method: %s\npartitions: %d\n", PARTITION_METHOD[partition_method], partitions); printf("query mode: %s\n", QUERYMODE[querymode]); + printf("result format: %s\n", RESULT_FORMAT[result_format]); printf("number of clients: %d\n", nclients); printf("number of threads: %d\n", nthreads); @@ -6778,6 +6791,7 @@ main(int argc, char **argv) {"exit-on-abort", no_argument, NULL, 16}, {"debug", no_argument, NULL, 17}, {"continue-on-error", no_argument, NULL, 18}, + {"result-format", required_argument, NULL, 19}, {NULL, 0, NULL, 0} }; @@ -7138,6 +7152,14 @@ main(int argc, char **argv) benchmarking_option_set = true; continue_on_error = true; break; + case 19: /* result-format */ + benchmarking_option_set = true; + for (result_format = 0; result_format < NUM_RESULT_FORMAT; result_format++) + if (strcmp(optarg, RESULT_FORMAT[result_format]) == 0) + break; + if (result_format >= NUM_RESULT_FORMAT) + pg_fatal("invalid result format: \"%s\"", optarg); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -7169,6 +7191,9 @@ main(int argc, char **argv) if (total_weight == 0 && !is_init_mode) pg_fatal("total script weight must not be zero"); + if (result_format == RESULT_FORMAT_BINARY && querymode == QUERY_SIMPLE) + pg_fatal("binary result format requires extended or prepared query mode"); + /* show per script stats if several scripts are used */ if (num_scripts > 1) per_script_stats = true; -- 2.43.0
test.tar.gz
Description: application/gzip
