Re: appendBinaryStringInfo stuff

2023-02-14 Thread Peter Eisentraut

On 10.02.23 20:08, Corey Huinker wrote:



On Fri, Feb 10, 2023 at 7:16 AM Peter Eisentraut 
> wrote:


On 19.12.22 07:13, Peter Eisentraut wrote:
 > Also, the argument type of appendBinaryStringInfo() is char *. 
There is

 > some code that uses this function to assemble some kind of packed
binary
 > layout, which requires a bunch of casts because of this.  I think
 > functions taking binary data plus length should take void * instead,
 > like memcpy() for example.

I found a little follow-up for this one: Make the same change to
pq_sendbytes(), which is a thin wrapper around
appendBinaryStringInfo().
   This would allow getting rid of further casts at call sites.


+1

Has all the benefits that 54a177a948b0a773c25c6737d1cc3cc49222a526 had.

Passes make check-world.


committed, thanks




Re: appendBinaryStringInfo stuff

2023-02-10 Thread Corey Huinker
On Fri, Feb 10, 2023 at 7:16 AM Peter Eisentraut <
peter.eisentr...@enterprisedb.com> wrote:

> On 19.12.22 07:13, Peter Eisentraut wrote:
> > Also, the argument type of appendBinaryStringInfo() is char *.  There is
> > some code that uses this function to assemble some kind of packed binary
> > layout, which requires a bunch of casts because of this.  I think
> > functions taking binary data plus length should take void * instead,
> > like memcpy() for example.
>
> I found a little follow-up for this one: Make the same change to
> pq_sendbytes(), which is a thin wrapper around appendBinaryStringInfo().
>   This would allow getting rid of further casts at call sites.
>

+1

Has all the benefits that 54a177a948b0a773c25c6737d1cc3cc49222a526 had.

Passes make check-world.


Re: appendBinaryStringInfo stuff

2023-02-10 Thread Peter Eisentraut

On 19.12.22 07:13, Peter Eisentraut wrote:
Also, the argument type of appendBinaryStringInfo() is char *.  There is 
some code that uses this function to assemble some kind of packed binary 
layout, which requires a bunch of casts because of this.  I think 
functions taking binary data plus length should take void * instead, 
like memcpy() for example.


I found a little follow-up for this one: Make the same change to 
pq_sendbytes(), which is a thin wrapper around appendBinaryStringInfo(). 
 This would allow getting rid of further casts at call sites.
From 0aed766e046ba71b01093b44f0cae3a4098f1e5b Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Fri, 10 Feb 2023 13:11:09 +0100
Subject: [PATCH] Change argument type of pq_sendbytes from char * to void *

---
 src/backend/libpq/pqformat.c|  2 +-
 src/backend/utils/adt/array_userfuncs.c | 11 +--
 src/backend/utils/adt/uuid.c|  2 +-
 src/backend/utils/adt/varbit.c  |  2 +-
 src/include/libpq/pqformat.h|  2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/backend/libpq/pqformat.c b/src/backend/libpq/pqformat.c
index b7e2c7b6c9..d712c80b5e 100644
--- a/src/backend/libpq/pqformat.c
+++ b/src/backend/libpq/pqformat.c
@@ -123,7 +123,7 @@ pq_beginmessage_reuse(StringInfo buf, char msgtype)
  * 
  */
 void
-pq_sendbytes(StringInfo buf, const char *data, int datalen)
+pq_sendbytes(StringInfo buf, const void *data, int datalen)
 {
/* use variant that maintains a trailing null-byte, out of caution */
appendBinaryStringInfo(buf, data, datalen);
diff --git a/src/backend/utils/adt/array_userfuncs.c 
b/src/backend/utils/adt/array_userfuncs.c
index c8a8d33ca3..80750191d8 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -654,7 +654,7 @@ array_agg_serialize(PG_FUNCTION_ARGS)
pq_sendbyte(, state->typalign);
 
/* dnulls */
-   pq_sendbytes(, (char *) state->dnulls, sizeof(bool) * 
state->nelems);
+   pq_sendbytes(, state->dnulls, sizeof(bool) * state->nelems);
 
/*
 * dvalues.  By agreement with array_agg_deserialize, when the element
@@ -664,8 +664,7 @@ array_agg_serialize(PG_FUNCTION_ARGS)
 * must be sent first).
 */
if (state->typbyval)
-   pq_sendbytes(, (char *) state->dvalues,
-sizeof(Datum) * state->nelems);
+   pq_sendbytes(, state->dvalues, sizeof(Datum) * 
state->nelems);
else
{
SerialIOData *iodata;
@@ -1097,7 +1096,7 @@ array_agg_array_serialize(PG_FUNCTION_ARGS)
if (state->nullbitmap)
{
Assert(state->aitems > 0);
-   pq_sendbytes(, (char *) state->nullbitmap, (state->aitems + 
7) / 8);
+   pq_sendbytes(, state->nullbitmap, (state->aitems + 7) / 8);
}
 
/* nitems */
@@ -1107,10 +1106,10 @@ array_agg_array_serialize(PG_FUNCTION_ARGS)
pq_sendint32(, state->ndims);
 
/* dims: XXX should we just send ndims elements? */
-   pq_sendbytes(, (char *) state->dims, sizeof(state->dims));
+   pq_sendbytes(, state->dims, sizeof(state->dims));
 
/* lbs */
-   pq_sendbytes(, (char *) state->lbs, sizeof(state->lbs));
+   pq_sendbytes(, state->lbs, sizeof(state->lbs));
 
result = pq_endtypsend();
 
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 76ff6c533f..4f7aa768fd 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -154,7 +154,7 @@ uuid_send(PG_FUNCTION_ARGS)
StringInfoData buffer;
 
pq_begintypsend();
-   pq_sendbytes(, (char *) uuid->data, UUID_LEN);
+   pq_sendbytes(, uuid->data, UUID_LEN);
PG_RETURN_BYTEA_P(pq_endtypsend());
 }
 
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index 4fde0c1b14..3dbbd1207f 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -685,7 +685,7 @@ varbit_send(PG_FUNCTION_ARGS)
 
pq_begintypsend();
pq_sendint32(, VARBITLEN(s));
-   pq_sendbytes(, (char *) VARBITS(s), VARBITBYTES(s));
+   pq_sendbytes(, VARBITS(s), VARBITBYTES(s));
PG_RETURN_BYTEA_P(pq_endtypsend());
 }
 
diff --git a/src/include/libpq/pqformat.h b/src/include/libpq/pqformat.h
index c5644c0061..0d2f958af3 100644
--- a/src/include/libpq/pqformat.h
+++ b/src/include/libpq/pqformat.h
@@ -22,7 +22,7 @@ extern void pq_beginmessage_reuse(StringInfo buf, char 
msgtype);
 extern void pq_endmessage(StringInfo buf);
 extern void pq_endmessage_reuse(StringInfo buf);
 
-extern void pq_sendbytes(StringInfo buf, const char *data, int datalen);
+extern void pq_sendbytes(StringInfo buf, const void *data, int datalen);
 extern void pq_sendcountedtext(StringInfo buf, const char *str, int slen,
   bool 

Re: appendBinaryStringInfo stuff

2022-12-30 Thread Peter Eisentraut

On 23.12.22 14:01, David Rowley wrote:

Maybe if there's concern that inlining appendStringInfoString is going
to bloat the binary too much, then how about we just invent an inlined
version of it using some other name that we can use when performance
matters?  We could then safely replace the offending
appendBinaryStringInfos from both places without any concern for
regressing performance.


The jsonpath output routines don't appear to be written with deep 
concern about performance now, so I'm not sure this is the place to 
start tweaking.  For the jsonb parts, there are only a handful of 
strings this affects ("true", "false", "null"), so using 
appendBinaryStringInfo() there a few times doesn't seem so bad.  So I'm 
not too worried about this altogether.






Re: appendBinaryStringInfo stuff

2022-12-30 Thread Peter Eisentraut

On 23.12.22 10:04, Peter Eisentraut wrote:

On 19.12.22 23:48, David Rowley wrote:

On Tue, 20 Dec 2022 at 11:42, Tom Lane  wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical.  Who's got large tables with
jsonpath columns?  It seems to me the type would mostly only exist
as constants within queries.


The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.


Ok, let's leave the jsonb output alone.  The jsonb output code also 
won't change a lot, but there is a bunch of stuff for jsonpath on the 
horizon, so having some more robust coding style to imitate there seems 
useful.  Here is another patch set with the jsonb changes omitted.


I have committed these.




Re: appendBinaryStringInfo stuff

2022-12-23 Thread David Rowley
On Fri, 23 Dec 2022 at 22:04, Peter Eisentraut
 wrote:
>
> On 19.12.22 23:48, David Rowley wrote:
> > On Tue, 20 Dec 2022 at 11:42, Tom Lane  wrote:
> >> I think Peter is entirely right to question whether *this* type's
> >> output function is performance-critical.  Who's got large tables with
> >> jsonpath columns?  It seems to me the type would mostly only exist
> >> as constants within queries.
> >
> > The patch touches code in the path of jsonb's output function too. I
> > don't think you could claim the same for that.
>
> Ok, let's leave the jsonb output alone.  The jsonb output code also
> won't change a lot, but there is a bunch of stuff for jsonpath on the
> horizon, so having some more robust coding style to imitate there seems
> useful.  Here is another patch set with the jsonb changes omitted.

Maybe if there's concern that inlining appendStringInfoString is going
to bloat the binary too much, then how about we just invent an inlined
version of it using some other name that we can use when performance
matters?  We could then safely replace the offending
appendBinaryStringInfos from both places without any concern for
regressing performance.

FWIW, I just did a few compilation runs of our supported versions to
see how much postgres binary grew release to release:

branch  postgres binary size growth bytes
REL_10_STABLE8230232  0
REL_11_STABLE8586024 355792
REL_12_STABLE8831664 245640
REL_13_STABLE8990824 159160
REL_14_STABLE9484848 494024
REL_15_STABLE9744680 259832
master   9977896 233216
inline_asis 10004032  26136

(inlined_asis  = inlined appendStringInfoString)

On the other hand, if we went with inlining the existing function,
then it looks to be about 10% of the growth we saw between v14 and
v15. That seems quite large.

David




Re: appendBinaryStringInfo stuff

2022-12-23 Thread Peter Eisentraut

On 19.12.22 23:48, David Rowley wrote:

On Tue, 20 Dec 2022 at 11:42, Tom Lane  wrote:

I think Peter is entirely right to question whether *this* type's
output function is performance-critical.  Who's got large tables with
jsonpath columns?  It seems to me the type would mostly only exist
as constants within queries.


The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.


Ok, let's leave the jsonb output alone.  The jsonb output code also 
won't change a lot, but there is a bunch of stuff for jsonpath on the 
horizon, so having some more robust coding style to imitate there seems 
useful.  Here is another patch set with the jsonb changes omitted.
From 319c206bec80d326808a211f9edd50346edbeb8c Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Mon, 19 Dec 2022 07:01:27 +0100
Subject: [PATCH v2 1/2] Use appendStringInfoString instead of
 appendBinaryStringInfo where possible

For the jsonpath output, we don't need to squeeze out every bit of
performance, so instead use a more robust coding style.  There are
similar calls in jsonb.c, which we leave alone here since there is
indeed a performance impact for bulk exports.

Discussion: 
https://www.postgresql.org/message-id/flat/a0086cfc-ff0f-2827-20fe-52b591d2666c%40enterprisedb.com
---
 src/backend/utils/adt/jsonpath.c | 42 
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index 91af030095..a39eab9c20 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -213,7 +213,7 @@ jsonPathToCstring(StringInfo out, JsonPath *in, int 
estimated_len)
enlargeStringInfo(out, estimated_len);
 
if (!(in->header & JSONPATH_LAX))
-   appendBinaryStringInfo(out, "strict ", 7);
+   appendStringInfoString(out, "strict ");
 
jspInit(, in);
printJsonPathItem(out, , false, true);
@@ -510,9 +510,9 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool 
inKey,
break;
case jpiBool:
if (jspGetBool(v))
-   appendBinaryStringInfo(buf, "true", 4);
+   appendStringInfoString(buf, "true");
else
-   appendBinaryStringInfo(buf, "false", 5);
+   appendStringInfoString(buf, "false");
break;
case jpiAnd:
case jpiOr:
@@ -553,13 +553,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool 
inKey,
  
operationPriority(elem.type) <=
  
operationPriority(v->type));
 
-   appendBinaryStringInfo(buf, " like_regex ", 12);
+   appendStringInfoString(buf, " like_regex ");
 
escape_json(buf, v->content.like_regex.pattern);
 
if (v->content.like_regex.flags)
{
-   appendBinaryStringInfo(buf, " flag \"", 7);
+   appendStringInfoString(buf, " flag \"");
 
if (v->content.like_regex.flags & 
JSP_REGEX_ICASE)
appendStringInfoChar(buf, 'i');
@@ -591,13 +591,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool 
inKey,
appendStringInfoChar(buf, ')');
break;
case jpiFilter:
-   appendBinaryStringInfo(buf, "?(", 2);
+   appendStringInfoString(buf, "?(");
jspGetArg(v, );
printJsonPathItem(buf, , false, false);
appendStringInfoChar(buf, ')');
break;
case jpiNot:
-   appendBinaryStringInfo(buf, "!(", 2);
+   appendStringInfoString(buf, "!(");
jspGetArg(v, );
printJsonPathItem(buf, , false, false);
appendStringInfoChar(buf, ')');
@@ -606,10 +606,10 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool 
inKey,
appendStringInfoChar(buf, '(');
jspGetArg(v, );
printJsonPathItem(buf, , false, false);
-   appendBinaryStringInfo(buf, ") is unknown", 12);
+   appendStringInfoString(buf, ") is unknown");
break;
case jpiExists:
-   appendBinaryStringInfo(buf, "exists (", 8);
+   appendStringInfoString(buf, "exists (");
jspGetArg(v, );
printJsonPathItem(buf, , false, false);
  

Re: appendBinaryStringInfo stuff

2022-12-22 Thread John Naylor
On Thu, Dec 22, 2022 at 4:19 PM David Rowley  wrote:
>
> Test 1 (from earlier)
>
> master + escape_json using appendStringInfoCharMacro
> $ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
> latency average = 1.807 ms
> latency average = 1.800 ms
> latency average = 1.812 ms (~4.8% faster than master)

> 23.05%  postgres  [.] pg_utf_mblen

I get about 20% improvement by adding an ascii fast path in
pg_mbstrlen_with_len, which I think would work with all encodings we
support:

@@ -1064,7 +1064,12 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)

while (limit > 0 && *mbstr)
{
-   int l = pg_mblen(mbstr);
+   int l;
+
+   if (!IS_HIGHBIT_SET(*mbstr))
+   l = 1;
+   else
+   l = pg_mblen(mbstr);

--
John Naylor
EDB: http://www.enterprisedb.com


Re: appendBinaryStringInfo stuff

2022-12-22 Thread David Rowley
On Thu, 22 Dec 2022 at 20:56, Tom Lane  wrote:
>
> David Rowley  writes:
> >   22.57%  postgres  [.] escape_json
>
> Hmm ... shouldn't we do something like
>
> -appendStringInfoString(buf, "\\b");
> +appendStringInfoCharMacro(buf, '\\');
> +appendStringInfoCharMacro(buf, 'b');
>
> and so on in that function?  I'm not convinced that this one
> hotspot justifies inlining appendStringInfoString everywhere.

It improves things slightly:

Test 1 (from earlier)

master + escape_json using appendStringInfoCharMacro
$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.807 ms
latency average = 1.800 ms
latency average = 1.812 ms (~4.8% faster than master)

  23.05%  postgres  [.] pg_utf_mblen
  22.55%  postgres  [.] escape_json
   8.58%  postgres  [.] JsonbIteratorNext.part.0
   6.80%  postgres  [.] AllocSetAlloc
   4.23%  postgres  [.] pg_mbstrlen_with_len
   3.88%  postgres  [.] JsonbToCStringWorker
   3.79%  postgres  [.] fillJsonbValue
   3.18%  postgres  [.] appendBinaryStringInfo
   2.43%  postgres  [.] enlargeStringInfo
   2.02%  postgres  [.] palloc
   1.61%  postgres  [.] jsonb_put_escaped_value

David




Re: appendBinaryStringInfo stuff

2022-12-21 Thread Tom Lane
David Rowley  writes:
>   22.57%  postgres  [.] escape_json

Hmm ... shouldn't we do something like

-appendStringInfoString(buf, "\\b");
+appendStringInfoCharMacro(buf, '\\');
+appendStringInfoCharMacro(buf, 'b');

and so on in that function?  I'm not convinced that this one
hotspot justifies inlining appendStringInfoString everywhere.

regards, tom lane




Re: appendBinaryStringInfo stuff

2022-12-21 Thread David Rowley
On Tue, 20 Dec 2022 at 11:26, David Rowley  wrote:
> It would be good to see some measurements to find out how much adding
> the strlen calls back in would cost us.

I tried this out.  I'm not pretending I found the best test which
highlights how much the performance will change in a real-world case.
I just wanted to try to get an indication of if changing jsonb's
output function to make more use of appendStringInfoString instead of
appendBinaryStringInfo is likely to affect performance.

Also, in test 2, I picked a use case that makes quite a bit of use of
appendStringInfoString already and checked if inlining that function
would help improve things.  I imagine test 2 really is not
bottlenecked on appendStringInfoString enough to get a true idea of
how much inlining appendStringInfoString could really help (spoiler,
it helps quite a bit)

Test 1: See if using appendStringInfoString instead of
appendBinaryStringInfo hinders jsonb output performance.

setup:
create table jb (j jsonb);
insert into jb select row_to_json(pg_class) from pg_class;
vacuum freeze analyze jb;

bench.sql:
select sum(length(j::text)) from jb;

master (@3f28bd73):
$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.896 ms
latency average = 1.885 ms
latency average = 1.899 ms

  22.57%  postgres  [.] escape_json
  21.83%  postgres  [.] pg_utf_mblen
   9.23%  postgres  [.] JsonbIteratorNext.part.0
   7.12%  postgres  [.] AllocSetAlloc
   4.07%  postgres  [.] pg_mbstrlen_with_len
   3.71%  postgres  [.] JsonbToCStringWorker
   3.70%  postgres  [.] fillJsonbValue
   3.17%  postgres  [.] appendBinaryStringInfo
   2.95%  postgres  [.] enlargeStringInfo
   2.09%  postgres  [.] jsonb_put_escaped_value
   1.89%  postgres  [.] palloc

master + 0001-Use-appendStringInfoString-instead-of-appendBinarySt.patch

$ pgbench -n -T 60 -f bench.sql -M prepared postgres | grep latency
latency average = 1.912 ms
latency average = 1.912 ms
latency average = 1.912 ms (~1% slower)

  22.38%  postgres  [.] escape_json
  21.98%  postgres  [.] pg_utf_mblen
   9.07%  postgres  [.] JsonbIteratorNext.part.0
   5.93%  postgres  [.] AllocSetAlloc
   4.11%  postgres  [.] pg_mbstrlen_with_len
   3.87%  postgres  [.] fillJsonbValue
   3.66%  postgres  [.] JsonbToCStringWorker
   2.28%  postgres  [.] enlargeStringInfo
   2.15%  postgres  [.] appendStringInfoString
   1.98%  postgres  [.] jsonb_put_escaped_value
   1.92%  postgres  [.] palloc
   1.58%  postgres  [.] appendBinaryStringInfo
   1.42%  postgres  [.] pnstrdup

Test 2: Test if inlining appendStringInfoString helps

bench.sql:
select sum(length(pg_get_ruledef(oid))) from pg_rewrite;

master (@3f28bd73):
$ pgbench -n -T 60 -f bench.sql postgres | grep latency
latency average = 16.355 ms
latency average = 16.290 ms
latency average = 16.303 ms

static inline appendStringInfoString
$ pgbench -n -T 60 -f bench.sql postgres | grep latency
latency average = 15.690 ms
latency average = 15.575 ms
latency average = 15.604 ms (~4.4% faster)

David




Re: appendBinaryStringInfo stuff

2022-12-20 Thread Robert Haas
On Tue, Dec 20, 2022 at 10:47 AM Andrew Dunstan  wrote:
> There are 5 uses in the jsonb code where the length param is a compile
> time constant:
>
> andrew@ub22:adt $ grep appendBinary.*[0-9] jsonb*
> jsonb.c:appendBinaryStringInfo(out, "null", 4);
> jsonb.c:appendBinaryStringInfo(out, "true", 4);
> jsonb.c:appendBinaryStringInfo(out, "false", 5);
> jsonb.c:appendBinaryStringInfo(out, ": ", 2);
> jsonb.c:appendBinaryStringInfo(out, "", 4);
>
> None of these really bother me much, TBH. In fact the last one is
> arguably nicer because it tells you without counting how many spaces
> there are.

+1. There are certainly cases where this kind of style can create
confusion, but I have a hard time putting any of these instances into
that category. It's obvious at a glance that null is 4 bytes, false is
5, etc.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: appendBinaryStringInfo stuff

2022-12-20 Thread David Rowley
On Wed, 21 Dec 2022 at 04:47, Andrew Dunstan  wrote:
> jsonb.c:appendBinaryStringInfo(out, "", 4);
>
> None of these really bother me much, TBH. In fact the last one is
> arguably nicer because it tells you without counting how many spaces
> there are.

appendStringInfoSpaces() might be even better.

David




Re: appendBinaryStringInfo stuff

2022-12-20 Thread Andres Freund
Hi,

On 2022-12-19 21:29:10 +1300, David Rowley wrote:
> On Mon, 19 Dec 2022 at 21:12, Andres Freund  wrote:
> > Perhaps we should make appendStringInfoString() a static inline function
> > - most compilers can compute strlen() of a constant string at compile
> > time.
> 
> I had wondered about that, but last time I looked into it there was a
> small increase in the size of the binary from doing it. Perhaps it
> does not matter, but it's something to consider.

I'd not be too worried about that in this case.


> Re-thinking, I wonder if we could use the same macro trick used in
> ereport_domain(). Something like:
> 
> #ifdef HAVE__BUILTIN_CONSTANT_P
> #define appendStringInfoString(str, s) \
> __builtin_constant_p(s) ? \
> appendBinaryStringInfo(str, s, sizeof(s) - 1) : \
> appendStringInfoStringInternal(str, s)
> #else
> #define appendStringInfoString(str, s) \
> appendStringInfoStringInternal(str, s)
> #endif
> 
> and rename the existing function to appendStringInfoStringInternal.
> 
> Because __builtin_constant_p is a known compile-time constant, it
> should be folded to just call the corresponding function during
> compilation.

Several compilers can optimize away repeated strlen() calls, even if the
string isn't a compile-time constant. So I'm not really convinced that
tying inlining-strlen to __builtin_constant_p() is a good ida.

> Just looking at the binary sizes for postgres. I see:
> 
> unpatched = 9972128 bytes
> inline function = 9990064 bytes

That seems acceptable to me.


> macro trick = 9984968 bytes
>
> I'm currently not sure why the macro trick increases the binary at
> all. I understand why the inline function does.

I think Tom's explanation is on point.


I've in the past looked at stringinfo.c being the bottleneck in a bunch
of places and concluded that we really need to remove the function call
in the happy path entirely - we should have an enlargeStringInfo() that
we can call externally iff needed and then implement the rest of
appendBinaryStringInfo() etc in an inline function.  That allows the
compiler to e.g. optimize out the repeated maintenance of the \0 write
etc.

Greetings,

Andres Freund




Re: appendBinaryStringInfo stuff

2022-12-20 Thread Andrew Dunstan


On 2022-12-19 Mo 17:48, David Rowley wrote:
> On Tue, 20 Dec 2022 at 11:42, Tom Lane  wrote:
>> I think Peter is entirely right to question whether *this* type's
>> output function is performance-critical.  Who's got large tables with
>> jsonpath columns?  It seems to me the type would mostly only exist
>> as constants within queries.
> The patch touches code in the path of jsonb's output function too. I
> don't think you could claim the same for that.
>

I agree that some of the uses in the jsonpath code could reasonably just
be converted to use appendStringInfoString()

There are 5 uses in the jsonb code where the length param is a compile
time constant:

andrew@ub22:adt $ grep appendBinary.*[0-9] jsonb*
jsonb.c:            appendBinaryStringInfo(out, "null", 4);
jsonb.c:                appendBinaryStringInfo(out, "true", 4);
jsonb.c:                appendBinaryStringInfo(out, "false", 5);
jsonb.c:                appendBinaryStringInfo(out, ": ", 2);
jsonb.c:            appendBinaryStringInfo(out, "    ", 4);

None of these really bother me much, TBH. In fact the last one is
arguably nicer because it tells you without counting how many spaces
there are.

Changing the type of the second argument to appendBinaryStringInfo to
void* seems reasonable.


cheers


andrew

--
Andrew Dunstan
EDB: https://www.enterprisedb.com





Re: appendBinaryStringInfo stuff

2022-12-19 Thread David Rowley
On Tue, 20 Dec 2022 at 11:42, Tom Lane  wrote:
> I think Peter is entirely right to question whether *this* type's
> output function is performance-critical.  Who's got large tables with
> jsonpath columns?  It seems to me the type would mostly only exist
> as constants within queries.

The patch touches code in the path of jsonb's output function too. I
don't think you could claim the same for that.

David




Re: appendBinaryStringInfo stuff

2022-12-19 Thread Tom Lane
David Rowley  writes:
> On Tue, 20 Dec 2022 at 09:23, Peter Eisentraut
>  wrote:
>> AFAICT, the code in question is for the text output of the jsonpath
>> type, which is used ... for barely anything?

> I think the performance of a type's output function is quite critical.

I think Peter is entirely right to question whether *this* type's
output function is performance-critical.  Who's got large tables with
jsonpath columns?  It seems to me the type would mostly only exist
as constants within queries.

regards, tom lane




Re: appendBinaryStringInfo stuff

2022-12-19 Thread David Rowley
On Tue, 20 Dec 2022 at 09:23, Peter Eisentraut
 wrote:
> AFAICT, the code in question is for the text output of the jsonpath
> type, which is used ... for barely anything?

I think the performance of a type's output function is quite critical.
I've seen huge performance gains in COPY TO performance from
optimising output functions in the past (see dad75eb4a and aa2387e2f).
It would be good to see some measurements to find out how much adding
the strlen calls back in would cost us. If we're unable to measure the
change, then maybe the cleanup patch would be nice. If it's going to
slow COPY TO down 10-20%, then we need to leave this or consider the
inline function mentioned by Andres or the macro trick mentioned by
me.

David




Re: appendBinaryStringInfo stuff

2022-12-19 Thread Peter Eisentraut

On 19.12.22 09:12, Andres Freund wrote:

There are a bunch of places in the json code that use
appendBinaryStringInfo() where appendStringInfoString() could be used, e.g.,

 appendBinaryStringInfo(buf, ".size()", 7);

Is there a reason for this?  Are we that stretched for performance?

strlen() isn't that cheap, so it doesn't generally seem unreasonable. I
don't think we should add the strlen overhead in places that can
conceivably be a bottleneck - and some of the jsonb code clearly can be
that.


AFAICT, the code in question is for the text output of the jsonpath 
type, which is used ... for barely anything?






Re: appendBinaryStringInfo stuff

2022-12-19 Thread Tom Lane
David Rowley  writes:
> I'm currently not sure why the macro trick increases the binary at
> all. I understand why the inline function does.

In the places where it changes the code at all, you're replacing

appendStringInfoString(buf, s);

with

appendBinaryStringInfo(buf, s, n);

Even if n is a constant, the latter surely requires more instructions
per call site.

Whether this is a win seems to depend on how many of these are
performance-critical.

regards, tom lane




Re: appendBinaryStringInfo stuff

2022-12-19 Thread David Rowley
On Mon, 19 Dec 2022 at 21:12, Andres Freund  wrote:
> Perhaps we should make appendStringInfoString() a static inline function
> - most compilers can compute strlen() of a constant string at compile
> time.

I had wondered about that, but last time I looked into it there was a
small increase in the size of the binary from doing it. Perhaps it
does not matter, but it's something to consider.

Re-thinking, I wonder if we could use the same macro trick used in
ereport_domain(). Something like:

#ifdef HAVE__BUILTIN_CONSTANT_P
#define appendStringInfoString(str, s) \
__builtin_constant_p(s) ? \
appendBinaryStringInfo(str, s, sizeof(s) - 1) : \
appendStringInfoStringInternal(str, s)
#else
#define appendStringInfoString(str, s) \
appendStringInfoStringInternal(str, s)
#endif

and rename the existing function to appendStringInfoStringInternal.

Because __builtin_constant_p is a known compile-time constant, it
should be folded to just call the corresponding function during
compilation.

Just looking at the binary sizes for postgres. I see:

unpatched = 9972128 bytes
inline function = 9990064 bytes
macro trick = 9984968 bytes

I'm currently not sure why the macro trick increases the binary at
all. I understand why the inline function does.

David




Re: appendBinaryStringInfo stuff

2022-12-19 Thread Andres Freund
Hi,

On 2022-12-19 07:13:40 +0100, Peter Eisentraut wrote:
> I found a couple of adjacent weird things:
> 
> There are a bunch of places in the json code that use
> appendBinaryStringInfo() where appendStringInfoString() could be used, e.g.,
> 
> appendBinaryStringInfo(buf, ".size()", 7);
> 
> Is there a reason for this?  Are we that stretched for performance?

strlen() isn't that cheap, so it doesn't generally seem unreasonable. I
don't think we should add the strlen overhead in places that can
conceivably be a bottleneck - and some of the jsonb code clearly can be
that.


> I find this kind of code very fragile.

But this is obviously an issue.

Perhaps we should make appendStringInfoString() a static inline function
- most compilers can compute strlen() of a constant string at compile
time.

Greetings,

Andres Freund




appendBinaryStringInfo stuff

2022-12-18 Thread Peter Eisentraut

I found a couple of adjacent weird things:

There are a bunch of places in the json code that use 
appendBinaryStringInfo() where appendStringInfoString() could be used, e.g.,


appendBinaryStringInfo(buf, ".size()", 7);

Is there a reason for this?  Are we that stretched for performance?  I 
find this kind of code very fragile.


Also, the argument type of appendBinaryStringInfo() is char *.  There is 
some code that uses this function to assemble some kind of packed binary 
layout, which requires a bunch of casts because of this.  I think 
functions taking binary data plus length should take void * instead, 
like memcpy() for example.


Attached are two patches that illustrate these issues and show proposed 
changes.From 15b103edec2f89a63596d112fd62cfc2367576bf Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Mon, 19 Dec 2022 07:01:27 +0100
Subject: [PATCH 1/2] Use appendStringInfoString instead of
 appendBinaryStringInfo where possible

---
 src/backend/utils/adt/jsonb.c| 10 
 src/backend/utils/adt/jsonpath.c | 42 
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 7c1e5e6144..c153c87ba6 100644
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@@ -361,7 +361,7 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue 
*scalarVal)
switch (scalarVal->type)
{
case jbvNull:
-   appendBinaryStringInfo(out, "null", 4);
+   appendStringInfoString(out, "null");
break;
case jbvString:
escape_json(out, pnstrdup(scalarVal->val.string.val, 
scalarVal->val.string.len));
@@ -373,9 +373,9 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue 
*scalarVal)
break;
case jbvBool:
if (scalarVal->val.boolean)
-   appendBinaryStringInfo(out, "true", 4);
+   appendStringInfoString(out, "true");
else
-   appendBinaryStringInfo(out, "false", 5);
+   appendStringInfoString(out, "false");
break;
default:
elog(ERROR, "unknown jsonb scalar type");
@@ -565,7 +565,7 @@ JsonbToCStringWorker(StringInfo out, JsonbContainer *in, 
int estimated_len, bool
 
/* json rules guarantee this is a string */
jsonb_put_escaped_value(out, );
-   appendBinaryStringInfo(out, ": ", 2);
+   appendStringInfoString(out, ": ");
 
type = JsonbIteratorNext(, , false);
if (type == WJB_VALUE)
@@ -630,7 +630,7 @@ add_indent(StringInfo out, bool indent, int level)
 
appendStringInfoCharMacro(out, '\n');
for (i = 0; i < level; i++)
-   appendBinaryStringInfo(out, "", 4);
+   appendStringInfoString(out, "");
}
 }
 
diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index 91af030095..a39eab9c20 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -213,7 +213,7 @@ jsonPathToCstring(StringInfo out, JsonPath *in, int 
estimated_len)
enlargeStringInfo(out, estimated_len);
 
if (!(in->header & JSONPATH_LAX))
-   appendBinaryStringInfo(out, "strict ", 7);
+   appendStringInfoString(out, "strict ");
 
jspInit(, in);
printJsonPathItem(out, , false, true);
@@ -510,9 +510,9 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool 
inKey,
break;
case jpiBool:
if (jspGetBool(v))
-   appendBinaryStringInfo(buf, "true", 4);
+   appendStringInfoString(buf, "true");
else
-   appendBinaryStringInfo(buf, "false", 5);
+   appendStringInfoString(buf, "false");
break;
case jpiAnd:
case jpiOr:
@@ -553,13 +553,13 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool 
inKey,
  
operationPriority(elem.type) <=
  
operationPriority(v->type));
 
-   appendBinaryStringInfo(buf, " like_regex ", 12);
+   appendStringInfoString(buf, " like_regex ");
 
escape_json(buf, v->content.like_regex.pattern);
 
if (v->content.like_regex.flags)
{
-