On Fri, Mar 29, 2019 at 08:25:10PM +0100, Tomas Vondra wrote:
On Fri, Mar 29, 2019 at 12:06:26PM -0700, Peter Geoghegan wrote:
On Fri, Mar 29, 2019 at 11:20 AM Tomas Vondra
<tomas.von...@2ndquadrant.com> wrote:
I've pushed a fix for this. The short version is that the serialized
representation was not respecting memory alignment requirements, which was
causing issues in machines sensitive to this (ia64, sparc, hppa). It's a
blind attempt, as I currently don't have access to any such machine.
Looks like gharial still has problems:
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=gharial&dt=2019-03-29%2018%3A30%3A47
Yeah, I saw that :-(
Unfortunately fixing this without access to any ia64/sparc/hppa machine
will be hard. The only thing I can do is wait for gaur/snapper to report
the failure with a backtrace.
OK, so the second memory-alignment fix I pushed on Saturday seems to
have worked. There's one aspect of it that I don't like though - the
MCV is first serialized into a buffer, which is then copied into the
varlena buffer. Deserialization works in reverse, i.e. it copies the
data from the varlena, then accessed the copy.
For the emergency fix that seemed acceptable, as it somewhat reduced the
amount of modified code, but I don't want to leave it like that because
of efficiency and memory consumption. So barring objections I'll push
the attached patch, which ensures proper memory alignment within the raw
varlena buffer. Thus the copies are not needed.
(The second part of the patch merely adds some alignment-enforcing
asserts - I don't plan to commit that, but if someone plans decides to
test this, it might be useful.)
regards
--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 65c5613aeec46c425c6636d181c91631abfab05d Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Tue, 2 Apr 2019 23:29:35 +0200
Subject: [PATCH 1/2] rework
---
src/backend/statistics/mcv.c | 114 +++++++++++++++++++++++------------
1 file changed, 75 insertions(+), 39 deletions(-)
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index d1af5d84d0..633f73e1be 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -491,7 +491,6 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
char *item = palloc0(itemsize);
/* serialized items (indexes into arrays, etc.) */
- bytea *output;
char *raw;
char *ptr;
@@ -625,15 +624,25 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
* Now we can finally compute how much space we'll actually need for the
* whole serialized MCV list (varlena header, MCV header, dimension info
* for each attribute, deduplicated values and items).
+ *
+ * The header fields are copied one by one, so that we don't need any
+ * explicit alignment (we copy them while deserializing). All fields
+ * after this need to be properly aligned, for direct access.
*/
- total_length = offsetof(MCVList, items)
- + MAXALIGN(ndims * sizeof(DimensionInfo));
+ total_length = MAXALIGN(VARHDRSZ + (3 * sizeof(uint32))
+ + sizeof(AttrNumber) + (ndims * sizeof(Oid)));
+
+ /* dimension info */
+ total_length += MAXALIGN(ndims * sizeof(DimensionInfo));
/* add space for the arrays of deduplicated values */
for (i = 0; i < ndims; i++)
total_length += MAXALIGN(info[i].nbytes);
- /* and finally the items (no additional alignment needed) */
+ /*
+ * And finally the items (no additional alignment needed, we start
+ * at proper alignment and the itemsize formula uses MAXALIGN)
+ */
total_length += mcvlist->nitems * itemsize;
/*
@@ -641,11 +650,27 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
* so we set them to zero to make the result more compressible).
*/
raw = palloc0(total_length);
- ptr = raw;
+ SET_VARSIZE(raw, total_length);
+ ptr = VARDATA(raw);
+
+ /* copy the MCV list header fields, one by one */
+ memcpy(ptr, &mcvlist->magic, sizeof(uint32));
+ ptr += sizeof(uint32);
+
+ memcpy(ptr, &mcvlist->type, sizeof(uint32));
+ ptr += sizeof(uint32);
- /* copy the MCV list header */
- memcpy(ptr, mcvlist, offsetof(MCVList, items));
- ptr += offsetof(MCVList, items);
+ memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
+ ptr += sizeof(uint32);
+
+ memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
+ ptr += sizeof(AttrNumber);
+
+ memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
+ ptr += (sizeof(Oid) * ndims);
+
+ /* the header may not be exactly aligned, so make sure it is */
+ ptr = raw + MAXALIGN(ptr - raw);
/* store information about the attributes */
memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
@@ -761,14 +786,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
pfree(values);
pfree(counts);
- output = (bytea *) palloc(VARHDRSZ + total_length);
- SET_VARSIZE(output, VARHDRSZ + total_length);
-
- memcpy(VARDATA_ANY(output), raw, total_length);
-
- pfree(raw);
-
- return output;
+ return (bytea *) raw;
}
/*
@@ -789,8 +807,7 @@ statext_mcv_deserialize(bytea *data)
char *ptr;
int ndims,
- nitems,
- itemsize;
+ nitems;
DimensionInfo *info = NULL;
/* local allocation buffer (used only for deserialization) */
@@ -808,26 +825,42 @@ statext_mcv_deserialize(bytea *data)
if (data == NULL)
return NULL;
+#define MinSizeOfMCVList \
+ (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
+
+#define SizeOfMCVList(ndims,nitems) \
+ (MAXALIGN(MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
+ MAXALIGN((ndims) * sizeof(DimensionInfo)) + \
+ MAXALIGN((nitems) * ITEM_SIZE(ndims)))
+
/*
* We can't possibly deserialize a MCV list if there's not even a
complete
- * header.
+ * header. We need an explicit formula here, because we serialize the
+ * header fields one by one, so we need to ignore struct alignment.
*/
- if (VARSIZE_ANY_EXHDR(data) < offsetof(MCVList, items))
+ if (VARSIZE_ANY(data) < MinSizeOfMCVList)
elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
- VARSIZE_ANY_EXHDR(data), offsetof(MCVList, items));
+ VARSIZE_ANY(data), MinSizeOfMCVList);
/* read the MCV list header */
mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
- /* initialize pointer to the data part (skip the varlena header) */
- raw = palloc(VARSIZE_ANY_EXHDR(data));
- ptr = raw;
-
- memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data));
+ /* pointer to the data part (skip the varlena header) */
+ ptr = VARDATA_ANY(data);
+ raw = (char *) data;
/* get the header and perform further sanity checks */
- memcpy(mcvlist, ptr, offsetof(MCVList, items));
- ptr += offsetof(MCVList, items);
+ memcpy(&mcvlist->magic, ptr, sizeof(uint32));
+ ptr += sizeof(uint32);
+
+ memcpy(&mcvlist->type, ptr, sizeof(uint32));
+ ptr += sizeof(uint32);
+
+ memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
+ ptr += sizeof(uint32);
+
+ memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
+ ptr += sizeof(AttrNumber);
if (mcvlist->magic != STATS_MCV_MAGIC)
elog(ERROR, "invalid MCV magic %u (expected %u)",
@@ -852,25 +885,29 @@ statext_mcv_deserialize(bytea *data)
nitems = mcvlist->nitems;
ndims = mcvlist->ndimensions;
- itemsize = ITEM_SIZE(ndims);
/*
* Check amount of data including DimensionInfo for all dimensions and
* also the serialized items (including uint16 indexes). Also, walk
* through the dimension information and add it to the sum.
*/
- expected_size = offsetof(MCVList, items) +
- ndims * sizeof(DimensionInfo) +
- (nitems * itemsize);
+ expected_size = SizeOfMCVList(ndims, nitems);
/*
* Check that we have at least the dimension and info records, along
with
* the items. We don't know the size of the serialized values yet. We
need
* to do this check first, before accessing the dimension info.
*/
- if (VARSIZE_ANY_EXHDR(data) < expected_size)
+ if (VARSIZE_ANY(data) < expected_size)
elog(ERROR, "invalid MCV size %zd (expected %zu)",
- VARSIZE_ANY_EXHDR(data), expected_size);
+ VARSIZE_ANY(data), expected_size);
+
+ /* Now copy the array of type Oids. */
+ memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
+ ptr += (sizeof(Oid) * ndims);
+
+ /* ensure alignment of the pointer (after the header fields) */
+ ptr = raw + MAXALIGN(ptr - raw);
/* Now it's safe to access the dimension info. */
info = (DimensionInfo *) ptr;
@@ -894,9 +931,9 @@ statext_mcv_deserialize(bytea *data)
* (header, dimension info. items and deduplicated data). So do the
final
* check on size.
*/
- if (VARSIZE_ANY_EXHDR(data) != expected_size)
+ if (VARSIZE_ANY(data) != expected_size)
elog(ERROR, "invalid MCV size %zd (expected %zu)",
- VARSIZE_ANY_EXHDR(data), expected_size);
+ VARSIZE_ANY(data), expected_size);
/*
* We need an array of Datum values for each dimension, so that we can
@@ -1063,18 +1100,17 @@ statext_mcv_deserialize(bytea *data)
ptr += ITEM_SIZE(ndims);
/* check we're not overflowing the input */
- Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data));
+ Assert(ptr <= (char *) raw + VARSIZE_ANY(data));
}
/* check that we processed all the data */
- Assert(ptr == raw + VARSIZE_ANY_EXHDR(data));
+ Assert(ptr == raw + VARSIZE_ANY(data));
/* release the buffers used for mapping */
for (dim = 0; dim < ndims; dim++)
pfree(map[dim]);
pfree(map);
- pfree(raw);
return mcvlist;
}
--
2.20.1
>From e3d36cc0a80ff0f85499718db7eb8adfedcc87b3 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Wed, 3 Apr 2019 00:05:52 +0200
Subject: [PATCH 2/2] extra memory-alignment asserts
---
src/backend/statistics/mcv.c | 68 ++++++++++++++++++++++++++++++------
1 file changed, 58 insertions(+), 10 deletions(-)
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 633f73e1be..8f0651f9fe 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -439,6 +439,11 @@ statext_mcv_load(Oid mvoid)
return result;
}
+static void
+AssertIsAligned(const void *base, const void *ptr)
+{
+ Assert(((char *) ptr - (char *) base) == MAXALIGN((char *) ptr - (char
*) base));
+}
/*
* statext_mcv_serialize
@@ -672,16 +677,23 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
/* the header may not be exactly aligned, so make sure it is */
ptr = raw + MAXALIGN(ptr - raw);
+ AssertIsAligned(raw, ptr);
+
/* store information about the attributes */
memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
ptr += MAXALIGN(sizeof(DimensionInfo) * ndims);
+ AssertIsAligned(raw, ptr);
+
/* Copy the deduplicated values for all attributes to the output. */
for (dim = 0; dim < ndims; dim++)
{
/* remember the starting point for Asserts later */
char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
+ /* proper alignment before earch dimension */
+ AssertIsAligned(raw, ptr);
+
for (i = 0; i < info[dim].nvalues; i++)
{
Datum value = values[dim][i];
@@ -714,6 +726,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
{
int len =
VARSIZE_ANY(value);
+ /* proper alignment before varlena values */
+ AssertIsAligned(raw, ptr);
+
memcpy(ptr, DatumGetPointer(value), len);
ptr += MAXALIGN(len);
}
@@ -721,6 +736,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
{
Size len =
strlen(DatumGetCString(value)) + 1; /* terminator */
+ /* proper alignment before cstring values */
+ AssertIsAligned(raw, ptr);
+
memcpy(ptr, DatumGetCString(value), len);
ptr += MAXALIGN(len);
}
@@ -773,6 +791,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats
**stats)
memcpy(ITEM_FREQUENCY(item, ndims), &mcvitem->frequency,
sizeof(double));
memcpy(ITEM_BASE_FREQUENCY(item, ndims),
&mcvitem->base_frequency, sizeof(double));
+ /* each serialized item is aligned */
+ AssertIsAligned(raw, ptr);
+
/* copy the serialized item into the array */
memcpy(ptr, item, itemsize);
@@ -886,18 +907,13 @@ statext_mcv_deserialize(bytea *data)
nitems = mcvlist->nitems;
ndims = mcvlist->ndimensions;
- /*
- * Check amount of data including DimensionInfo for all dimensions and
- * also the serialized items (including uint16 indexes). Also, walk
- * through the dimension information and add it to the sum.
- */
- expected_size = SizeOfMCVList(ndims, nitems);
-
/*
* Check that we have at least the dimension and info records, along
with
* the items. We don't know the size of the serialized values yet. We
need
* to do this check first, before accessing the dimension info.
*/
+ expected_size = SizeOfMCVList(ndims, nitems);
+
if (VARSIZE_ANY(data) < expected_size)
elog(ERROR, "invalid MCV size %zd (expected %zu)",
VARSIZE_ANY(data), expected_size);
@@ -909,6 +925,9 @@ statext_mcv_deserialize(bytea *data)
/* ensure alignment of the pointer (after the header fields) */
ptr = raw + MAXALIGN(ptr - raw);
+ /* proper alignment before serialized dimension info */
+ AssertIsAligned(raw, ptr);
+
/* Now it's safe to access the dimension info. */
info = (DimensionInfo *) ptr;
ptr += MAXALIGN(ndims * sizeof(DimensionInfo));
@@ -982,6 +1001,11 @@ statext_mcv_deserialize(bytea *data)
dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
+ /* proper alignment of pointers used to allocate MCV parts */
+ AssertIsAligned((char *) mcvlist, valuesptr);
+ AssertIsAligned((char *) mcvlist, isnullptr);
+ AssertIsAligned((char *) mcvlist, dataptr);
+
/*
* Build mapping (index => value) for translating the serialized data
into
* the in-memory representation.
@@ -991,6 +1015,9 @@ statext_mcv_deserialize(bytea *data)
/* remember start position in the input array */
char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
+ /* proper alignment before serialized values in input buffer */
+ AssertIsAligned(raw, ptr);
+
if (info[dim].typbyval)
{
/* for by-val types we simply copy data into the
mapping */
@@ -1029,7 +1056,13 @@ statext_mcv_deserialize(bytea *data)
/* varlena */
for (i = 0; i < info[dim].nvalues; i++)
{
- Size len = VARSIZE_ANY(ptr);
+ Size len;
+
+ /* proper alignment before serialized
varlena value */
+ AssertIsAligned(raw, ptr);
+ AssertIsAligned((char *) mcvlist,
dataptr);
+
+ len = VARSIZE_ANY(ptr);
memcpy(dataptr, ptr, len);
ptr += MAXALIGN(len);
@@ -1044,7 +1077,13 @@ statext_mcv_deserialize(bytea *data)
/* cstring */
for (i = 0; i < info[dim].nvalues; i++)
{
- Size len = (strlen(ptr) +
1); /* don't forget the \0 */
+ Size len;
+
+ /* proper alignment before serialized
cstring value */
+ AssertIsAligned(raw, ptr);
+ AssertIsAligned((char *) mcvlist,
dataptr);
+
+ len = (strlen(ptr) + 1); /*
don't forget the \0 */
memcpy(dataptr, ptr, len);
ptr += MAXALIGN(len);
@@ -1069,7 +1108,10 @@ statext_mcv_deserialize(bytea *data)
ptr = raw + MAXALIGN(ptr - raw);
}
- /* we should have also filled the MCV list exactly */
+ /*
+ * We should have also filled the MCV list exactly (data is allocated
+ * at the very end of the allocated buffer).
+ */
Assert(dataptr == ((char *) mcvlist + mcvlen));
/* deserialize the MCV items and translate the indexes to Datums */
@@ -1084,6 +1126,12 @@ statext_mcv_deserialize(bytea *data)
item->isnull = (bool *) isnullptr;
isnullptr += MAXALIGN(sizeof(bool) * ndims);
+ /* proper alignment of serialized MCV items */
+ AssertIsAligned(raw, ptr);
+
+ /* proper alignment of deserialized values/isnull arrays */
+ AssertIsAligned((char *) mcvlist, valuesptr);
+ AssertIsAligned((char *) mcvlist, isnullptr);
/* just point to the right place */
indexes = ITEM_INDEXES(ptr);
--
2.20.1