Hi, Andres Thanks a lot for the review!
> Why are we keeping that list / the "batch" allocation pattern? It
> doesn't actually seem useful to me after these changes. Given that we
> don't shrink hash-tables, we could just use the hash-table memory for
> this, no?
I don't think we can do that. According to comments:
```
* NOTE: once allocated, TabStatusArray structures are never moved or deleted
[...]
* This allows relcache pgstat_info pointers to be treated as long-lived data,
* avoiding repeated searches in pgstat_initstats() when a relation is
* repeatedly opened during a transaction.
```
It is my understanding that dynahash can't give same guarantees.
> 'faster ... lookup' doesn't strike me as a useful comment, because it's
> only useful in relation of the current code to the new code.
Agree, fixed to 'O(1) ... lookup'.
> It's not really freed...
Right, it's actually zeroed. Fixed.
> Arguably it'd be better to HASH_ENTER directly here, instead of doing
> two lookups.
Good point. Fixed.
> Think it'd be better to move the hash creation into its own function.
Agree, fixed.
On Mon, Mar 06, 2017 at 12:01:50PM -0800, Andres Freund wrote:
> Hi,
>
> This issue has bothered me in non-partitioned use-cases recently, so
> thanks for taking it up.
>
>
> On 2017-03-06 18:22:17 +0300, Aleksander Alekseev wrote:
> > diff --git a/src/backend/postmaster/pgstat.c
> > b/src/backend/postmaster/pgstat.c
> > index 2fb9a8bf58..fa906e7930 100644
> > --- a/src/backend/postmaster/pgstat.c
> > +++ b/src/backend/postmaster/pgstat.c
> > @@ -160,6 +160,16 @@ typedef struct TabStatusArray
> >
> > static TabStatusArray *pgStatTabList = NULL;
>
> Why are we keeping that list / the "batch" allocation pattern? It
> doesn't actually seem useful to me after these changes. Given that we
> don't shrink hash-tables, we could just use the hash-table memory for
> this, no?
>
> I think a separate list that only contains things that are "active" in
> the current transaction makes sense, because the current logic requires
> iterating over a potentially very large array at transaction commit.
>
>
> > +/* pgStatTabHash entry */
> > +typedef struct TabStatHashEntry
> > +{
> > + Oid t_id;
> > + PgStat_TableStatus* tsa_entry;
> > +} TabStatHashEntry;
> > +
> > +/* Hash table for faster t_id -> tsa_entry lookup */
> > +static HTAB *pgStatTabHash = NULL;
> > +
>
> 'faster ... lookup' doesn't strike me as a useful comment, because it's
> only useful in relation of the current code to the new code.
>
>
>
> > /*
> > * Backends store per-function info that's waiting to be sent to the
> > collector
> > * in this hash table (indexed by function OID).
> > @@ -815,7 +825,13 @@ pgstat_report_stat(bool force)
> > pgstat_send_tabstat(this_msg);
> > this_msg->m_nentries = 0;
> > }
> > +
> > + /*
> > + * Entry will be freed soon so we need to remove it
> > from the lookup table.
> > + */
> > + hash_search(pgStatTabHash, &entry->t_id, HASH_REMOVE,
> > NULL);
> > }
>
> It's not really freed...
>
>
> > static PgStat_TableStatus *
> > get_tabstat_entry(Oid rel_id, bool isshared)
> > {
> > + TabStatHashEntry* hash_entry;
> > PgStat_TableStatus *entry;
> > TabStatusArray *tsa;
> > - TabStatusArray *prev_tsa;
> > - int i;
> > +
> > + /* Try to find an entry */
> > + entry = find_tabstat_entry(rel_id);
> > + if(entry != NULL)
> > + return entry;
>
> Arguably it'd be better to HASH_ENTER directly here, instead of doing
> two lookups.
>
>
> > /*
> > - * Search the already-used tabstat slots for this relation.
> > + * Entry doesn't exist - creating one.
> > + * First make sure there is a free space in a last element of
> > pgStatTabList.
> > */
> > - prev_tsa = NULL;
> > - for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa =
> > tsa->tsa_next)
> > + if (!pgStatTabList)
> > {
> > - for (i = 0; i < tsa->tsa_used; i++)
> > - {
> > - entry = &tsa->tsa_entries[i];
> > - if (entry->t_id == rel_id)
> > - return entry;
> > - }
> > + HASHCTL ctl;
> >
> > - if (tsa->tsa_used < TABSTAT_QUANTUM)
> > + Assert(!pgStatTabHash);
> > +
> > + memset(&ctl, 0, sizeof(ctl));
> > + ctl.keysize = sizeof(Oid);
> > + ctl.entrysize = sizeof(TabStatHashEntry);
> > + ctl.hcxt = TopMemoryContext;
> > +
> > + pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup
> > table",
> > + TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS |
> > HASH_CONTEXT);
>
> Think it'd be better to move the hash creation into its own function.
>
>
> - Andres
--
Best regards,
Aleksander Alekseev
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7cacb1e9b2..08aaf1dac1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -160,6 +160,16 @@ typedef struct TabStatusArray
static TabStatusArray *pgStatTabList = NULL;
+/* pgStatTabHash entry */
+typedef struct TabStatHashEntry
+{
+ Oid t_id;
+ PgStat_TableStatus* tsa_entry;
+} TabStatHashEntry;
+
+/* Hash table for O(1) t_id -> tsa_entry lookup */
+static HTAB *pgStatTabHash = NULL;
+
/*
* Backends store per-function info that's waiting to be sent to the collector
* in this hash table (indexed by function OID).
@@ -815,7 +825,13 @@ pgstat_report_stat(bool force)
pgstat_send_tabstat(this_msg);
this_msg->m_nentries = 0;
}
+
+ /*
+ * Entry will be zeroed soon so we need to remove it from the lookup table.
+ */
+ hash_search(pgStatTabHash, &entry->t_id, HASH_REMOVE, NULL);
}
+
/* zero out TableStatus structs after use */
MemSet(tsa->tsa_entries, 0,
tsa->tsa_used * sizeof(PgStat_TableStatus));
@@ -1667,59 +1683,77 @@ pgstat_initstats(Relation rel)
}
/*
+ * Make sure pgStatTabList and pgStatTabHash are initialized.
+ */
+static void
+make_sure_stat_tab_initialized()
+{
+ HASHCTL ctl;
+
+ if (pgStatTabList)
+ return;
+
+ Assert(!pgStatTabHash);
+
+ memset(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(TabStatHashEntry);
+ ctl.hcxt = TopMemoryContext;
+
+ pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup table",
+ TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+ pgStatTabList = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
+ sizeof(TabStatusArray));
+}
+
+/*
* get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
*/
static PgStat_TableStatus *
get_tabstat_entry(Oid rel_id, bool isshared)
{
+ TabStatHashEntry* hash_entry;
PgStat_TableStatus *entry;
TabStatusArray *tsa;
- TabStatusArray *prev_tsa;
- int i;
+ bool found;
+
+ make_sure_stat_tab_initialized();
+
+ /*
+ * Find an entry or create a new one.
+ */
+ hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
+ if(found)
+ return hash_entry->tsa_entry;
/*
- * Search the already-used tabstat slots for this relation.
+ * `hash_entry` was just created and now we have to fill it.
+ * First make sure there is a free space in a last element of pgStatTabList.
*/
- prev_tsa = NULL;
- for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
+ tsa = pgStatTabList;
+ while(tsa->tsa_used == TABSTAT_QUANTUM)
{
- for (i = 0; i < tsa->tsa_used; i++)
+ if(tsa->tsa_next == NULL)
{
- entry = &tsa->tsa_entries[i];
- if (entry->t_id == rel_id)
- return entry;
+ tsa->tsa_next = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
+ sizeof(TabStatusArray));
}
- if (tsa->tsa_used < TABSTAT_QUANTUM)
- {
- /*
- * It must not be present, but we found a free slot instead. Fine,
- * let's use this one. We assume the entry was already zeroed,
- * either at creation or after last use.
- */
- entry = &tsa->tsa_entries[tsa->tsa_used++];
- entry->t_id = rel_id;
- entry->t_shared = isshared;
- return entry;
- }
+ tsa = tsa->tsa_next;
}
/*
- * We ran out of tabstat slots, so allocate more. Be sure they're zeroed.
- */
- tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
- sizeof(TabStatusArray));
- if (prev_tsa)
- prev_tsa->tsa_next = tsa;
- else
- pgStatTabList = tsa;
-
- /*
- * Use the first entry of the new TabStatusArray.
+ * Add an entry.
*/
entry = &tsa->tsa_entries[tsa->tsa_used++];
entry->t_id = rel_id;
entry->t_shared = isshared;
+
+ /*
+ * Add a corespinding entry to pgStatTabHash.
+ */
+ hash_entry->tsa_entry = entry;
return entry;
}
@@ -1731,22 +1765,19 @@ get_tabstat_entry(Oid rel_id, bool isshared)
PgStat_TableStatus *
find_tabstat_entry(Oid rel_id)
{
- PgStat_TableStatus *entry;
- TabStatusArray *tsa;
- int i;
+ TabStatHashEntry* hash_entry;
- for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
- {
- for (i = 0; i < tsa->tsa_used; i++)
- {
- entry = &tsa->tsa_entries[i];
- if (entry->t_id == rel_id)
- return entry;
- }
- }
+ /*
+ * There are no entries at all.
+ */
+ if(!pgStatTabHash)
+ return NULL;
- /* Not present */
- return NULL;
+ hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_FIND, NULL);
+ if(!hash_entry)
+ return NULL;
+
+ return hash_entry->tsa_entry;
}
/*
signature.asc
Description: PGP signature
