Jeff Janes <[email protected]> writes:
> On Fri, Nov 13, 2015 at 3:13 PM, Tom Lane <[email protected]> wrote:
>> There's probably no reason not to do that, but I'd be much more interested
>> in eliminating the slowness to begin with ...
> I was thinking about that as well, but I don't think that would be
> back-patchable, at least not the way I was envisioning it.
> I was thinking of detecting bad cases (had to count to over 10 before
> finding a novel name, more than 10 times) and then switching from an
> object-local count, to a global count, for the numbers to add to the
> name. But that would be a behavior change under some conditions.
I experimented with using a hash table to avoid the O(N^2) behavior.
This seems to work quite well, and I think it doesn't change the results
(leastwise, it does not break the regression tests).
It would be possible to do something similar to dodge the O(N^2) costs
in make_colname_unique/colname_is_unique; but it would be a lot messier,
and the tests I did suggest that it's fairly hard to get into a regime
where those functions are a huge part of the runtime anyway, because
we have limits on the numbers of columns. So I'm inclined to leave that
alone.
regards, tom lane
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 3388f7c..2b6de8f 100644
*** a/src/backend/utils/adt/ruleutils.c
--- b/src/backend/utils/adt/ruleutils.c
***************
*** 55,60 ****
--- 55,61 ----
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
+ #include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
*************** typedef struct
*** 267,272 ****
--- 268,282 ----
#define deparse_columns_fetch(rangetable_index, dpns) \
((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
+ /*
+ * Entry in set_rtable_names' hash table
+ */
+ typedef struct
+ {
+ char name[NAMEDATALEN]; /* Hash key --- must be first */
+ int counter; /* Largest addition used so far for name */
+ } NameHashEntry;
+
/* ----------
* Global data
*************** static void print_function_rettype(Strin
*** 312,319 ****
static void print_function_trftypes(StringInfo buf, HeapTuple proctup);
static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
Bitmapset *rels_used);
- static bool refname_is_unique(char *refname, deparse_namespace *dpns,
- List *parent_namespaces);
static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
List *parent_namespaces);
static void set_simple_column_names(deparse_namespace *dpns);
--- 322,327 ----
*************** static void
*** 2676,2690 ****
set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
Bitmapset *rels_used)
{
ListCell *lc;
- int rtindex = 1;
dpns->rtable_names = NIL;
foreach(lc, dpns->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
char *refname;
if (rels_used && !bms_is_member(rtindex, rels_used))
{
/* Ignore unreferenced RTE */
--- 2684,2744 ----
set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
Bitmapset *rels_used)
{
+ HASHCTL hash_ctl;
+ HTAB *names_hash;
+ NameHashEntry *hentry;
+ bool found;
+ int rtindex;
ListCell *lc;
dpns->rtable_names = NIL;
+ /* nothing more to do if empty rtable */
+ if (dpns->rtable == NIL)
+ return;
+
+ /*
+ * We use a hash table to hold known names, so that this process is O(N)
+ * not O(N^2) for N names.
+ */
+ MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+ hash_ctl.keysize = NAMEDATALEN;
+ hash_ctl.entrysize = sizeof(NameHashEntry);
+ hash_ctl.hcxt = CurrentMemoryContext;
+ names_hash = hash_create("set_rtable_names names",
+ list_length(dpns->rtable),
+ &hash_ctl,
+ HASH_ELEM | HASH_CONTEXT);
+ /* Preload the hash table with names appearing in parent_namespaces */
+ foreach(lc, parent_namespaces)
+ {
+ deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
+ ListCell *lc2;
+
+ foreach(lc2, olddpns->rtable_names)
+ {
+ char *oldname = (char *) lfirst(lc2);
+
+ if (oldname == NULL)
+ continue;
+ hentry = (NameHashEntry *) hash_search(names_hash,
+ oldname,
+ HASH_ENTER,
+ &found);
+ /* we do not complain about duplicate names in parent namespaces */
+ hentry->counter = 0;
+ }
+ }
+
+ /* Now we can scan the rtable */
+ rtindex = 1;
foreach(lc, dpns->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
char *refname;
+ /* Just in case this takes an unreasonable amount of time ... */
+ CHECK_FOR_INTERRUPTS();
+
if (rels_used && !bms_is_member(rtindex, rels_used))
{
/* Ignore unreferenced RTE */
*************** set_rtable_names(deparse_namespace *dpns
*** 2712,2767 ****
}
/*
! * If the selected name isn't unique, append digits to make it so
*/
! if (refname &&
! !refname_is_unique(refname, dpns, parent_namespaces))
{
! char *modname = (char *) palloc(strlen(refname) + 32);
! int i = 0;
! do
{
! sprintf(modname, "%s_%d", refname, ++i);
! } while (!refname_is_unique(modname, dpns, parent_namespaces));
! refname = modname;
}
dpns->rtable_names = lappend(dpns->rtable_names, refname);
rtindex++;
}
- }
-
- /*
- * refname_is_unique: is refname distinct from all already-chosen RTE names?
- */
- static bool
- refname_is_unique(char *refname, deparse_namespace *dpns,
- List *parent_namespaces)
- {
- ListCell *lc;
-
- foreach(lc, dpns->rtable_names)
- {
- char *oldname = (char *) lfirst(lc);
-
- if (oldname && strcmp(oldname, refname) == 0)
- return false;
- }
- foreach(lc, parent_namespaces)
- {
- deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
- ListCell *lc2;
-
- foreach(lc2, olddpns->rtable_names)
- {
- char *oldname = (char *) lfirst(lc2);
! if (oldname && strcmp(oldname, refname) == 0)
! return false;
! }
! }
! return true;
}
/*
--- 2766,2809 ----
}
/*
! * If the selected name isn't unique, append digits to make it so, and
! * make a new hash entry for it once we've got a unique name.
*/
! if (refname)
{
! hentry = (NameHashEntry *) hash_search(names_hash,
! refname,
! HASH_ENTER,
! &found);
! if (found)
! {
! /* Name already in use, must choose a new one */
! char *modname = (char *) palloc(strlen(refname) + 32);
! NameHashEntry *hentry2;
! do
! {
! sprintf(modname, "%s_%d", refname, ++(hentry->counter));
! hentry2 = (NameHashEntry *) hash_search(names_hash,
! modname,
! HASH_ENTER,
! &found);
! } while (found);
! hentry2->counter = 0; /* init new hash entry */
! refname = modname;
! }
! else
{
! /* Name not previously used, need only initialize hentry */
! hentry->counter = 0;
! }
}
dpns->rtable_names = lappend(dpns->rtable_names, refname);
rtindex++;
}
! hash_destroy(names_hash);
}
/*
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers