[PATCHES] Rewritten Index Advisor patch

Tom Lane Thu, 24 May 2007 17:47:59 -0700

Awhile back I complained that I didn't like the way that the index advisor
patch plugged into the system:
http://archives.postgresql.org/pgsql-hackers/2007-04/msg00346.php


Attached is a proposed replacement patch that keeps essentially all the
advisor logic outside the core backend, and uses the method I suggested of
modifying the result of get_relation_info() rather than installing phony
system-catalog entries.  Most of the patch bulk is actually just a small
refactoring of ExplainOnePlan's API to make it more convenient to call
from an advisor plugin.  I also added hooks to let an advisor work through
EXPLAIN, as I still maintain is a more useful behavior than doubling the
work involved in every planner call.  However, the planner() hook is still
there for those who are insistent.

To test the code, I made up a silly little proof-of-concept "advisor" that
just checks to see if 2-column indexes would be more helpful if the column
order were switched.  It's incomplete because I didn't do anything about
printing out a nice explanation of what the hypothetical index is.

Comments, objections?

                        regards, tom lane

regression=# create table fooey as select unique1,unique2 from tenk1;
SELECT
regression=# create index fooeyi on fooey(unique1,unique2);
CREATE INDEX
regression=# analyze fooey;
ANALYZE
regression=# explain select * from fooey order by unique2,unique1;
                           QUERY PLAN
-----------------------------------------------------------------
 Sort  (cost=809.39..834.39 rows=10000 width=8)
   Sort Key: unique2, unique1
   ->  Seq Scan on fooey  (cost=0.00..145.00 rows=10000 width=8)
(3 rows)

regression=# explain select * from fooey where unique2 in (1,2,3);
                      QUERY PLAN
-------------------------------------------------------
 Seq Scan on fooey  (cost=0.00..182.50 rows=3 width=8)
   Filter: (unique2 = ANY ('{1,2,3}'::integer[]))
(2 rows)

regression=# load '/home/tgl/pgsql/advisor';
LOAD
regression=# explain select * from fooey order by unique2,unique1;
                                       QUERY PLAN
----------------------------------------------------------------------------------------
 Sort  (cost=809.39..834.39 rows=10000 width=8)
   Sort Key: unique2, unique1
   ->  Seq Scan on fooey  (cost=0.00..145.00 rows=10000 width=8)

 Plan with hypothetical indexes:
 Index Scan using <hypothetical index> on fooey  (cost=0.00..376.00 rows=10000 
width=8)
(6 rows)

regression=# explain select * from fooey where unique2 in (1,2,3);
                                     QUERY PLAN
------------------------------------------------------------------------------------
 Seq Scan on fooey  (cost=0.00..182.50 rows=3 width=8)
   Filter: (unique2 = ANY ('{1,2,3}'::integer[]))

 Plan with hypothetical indexes:
 Bitmap Heap Scan on fooey  (cost=12.78..22.49 rows=3 width=8)
   Recheck Cond: (unique2 = ANY ('{1,2,3}'::integer[]))
   ->  Bitmap Index Scan on <hypothetical index>  (cost=0.00..12.77 rows=3 
width=0)
         Index Cond: (unique2 = ANY ('{1,2,3}'::integer[]))
(8 rows)

binzr7cPQO2Fd.bin
Description: advisor-hooks.patch.gz

#include "postgres.h"

#include "fmgr.h"
#include "commands/explain.h"
#include "optimizer/plancat.h"
#include "optimizer/planner.h"


PG_MODULE_MAGIC;

void            _PG_init(void);
void            _PG_fini(void);

static void my_get_relation_info(PlannerInfo *root, Oid relationObjectId,
                                                                 bool 
inhparent, RelOptInfo *rel);
static const char *my_explain_get_index_name(Oid indexId);


/*
 * Main hook to take over control during EXPLAIN of a query
 */
static void
my_ExplainOneQuery(Query *query, ExplainStmt *stmt, const char *queryString,
                                   ParamListInfo params, TupOutputState *tstate)
{
        Query   *savequery = NULL;
        PlannedStmt *plan;

        /*
         * Because planner scribbles on its input, we have to make a copy if
         * we want to plan twice.
         */
        if (!stmt->analyze)
                savequery = copyObject(query);

        /* plan the query */
        plan = planner(query, 0, params);

        /* run it (if needed) and produce output */
        ExplainOnePlan(plan, params, stmt, tstate);

        /* if not EXPLAIN ANALYZE, try it with hypothetical indexes */
        if (!stmt->analyze)
        {
                PG_TRY();
                {
                        PlannedStmt *hplan;

                        /* Enable special processing */
                        get_relation_info_hook = my_get_relation_info;
                        explain_get_index_name_hook = my_explain_get_index_name;

                        /* plan the query */
                        hplan = planner(savequery, 0, params);

                        /* if we got a better plan, print it */
                        if (hplan->planTree->total_cost < 
plan->planTree->total_cost)
                        {
                                do_text_output_oneline(tstate, ""); /* 
separator line */
                                do_text_output_oneline(tstate, "Plan with 
hypothetical indexes:");
                                ExplainOnePlan(hplan, params, stmt, tstate);
                        }
                }
                PG_CATCH();
                {
                        /* Make sure hooks get cleared on error exit */
                        get_relation_info_hook = NULL;
                        explain_get_index_name_hook = NULL;
                        PG_RE_THROW();
                }
                PG_END_TRY();
                get_relation_info_hook = NULL;
                explain_get_index_name_hook = NULL;
        }
}


/*
 * Get control during planner's get_relation_info() function, which sets up
 * a RelOptInfo struct based on the system catalog contents.  We can modify
 * the struct contents to cause the planner to work with a hypothetical
 * situation rather than what's actually in the catalogs.
 *
 * This simplistic example looks for two-column indexes on any two columns
 * (a,b), and sets up a hypothetical index in the other column order (b,a).
 */
static void
my_get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
                                         RelOptInfo *rel)
{
        ListCell   *ilist;

        /* Do nothing for an inheritance parent RelOptInfo */
        if (inhparent)
                return;

        foreach(ilist, rel->indexlist)
        {
                IndexOptInfo *oldindex = (IndexOptInfo *) lfirst(ilist);

                if (oldindex->ncolumns == 2)
                {
                        IndexOptInfo *info;
                        int                     ncolumns = oldindex->ncolumns;
                        int                     i;
                        ListCell   *lc;

                        info = makeNode(IndexOptInfo);

                        /* flat-copy as many fields as we can */
                        memcpy(info, oldindex, sizeof(IndexOptInfo));

                        /*
                         * In this toy example we just assign all hypothetical 
indexes
                         * OID 0, and the explain_get_index_name hook just 
prints
                         * <hypothetical index>.  In a realistic situation we'd 
probably
                         * assume that OIDs smaller than, say, 100 are never 
the OID of
                         * any real index, allowing us to identify one of up to 
100
                         * hypothetical indexes per plan.  Then we'd need to 
save aside
                         * some state data that would let the explain hooks 
print info
                         * about the selected index.
                         */
                        info->indexoid = InvalidOid;

                        /*
                         * Need to make opfamily array large enough to put a 
terminating
                         * zero at the end.
                         */
                        info->indexkeys = (int *) palloc(sizeof(int) * 
ncolumns);
                        info->opfamily = (Oid *) palloc0(sizeof(Oid) * 
(ncolumns + 1));
                        /* initialize these to zeroes in case index is 
unordered */
                        info->fwdsortop = (Oid *) palloc0(sizeof(Oid) * 
ncolumns);
                        info->revsortop = (Oid *) palloc0(sizeof(Oid) * 
ncolumns);
                        info->nulls_first = (bool *) palloc0(sizeof(bool) * 
ncolumns);

                        /* Reverse the order of the per-column arrays */
                        for (i = 0; i < ncolumns; i++)
                        {
                                info->indexkeys[i] = oldindex->indexkeys[1 - i];
                                info->opfamily[i] = oldindex->opfamily[1 - i];
                                info->fwdsortop[i] = oldindex->fwdsortop[1 - i];
                                info->revsortop[i] = oldindex->revsortop[1 - i];
                                info->nulls_first[i] = oldindex->nulls_first[1 
- i];
                        }

                        /* must reverse the order of the indexprs list, too */
                        info->indexprs = NIL;
                        foreach(lc, oldindex->indexprs)
                        {
                                Node *node = lfirst(lc);

                                info->indexprs = lcons(node, info->indexprs);
                        }

                        /*
                         * Put the phony IndexOptInfo on the front of the list, 
so that
                         * we don't get into an infinite loop.
                         */
                        rel->indexlist = lcons(info, rel->indexlist);
                }
        }
}


/*
 * Hook to let EXPLAIN print something for a nonexistent index
 *
 * This is too simplistic for real-world use.
 */
static const char *
my_explain_get_index_name(Oid indexId)
{
        if (indexId == InvalidOid)
                return "<hypothetical index>";

        return NULL;                            /* allow default behavior */
}


/*
 * _pg_init()                   - library load-time initialization
 *
 * DO NOT make this static nor change its name!
 */
void
_PG_init(void)
{
        /* Get into the hooks we need to be in all the time */
        ExplainOneQuery_hook = my_ExplainOneQuery;
}


/*
 * _PG_fini()                   - library unload-time finalization
 *
 * DO NOT make this static nor change its name!
 */
void
_PG_fini(void)
{
        /* Get out of all the hooks (just to be sure) */
        ExplainOneQuery_hook = NULL;
        get_relation_info_hook = NULL;
        explain_get_index_name_hook = NULL;
}

---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

[PATCHES] Rewritten Index Advisor patch

Reply via email to