"Jim C. Nasby" <[EMAIL PROTECTED]> writes:
> The mention of the changes to the btree scan code in the latest weekly
> news got me curious so I started looking at the 'executive summary'
> (read as: README) of the patch changes for both the scan patch and the
> btbulkdelete patch. If my understanding is correct, vacuum will only see
> a speed improvement when an index's on-disk storage order has a low
> correlation to index order. Is there any way to see what that
> correlation is on a running system? I'm wondering if anyone has checked
> to see what kind of performance impact a highly out-of-order index has
> on index scans.

There's nothing built in.  If you feel like hacking something, I've
attached a truly ugly tool that I've used once or twice in the past to
debug broken indexes.  It's got some smarts about detecting inconsistent
index structure and it looks like this latest version was meant to dump
out the index keys of a particular index.  You could modify it to just
scan the level-zero pages and compute some statistics about their
ordering.

                        regards, tom lane

/*
 * Usage: checkindex filename
 *
 * Note: we read in the entire index file, hence this does not work well
 * for indexes bigger than available memory
 */
#include "postgres.h"

#include <unistd.h>

#include "access/nbtree.h"

static char *buffers;
static BlockNumber nblocks;

#define GetPage(p)      ((Page) (buffers + (p) * BLCKSZ))

static BlockNumber rootblk;
static uint32 rootlevel;

bool assert_enabled = true;


static void
check_metapage(void)
{
        Page    page = GetPage(0);
        BTMetaPageData *metad;
        BTPageOpaque metaopaque;

        metad = BTPageGetMeta(page);
        if (metad->btm_magic != BTREE_MAGIC)
                fprintf(stderr, "Bogus magic %lu\n", (long) metad->btm_magic);
        if (metad->btm_version != BTREE_VERSION)
                fprintf(stderr, "Bogus version %lu\n", (long) 
metad->btm_version);
        rootblk = metad->btm_root;
        rootlevel = metad->btm_level;

        metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
        if (metaopaque->btpo_flags != BTP_META)
                fprintf(stderr, "Bogus metapage flags 0x%x\n", 
metaopaque->btpo_flags);
}

static void
check_rootpage(void)
{
        Page    page;
        BTPageOpaque rootopaque;

        if (rootblk <= 0 || rootblk >= nblocks)
        {
                fprintf(stderr, "Bogus root block # %lu\n", (long) rootblk);
                return;
        }
        page = GetPage(rootblk);

        rootopaque = (BTPageOpaque) PageGetSpecialPointer(page);
        if (rootopaque->btpo_flags != BTP_ROOT)
                fprintf(stderr, "Bogus rootpage flags 0x%x\n", 
rootopaque->btpo_flags);
        if (rootopaque->btpo.level != rootlevel)
                fprintf(stderr, "Bogus rootpage level %u, expected %u\n",
                                rootopaque->btpo.level, rootlevel);
        if (rootopaque->btpo_prev != P_NONE)
                fprintf(stderr, "Bogus rootpage left-link 0x%x\n",
                                rootopaque->btpo_prev);
        if (rootopaque->btpo_next != P_NONE)
                fprintf(stderr, "Bogus rootpage right-link 0x%x\n",
                                rootopaque->btpo_next);
}

static int
print_text_key(unsigned char *tdata, int tsize)
{
        int             orig_tsize = tsize;
        int             dsize;

        Assert(tsize >= 4);
        dsize = *((int *) tdata);
        tdata += 4;
        tsize -= 4;
        dsize -= 4;
        Assert(dsize >= 0);
        Assert(tsize >= dsize);

        while (dsize-- > 0)
        {
                printf("%c", *tdata);
                tdata++;
                tsize--;
        }
        printf("\t");

        // kluge alignment
        while ((long) tdata % MAXIMUM_ALIGNOF)
        {
                tdata++;
                tsize--;
        }
        return orig_tsize - tsize;
}

static int
print_int_key(unsigned char *tdata, int tsize)
{
        Assert(tsize >= 4);
        printf("%d\t", *((int *) tdata));
        return 4;
}

static void
print_item(Page page, BlockNumber blk, OffsetNumber off)
{
        BTPageOpaque opaque;
        BTItem          btitem;
        IndexTuple      itup;
        ItemPointer ip;

        opaque = (BTPageOpaque) PageGetSpecialPointer(page);

        btitem = (BTItem) PageGetItem(page, PageGetItemId(page, off));
        itup = &(btitem->bti_itup);
        ip = &itup->t_tid;

        /* no key in upper-level first data item */
        if (P_ISLEAF(opaque) || off != P_FIRSTDATAKEY(opaque))
        {
                int                     tsize;
                unsigned char *tdata;
                int                     dsize;

                tsize = IndexTupleSize(itup) - sizeof(IndexTupleData);
                tdata = (unsigned char *) itup + sizeof(IndexTupleData);

                dsize = print_int_key(tdata, tsize);
                tdata += dsize; tsize -= dsize;

                dsize = print_int_key(tdata, tsize);
                tdata += dsize; tsize -= dsize;
        }

        if (off >= P_FIRSTDATAKEY(opaque))
                printf("\t%d\t%d",
                           ItemPointerGetBlockNumber(ip),
                           ItemPointerGetOffsetNumber(ip));
        else
                printf("\thigh key on page %u", blk);

        printf("\n");
}

static void
check_level(uint32 level, BlockNumber leftpage, uint32 expected_pages)
{
        uint32          npages = 0;
        BlockNumber     prev = P_NONE;
        BlockNumber     next;
        BlockNumber     blk;
        Page    page;
        BTPageOpaque opaque;

        for (blk = leftpage; blk != P_NONE; blk = next)
        {
                if (blk <= 0 || blk >= nblocks)
                {
                        fprintf(stderr, "Bogus right-link %u from %u\n", blk, 
prev);
                        break;
                }
                npages++;
                page = GetPage(blk);
                opaque = (BTPageOpaque) PageGetSpecialPointer(page);
                if (opaque->btpo_prev != prev)
                        fprintf(stderr, "Bogus left-link %u on %u (expected 
%u)\n",
                                        opaque->btpo_prev, blk, prev);
                if (opaque->btpo.level != level)
                        fprintf(stderr, "Bogus level %u on %u (expected %u)\n",
                                        opaque->btpo.level, blk, level);
                if (opaque->btpo_flags & (BTP_DELETED | BTP_META))
                        fprintf(stderr, "Bogus flags 0x%x on %u\n",
                                        opaque->btpo_flags, blk);

#define SHOW_LEVEL      0
#define SHOW_HI_KEY     1

                if (level == SHOW_LEVEL)
                {
                        OffsetNumber low,
                                high;

                        low = P_FIRSTDATAKEY(opaque);
                        high = PageGetMaxOffsetNumber(page);
                        for (; low <= high; low++)
                                print_item(page, blk, low);

#if SHOW_HI_KEY
                        if (!P_RIGHTMOST(opaque))
                                print_item(page, blk, P_HIKEY);
#endif
                }

                next = opaque->btpo_next;
                prev = blk;
        }
}

static void
check_levels(void)
{
        BlockNumber *leftpages;
        uint32 *levelcount;
        uint32  ndeleted = 0;
        BlockNumber blk;
        Page    page;
        BTPageOpaque opaque;
        uint32  lev;

        leftpages = calloc(sizeof(BlockNumber), rootlevel + 1);
        Assert(leftpages);
        levelcount = calloc(sizeof(uint32), rootlevel + 1);
        Assert(levelcount);

        for (blk = 1; blk < nblocks; blk++)
        {
                page = GetPage(blk);
                opaque = (BTPageOpaque) PageGetSpecialPointer(page);
                if (opaque->btpo_flags & BTP_DELETED)
                {
                        ndeleted++;
                        continue;
                }
                if ((opaque->btpo_flags & BTP_META) ||
                        ((opaque->btpo_flags & BTP_ROOT) && blk != rootblk) ||
                        ((opaque->btpo_flags & BTP_LEAF) && opaque->btpo.level 
!= 0))
                        fprintf(stderr, "Bogus flags 0x%x on page %u\n",
                                        opaque->btpo_flags, blk);
                if (opaque->btpo.level > rootlevel)
                {
                        fprintf(stderr, "Bogus level %u on page %u\n",
                                        opaque->btpo.level, blk);
                        continue;
                }
                levelcount[opaque->btpo.level]++;
                if (opaque->btpo_prev == P_NONE)
                {
                        if (leftpages[opaque->btpo.level] == P_NONE)
                                leftpages[opaque->btpo.level] = blk;
                        else
                                fprintf(stderr, "Multiple leftmost pages for 
level %u: %u, %u\n",
                                                opaque->btpo.level,
                                                leftpages[opaque->btpo.level],
                                                blk);
                }
        }

        for (lev = 0; lev <= rootlevel; lev++)
        {
                if (leftpages[lev] != P_NONE)
                        check_level(lev, leftpages[lev], levelcount[lev]);
        }
}

static void
check_downlinks(void)
{
        BlockNumber blk;

        for (blk = 1; blk < nblocks; blk++)
        {
                Page    page;
                BTPageOpaque opaque;
                OffsetNumber low,
                                        high;

                page = GetPage(blk);
                opaque = (BTPageOpaque) PageGetSpecialPointer(page);
                if (opaque->btpo_flags & BTP_DELETED)
                        continue;
                if (opaque->btpo.level == 0)
                        continue;                       /* nothing to check */
                low = P_FIRSTDATAKEY(opaque);
                high = PageGetMaxOffsetNumber(page);
                /*
                 * Internal page should always contain keys.
                 */
                if (high < low)
                        fprintf(stderr, "empty internal page %u (level %u)\n",
                                        blk, opaque->btpo.level);
                for (; low <= high; low++)
                {
                        BTItem          btitem;
                        IndexTuple      itup;
                        BlockNumber cblk;
                        Page    cpage;
                        BTPageOpaque copaque;

                        btitem = (BTItem) PageGetItem(page, PageGetItemId(page, 
low));
                        itup = &(btitem->bti_itup);
                        cblk = ItemPointerGetBlockNumber(&itup->t_tid);
                        if (cblk <= 0 || cblk >= nblocks)
                        {
                                fprintf(stderr, "Bogus downlink %u in %u/%u\n",
                                                cblk, blk, low);
                                continue;
                        }
                        cpage = GetPage(cblk);
                        copaque = (BTPageOpaque) PageGetSpecialPointer(cpage);
                        if (copaque->btpo_flags & BTP_DELETED)
                        {
                                fprintf(stderr, "Downlink %u in %u/%u points to 
deleted page\n",
                                                cblk, blk, low);
                                continue;
                        }
                        if (copaque->btpo.level != opaque->btpo.level-1)
                                fprintf(stderr, "Downlink %u in %u/%u (level 
%u) points to page of level %u\n",
                                                cblk, blk, low, 
opaque->btpo.level, copaque->btpo.level);
                }
        }
}

int
main(int argc, char **argv)
{
        FILE   *infile;
        unsigned long fsize;

        /* Read in the file */
        if (argc != 2)
        {
                fprintf(stderr, "usage: checkindex filename\n");
                return 1;
        }
        infile = fopen(argv[1], "rb");
        if (!infile)
        {
                perror(argv[1]);
                return 1;
        }
        if (fseek(infile, 0, SEEK_END) < 0)
        {
                perror(argv[1]);
                return 1;
        }
        fsize = ftell(infile);
        if (fseek(infile, 0, SEEK_SET) < 0)
        {
                perror(argv[1]);
                return 1;
        }
        if (fsize % BLCKSZ)
                fprintf(stderr, "Size of input file is not a multiple of 
BLCKSZ\n");
        nblocks = fsize / BLCKSZ;
        buffers = malloc(nblocks * BLCKSZ);
        if (!buffers)
        {
                fprintf(stderr, "out of memory\n");
                return 1;
        }
        if (fread(buffers, 1, nblocks * BLCKSZ, infile) != nblocks * BLCKSZ)
        {
                perror("short read");
                return 1;
        }
        fclose(infile);

        /* Start checking */
        check_metapage();
        check_rootpage();
        check_levels();
        check_downlinks();

        return 0;
}


/*
 * ExceptionalCondition - Handles the failure of an Assert()
 */
int
ExceptionalCondition(char *conditionName,
                                         char *errorType,
                                         char *fileName,
                                         int lineNumber)
{
        if (!PointerIsValid(conditionName)
                || !PointerIsValid(fileName)
                || !PointerIsValid(errorType))
                fprintf(stderr, "TRAP: ExceptionalCondition: bad arguments\n");
        else
        {
                fprintf(stderr, "TRAP: %s(\"%s\", File: \"%s\", Line: %d)\n",
                                errorType, conditionName,
                                fileName, lineNumber);
        }

        abort();

        return 0;
}
---------------------------(end of broadcast)---------------------------
TIP 2: Don't 'kill -9' the postmaster

Reply via email to