Hi. I'm just posting this WIP patch where I've renamed fastbloat to pgstatbloat as suggested by Tomas, and added in the documentation, and so on. I still have to incorporate Amit's comments about the estimation of reltuples according to the way vacuum does it, and I expect to post that tomorrow (I just need to test a little more).
In the meantime, if anyone else was having trouble installing the extension due to the incorrect version in the control file, this is the patch you should be using. -- Abhijit
>From f809e070e8ea13b74c6206ca67a7eaf2a32e60fa Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen <a...@2ndquadrant.com> Date: Fri, 26 Dec 2014 12:37:13 +0530 Subject: Add pgstatbloat to pgstattuple --- contrib/pgstattuple/Makefile | 4 +- contrib/pgstattuple/pgstatbloat.c | 346 +++++++++++++++++++++ contrib/pgstattuple/pgstattuple--1.2--1.3.sql | 18 ++ .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} | 18 +- contrib/pgstattuple/pgstattuple.control | 2 +- doc/src/sgml/pgstattuple.sgml | 135 ++++++++ 6 files changed, 519 insertions(+), 4 deletions(-) create mode 100644 contrib/pgstattuple/pgstatbloat.c create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (73%) diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile index 862585c..d7d27a5 100644 --- a/contrib/pgstattuple/Makefile +++ b/contrib/pgstattuple/Makefile @@ -1,10 +1,10 @@ # contrib/pgstattuple/Makefile MODULE_big = pgstattuple -OBJS = pgstattuple.o pgstatindex.o $(WIN32RES) +OBJS = pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES) EXTENSION = pgstattuple -DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql +DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql PGFILEDESC = "pgstattuple - tuple-level statistics" REGRESS = pgstattuple diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c new file mode 100644 index 0000000..15c2cb9 --- /dev/null +++ b/contrib/pgstattuple/pgstatbloat.c @@ -0,0 +1,346 @@ +/* + * contrib/pgstattuple/pgstatbloat.c + * + * Abhijit Menon-Sen <a...@2ndquadrant.com> + * Portions Copyright (c) 2001,2002 Tatsuo Ishii (from pg_stattuple) + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "access/visibilitymap.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/multixact.h" +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/freespace.h" +#include "storage/procarray.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/tqual.h" +#include "commands/vacuum.h" + +PG_FUNCTION_INFO_V1(pgstatbloat); + +/* + * tuple_percent, dead_tuple_percent and free_percent are computable, + * so not defined here. + */ +typedef struct pgstatbloat_output_type +{ + uint64 table_len; + uint64 tuple_count; + uint64 tuple_len; + uint64 dead_tuple_count; + uint64 dead_tuple_len; + uint64 free_space; + uint64 total_pages; + uint64 scanned_pages; +} pgstatbloat_output_type; + +static Datum build_output_type(pgstatbloat_output_type *stat, + FunctionCallInfo fcinfo); +static Datum fbstat_relation(Relation rel, FunctionCallInfo fcinfo); +static Datum fbstat_heap(Relation rel, FunctionCallInfo fcinfo); + +/* + * build a pgstatbloat_output_type tuple + */ +static Datum +build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo) +{ +#define NCOLUMNS 10 +#define NCHARS 32 + + HeapTuple tuple; + char *values[NCOLUMNS]; + char values_buf[NCOLUMNS][NCHARS]; + int i; + double tuple_percent; + double dead_tuple_percent; + double free_percent; /* free/reusable space in % */ + double scanned_percent; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* + * Generate attribute metadata needed later to produce tuples from raw C + * strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + + if (stat->table_len == 0) + { + tuple_percent = 0.0; + dead_tuple_percent = 0.0; + free_percent = 0.0; + } + else + { + tuple_percent = 100.0 * stat->tuple_len / stat->table_len; + dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; + free_percent = 100.0 * stat->free_space / stat->table_len; + } + + scanned_percent = 0.0; + if (stat->total_pages != 0) + { + scanned_percent = 100 * stat->scanned_pages / stat->total_pages; + } + + for (i = 0; i < NCOLUMNS; i++) + values[i] = values_buf[i]; + i = 0; + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len); + snprintf(values[i++], NCHARS, "%.2f", scanned_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len); + snprintf(values[i++], NCHARS, "%.2f", tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len); + snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space); + snprintf(values[i++], NCHARS, "%.2f", free_percent); + + tuple = BuildTupleFromCStrings(attinmeta, values); + + return HeapTupleGetDatum(tuple); +} + +/* Returns live/dead tuple statistics for the given relid. */ + +Datum +pgstatbloat(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use pgstatbloat functions")))); + + rel = relation_open(relid, AccessShareLock); + + PG_RETURN_DATUM(fbstat_relation(rel, fcinfo)); +} + +/* + * A helper function to reject unsupported relation types. We depend on + * the visibility map to decide which pages we can skip, so we can't + * support indexes, for example, which don't have a VM. + */ + +static Datum +fbstat_relation(Relation rel, FunctionCallInfo fcinfo) +{ + const char *err; + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + return fbstat_heap(rel, fcinfo); + case RELKIND_TOASTVALUE: + err = "toast value"; + break; + case RELKIND_SEQUENCE: + err = "sequence"; + break; + case RELKIND_INDEX: + err = "index"; + break; + case RELKIND_VIEW: + err = "view"; + break; + case RELKIND_COMPOSITE_TYPE: + err = "composite type"; + break; + case RELKIND_FOREIGN_TABLE: + err = "foreign table"; + break; + default: + err = "unknown"; + break; + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" (%s) is not supported", + RelationGetRelationName(rel), err))); + return 0; +} + +/* + * This function takes an already open relation and scans its pages, + * skipping those that have the corresponding visibility map bit set. + * For pages we skip, we find the free space from the free space map + * and approximate tuple_len on that basis. For the others, we count + * the exact number of dead tuples etc. + * + * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but + * we do not try to avoid skipping single pages. + */ + +static Datum +fbstat_heap(Relation rel, FunctionCallInfo fcinfo) +{ + BlockNumber scanned, + nblocks, + blkno; + Buffer vmbuffer = InvalidBuffer; + pgstatbloat_output_type stat = {0}; + BufferAccessStrategy bstrategy; + TransactionId OldestXmin; + + OldestXmin = GetOldestXmin(rel, true); + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + scanned = 0; + nblocks = RelationGetNumberOfBlocks(rel); + + for (blkno = 0; blkno < nblocks; blkno++) + { + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + Size freespace; + + CHECK_FOR_INTERRUPTS(); + + /* + * If the page has only visible tuples, then we can find out the + * free space from the FSM and move on. + */ + + if (visibilitymap_test(rel, blkno, &vmbuffer)) + { + freespace = GetRecordedFreeSpace(rel, blkno); + stat.tuple_len += BLCKSZ - freespace; + stat.free_space += freespace; + continue; + } + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buf); + + stat.free_space += PageGetHeapFreeSpace(page); + + if (PageIsNew(page) || PageIsEmpty(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + scanned++; + + /* + * Look at each tuple on the page and decide whether it's live + * or dead, then count it and its size. Unlike lazy_scan_heap, + * we can afford to ignore problems and special cases. + */ + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleData tuple; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) || + ItemIdIsDead(itemid)) + { + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) + { + case HEAPTUPLE_DEAD: + case HEAPTUPLE_RECENTLY_DEAD: + stat.dead_tuple_len += tuple.t_len; + stat.dead_tuple_count++; + break; + case HEAPTUPLE_LIVE: + stat.tuple_len += tuple.t_len; + stat.tuple_count++; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + case HEAPTUPLE_DELETE_IN_PROGRESS: + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + break; + } + } + + UnlockReleaseBuffer(buf); + } + + stat.table_len = (uint64) nblocks * BLCKSZ; + stat.tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned, + stat.tuple_count); + stat.total_pages = nblocks; + stat.scanned_pages = scanned; + + if (BufferIsValid(vmbuffer)) + { + ReleaseBuffer(vmbuffer); + vmbuffer = InvalidBuffer; + } + + relation_close(rel, AccessShareLock); + + return build_output_type(&stat, fcinfo); +} diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql new file mode 100644 index 0000000..dba7993 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql @@ -0,0 +1,18 @@ +/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION pgstatbloat(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstatbloat' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql similarity index 73% rename from contrib/pgstattuple/pgstattuple--1.2.sql rename to contrib/pgstattuple/pgstattuple--1.3.sql index e5fa2f5..93593ee 100644 --- a/contrib/pgstattuple/pgstattuple--1.2.sql +++ b/contrib/pgstattuple/pgstattuple--1.3.sql @@ -1,4 +1,4 @@ -/* contrib/pgstattuple/pgstattuple--1.2.sql */ +/* contrib/pgstattuple/pgstattuple--1.3.sql */ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit @@ -77,3 +77,19 @@ CREATE FUNCTION pg_relpages(IN relname regclass) RETURNS BIGINT AS 'MODULE_PATHNAME', 'pg_relpagesbyid' LANGUAGE C STRICT; + +/* New stuff in 1.3 begins here */ + +CREATE FUNCTION pgstatbloat(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstatbloat' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control index a7cf47f..c03b180 100644 --- a/contrib/pgstattuple/pgstattuple.control +++ b/contrib/pgstattuple/pgstattuple.control @@ -1,5 +1,5 @@ # pgstattuple extension comment = 'show tuple-level statistics' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/pgstattuple' relocatable = true diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml index 9cabd71..2cd6cdc 100644 --- a/doc/src/sgml/pgstattuple.sgml +++ b/doc/src/sgml/pgstattuple.sgml @@ -358,6 +358,140 @@ pending_tuples | 0 </listitem> </varlistentry> + <varlistentry> + <term> + <indexterm> + <primary>pgstatbloat</primary> + </indexterm> + <function>pgstatbloat(regclass) returns record</> + </term> + + <listitem> + <para> + <function>pgstatbloat</function> is a faster alternative to + <function>pgstattuple</function> that returns approximate results. + The argument is the target relation's OID. + For example: +<programlisting> +test=> SELECT * FROM pgstatbloat('pg_catalog.pg_proc'::regclass); +-[ RECORD 1 ]--------+------- +table_len | 573440 +scanned_percent | 2 +approx_tuple_count | 2740 +approx_tuple_len | 561210 +approx_tuple_percent | 97.87 +dead_tuple_count | 0 +dead_tuple_len | 0 +dead_tuple_percent | 0 +free_space | 11996 +free_percent | 2.09 +</programlisting> + The output columns are described in <xref linkend="pgstatbloat-columns">. + </para> + + <para> + Whereas <function>pgstattuple</function> always performs a + full-table scan and returns an exact count of live and dead tuples + (and their sizes) and free space, <function>pgstatbloat</function> + tries to avoid the full-table scan and returns exact dead tuple and + free space statistics along with an approximation of the number and + size of live tuples. + </para> + + <para> + It does this by skipping pages that have only visible tuples + according to the visibility map (if a page has the corresponding VM + bit set, then it is assumed to contain no dead tuples). For such + pages, it derives the free space value from the free space map, and + assumes that the rest of the space on the page is taken up by live + tuples. + </para> + + <para> + For pages that cannot be skipped, it scans each tuple, recording its + presence and size in the appropriate counters, and adding up the + free space on the page. At the end, it estimates the total number of + live tuples based on the number of pages and tuples scanned (in the + same way that VACUUM estimates pg_class.reltuples). + </para> + + <table id="pgstatbloat-columns"> + <title><function>pgstatbloat</function> Output Columns</title> + <tgroup cols="3"> + <thead> + <row> + <entry>Column</entry> + <entry>Type</entry> + <entry>Description</entry> + </row> + </thead> + + <tbody> + <row> + <entry><structfield>table_len</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Physical relation length in bytes (exact)</entry> + </row> + <row> + <entry><structfield>scanned_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of table scanned</entry> + </row> + <row> + <entry><structfield>tuple_count</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Number of live tuples (estimated)</entry> + </row> + <row> + <entry><structfield>tuple_len</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Total length of live tuples in bytes (estimated)</entry> + </row> + <row> + <entry><structfield>tuple_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of live tuples</entry> + </row> + <row> + <entry><structfield>dead_tuple_count</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Number of dead tuples (exact)</entry> + </row> + <row> + <entry><structfield>dead_tuple_len</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Total length of dead tuples in bytes (exact)</entry> + </row> + <row> + <entry><structfield>dead_tuple_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of dead tuples</entry> + </row> + <row> + <entry><structfield>free_space</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Total free space in bytes (exact)</entry> + </row> + <row> + <entry><structfield>free_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of free space (exact)</entry> + </row> + + </tbody> + </tgroup> + </table> + + <para> + In the above output, the free space figures may not match the + <function>pgstattuple</function> output exactly, because the free + space map gives us an exact figure, but is not guaranteed to be + accurate. + </para> + + </listitem> + </varlistentry> + </variablelist> </sect2> @@ -366,6 +500,7 @@ pending_tuples | 0 <para> Tatsuo Ishii and Satoshi Nagayasu + Abhijit Menon-Sen </para> </sect2> -- 1.9.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers