Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-12 Thread Abhijit Menon-Sen
At 2015-05-13 03:04:11 +0200, and...@anarazel.de wrote:
>
> I can live with that, although I'd personally go with
> pgstattuple_approx()...

I could live with that too. Here's an incremental patch to rename the
function. (I'm not resubmitting the whole thing since you said you've
made some other changes too.)

Thank you.

-- Abhijit
>From 57d597f176294ebfe30efa6d73569505cbb41e31 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Wed, 13 May 2015 09:19:07 +0530
Subject: Rename pgstatapprox to pgstattuple_approx

---
 contrib/pgstattuple/pgstatapprox.c            |  4 ++--
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql |  4 ++--
 contrib/pgstattuple/pgstattuple--1.3.sql      |  4 ++--
 doc/src/sgml/pgstattuple.sgml                 | 12 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 46f5bc0..f3b5671 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -21,7 +21,7 @@
 #include "utils/tqual.h"
 #include "commands/vacuum.h"
 
-PG_FUNCTION_INFO_V1(pgstatapprox);
+PG_FUNCTION_INFO_V1(pgstattuple_approx);
 
 typedef struct output_type
 {
@@ -234,7 +234,7 @@ build_tuple(output_type *stat, FunctionCallInfo fcinfo)
  */
 
 Datum
-pgstatapprox(PG_FUNCTION_ARGS)
+pgstattuple_approx(PG_FUNCTION_ARGS)
 {
 	Oid			relid = PG_GETARG_OID(0);
 	Relation	rel;
diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
index bfcf464..99301a2 100644
--- a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
+++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
@@ -3,7 +3,7 @@
 -- complain if script is sourced in psql, rather than via ALTER EXTENSION
 \echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit
 
-CREATE FUNCTION pgstatapprox(IN reloid regclass,
+CREATE FUNCTION pgstattuple_approx(IN reloid regclass,
 OUT table_len BIGINT,   -- physical table length in bytes
 OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned
 OUT approx_tuple_count BIGINT,  -- estimated number of live tuples
@@ -14,5 +14,5 @@ CREATE FUNCTION pgstatapprox(IN reloid regclass,
 OUT dead_tuple_percent FLOAT8,  -- dead tuples in % (based on estimate)
 OUT approx_free_space BIGINT,   -- estimated free space in bytes
 OUT approx_free_percent FLOAT8) -- free space in % (based on estimate)
-AS 'MODULE_PATHNAME', 'pgstatapprox'
+AS 'MODULE_PATHNAME', 'pgstattuple_approx'
 LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple--1.3.sql b/contrib/pgstattuple/pgstattuple--1.3.sql
index f69b619..f3996e7 100644
--- a/contrib/pgstattuple/pgstattuple--1.3.sql
+++ b/contrib/pgstattuple/pgstattuple--1.3.sql
@@ -80,7 +80,7 @@ LANGUAGE C STRICT;
 
 /* New stuff in 1.3 begins here */
 
-CREATE FUNCTION pgstatapprox(IN reloid regclass,
+CREATE FUNCTION pgstattuple_approx(IN reloid regclass,
 OUT table_len BIGINT,   -- physical table length in bytes
 OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned
 OUT approx_tuple_count BIGINT,  -- estimated number of live tuples
@@ -91,5 +91,5 @@ CREATE FUNCTION pgstatapprox(IN reloid regclass,
 OUT dead_tuple_percent FLOAT8,  -- dead tuples in % (based on estimate)
 OUT approx_free_space BIGINT,   -- estimated free space in bytes
 OUT approx_free_percent FLOAT8) -- free space in % (based on estimate)
-AS 'MODULE_PATHNAME', 'pgstatapprox'
+AS 'MODULE_PATHNAME', 'pgstattuple_approx'
 LANGUAGE C STRICT;
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 4cba006..cca6dc9 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -361,19 +361,19 @@ pending_tuples | 0

 
  
-  pgstatapprox
+  pgstattuple_approx
  
- pgstatapprox(regclass) returns record
+ pgstattuple_approx(regclass) returns record
 
 
 
  
-  pgstatapprox is a faster alternative to
+  pgstattuple_approx is a faster alternative to
   pgstattuple that returns approximate results.
   The argument is the target relation's OID.
   For example:
 
-test=> SELECT * FROM pgstatapprox('pg_catalog.pg_proc'::regclass);
+test=> SELECT * FROM pgstattuple_approx('pg_catalog.pg_proc'::regclass);
 -[ RECORD 1 ]+---
 table_len| 573440
 scanned_percent  | 2
@@ -392,7 +392,7 @@ approx_free_percent  | 2.09
  
  Whereas pgstattuple always performs a
  full-table scan and returns an exact count of live and dead tuples
- (and their sizes) and free space, pgstatapprox
+ (and their sizes) and free space, pgstattuple_approx
  tries to avoid the full-table scan and returns exact dead tuple
  statistics along with an approximation of the number and
  size of live tuples and free space.
@@ -416,7 +416,7 @@ approx_free_percent  | 2.09

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-12 Thread Andres Freund
On 2015-05-12 00:42:14 +0530, Abhijit Menon-Sen wrote:
> At 2015-05-11 19:15:47 +0200, and...@anarazel.de wrote:
> > I don't really care how it's named, as long as it makes clear that
> > it's not an exact measurement.
> 
> Not having heard any better suggestions, I picked "pgstatapprox" as a
> compromise between length and familiarity/consistency with pgstattuple.

I can live with that, although I'd personally go with
pgstattuple_approx()...

Other than that and some minor local changes I've made, I'm ready to
commit this.

Greetings,

Andres Freund




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-11 Thread Abhijit Menon-Sen
At 2015-05-11 19:15:47 +0200, and...@anarazel.de wrote:
>
> TBH, I'd rather not touch unrelated things right now. We're pretty
> badly behind...

OK. That patch is independent; just ignore it.

> I don't really care how it's named, as long as it makes clear that
> it's not an exact measurement.

Not having heard any better suggestions, I picked "pgstatapprox" as a
compromise between length and familiarity/consistency with pgstattuple.

> > Should I count the space it would have free if it were initialised,
> > but leave the page alone for VACUUM to deal with?

And this is what the attached patch does.
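
Concretely, it amounts to something like this sketch (the attached
patch is authoritative; this is only for illustration):

	if (PageIsNew(page))
	{
		/*
		 * Count the space the page would have if it were initialised,
		 * but leave the page itself alone for VACUUM to deal with.
		 */
		stat->free_space += BLCKSZ - SizeOfPageHeaderData;
		UnlockReleaseBuffer(buf);
		continue;
	}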

I also cleaned up a few things that I didn't like but had left alone to
make the code look similar to pgstattuple. In particular, build_tuple()
now does nothing but build a tuple from values calculated earlier in
pgstatapprox_heap().

Thank you.

-- Abhijit

P.S. What, if anything, should be done about the complicated and likely
not very useful skip-only-min#-blocks logic in lazy_scan_heap?
>From 0a99fc61d36e3a3ff4003db95e5c299a5f07a850 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Fri, 26 Dec 2014 12:37:13 +0530
Subject: Add pgstatapprox to pgstattuple

---
 contrib/pgstattuple/Makefile   |   4 +-
 contrib/pgstattuple/pgstatapprox.c | 277 +
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql  |  18 ++
 .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} |  18 +-
 contrib/pgstattuple/pgstattuple.control|   2 +-
 doc/src/sgml/pgstattuple.sgml  | 135 ++
 6 files changed, 450 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstatapprox.c
 create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql
 rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (72%)

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..6083dab 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o pgstatapprox.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
new file mode 100644
index 000..46f5bc0
--- /dev/null
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -0,0 +1,277 @@
+/*
+ * contrib/pgstattuple/pgstatapprox.c
+ * Fast bloat estimation functions
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstatapprox);
+
+typedef struct output_type
+{
+	uint64		table_len;
+	uint64		scanned_percent;
+	uint64		tuple_count;
+	uint64		tuple_len;
+	double		tuple_percent;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	double		dead_tuple_percent;
+	uint64		free_space;
+	double		free_percent;
+} output_type;
+
+#define NUM_OUTPUT_COLUMNS 10
+
+/*
+ * This function takes an already open relation and scans its pages,
+ * skipping those that have the corresponding visibility map bit set.
+ * For pages we skip, we find the free space from the free space map
+ * and approximate tuple_len on that basis. For the others, we count
+ * the exact number of dead tuples etc.
+ *
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
+ * we do not try to avoid skipping single pages.
+ */
+
+static void
+pgstatapprox_heap(Relation rel, output_type *stat)
+{
+	BlockNumber scanned,
+nblocks,
+blkno;
+	Buffer		vmbuffer = InvalidBuffer;
+	BufferAccessStrategy bstrategy;
+	TransactionId OldestXmin;
+	uint64		misc_count = 0;
+
+	OldestXmin = GetOldestXmin(rel, true);
+	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+	nblocks = RelationGetNumberOfBlocks(rel);
+	scanned = 0;
+
+	for (blkno = 0; blkno < nblocks; blkno++)
+	{
+		Buffer		buf;
+		Page		page;
+		OffsetNumber offnum,
+	maxoff;
+		Size		freespace;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If the page has only visible tuples, then we can find out the
+		 * free space from the FSM and move on.
+		 */
+
+		if (visibilitymap_test(rel, blkno, &vmbuffer))
+		{
+			freespace = GetRecordedFreeSpace(rel, blkno);
+			stat->tuple_len += BLCKSZ - freespace;
+			stat->free_space += freespa

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-11 Thread Andres Freund
Hi,

On 2015-05-11 16:57:08 +0530, Abhijit Menon-Sen wrote:
> I've attached an updated patch for pgstatbloat, as well as a patch to
> replace two uses of BuildTupleFromCStrings() elsewhere in pgstattuple.

TBH, I'd rather not touch unrelated things right now. We're pretty badly
behind...

> I've made the changes you mentioned in your earlier mail, except that I
> have not changed the name pending further suggestions about what would
> be the best name.

I don't really care how it's named, as long as it makes clear that it's
not an exact measurement.

> > > I haven't checked, but I'm not sure that it's safe/meaningful to
> > > call PageGetHeapFreeSpace() on a new page.
> > 
> > OK, I'll check and fix if necessary.
> 
> You're right, PageGetHeapFreeSpace() isn't safe on a new page. I've
> added a guard to that call in the attached patch, but I'm not sure
> that's the right thing to do.

> Should I copy the orphaned new-page handling from lazy_scan_heap? What
> about for empty pages? Neither feels like a very good thing to do, but
> then neither does skipping the new page silently. Should I count the
> space it would have free if it were initialised, but leave the page
> alone for VACUUM to deal with? Or just leave it as it is?

I think there's absolutely no need for pgstattuple to do anything
here. It's not even desirable.

> + LockBuffer(buf, BUFFER_LOCK_SHARE);
> +
> + page = BufferGetPage(buf);
> +
> + if (!PageIsNew(page))
> + stat->free_space += PageGetHeapFreeSpace(page);
> +
> + if (PageIsNew(page) || PageIsEmpty(page))
> + {
> + UnlockReleaseBuffer(buf);
> + continue;
> + }

Shouldn't new pages be counted as being fully free or at least bloat?
Just disregarding them seems wrong.

Greetings,

Andres Freund




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-11 Thread Abhijit Menon-Sen
Hi Andres.

I've attached an updated patch for pgstatbloat, as well as a patch to
replace two uses of BuildTupleFromCStrings() elsewhere in pgstattuple.

I've made the changes you mentioned in your earlier mail, except that I
have not changed the name pending further suggestions about what would
be the best name.

Also:

At 2015-05-09 15:36:49 +0530, a...@2ndquadrant.com wrote:
>
> At 2015-05-09 02:20:51 +0200, and...@anarazel.de wrote:
> >
> > I haven't checked, but I'm not sure that it's safe/meaningful to
> > call PageGetHeapFreeSpace() on a new page.
> 
> OK, I'll check and fix if necessary.

You're right, PageGetHeapFreeSpace() isn't safe on a new page. I've
added a guard to that call in the attached patch, but I'm not sure
that's the right thing to do.

Should I copy the orphaned new-page handling from lazy_scan_heap? What
about for empty pages? Neither feels like a very good thing to do, but
then neither does skipping the new page silently. Should I count the
space it would have free if it were initialised, but leave the page
alone for VACUUM to deal with? Or just leave it as it is?

-- Abhijit
>From 421267bba8394255feed8f9b9424d25d0be9f979 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Mon, 11 May 2015 15:54:48 +0530
Subject: Make pgstattuple use heap_form_tuple instead of
 BuildTupleFromCStrings

---
 contrib/pgstattuple/pgstatindex.c | 43 ++---
 contrib/pgstattuple/pgstattuple.c | 45 ++-
 2 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index a2ea5d7..608f729 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -257,7 +257,8 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
 	{
 		TupleDesc	tupleDesc;
 		int			j;
-		char	   *values[10];
+		Datum		values[10];
+		bool		nulls[10];
 		HeapTuple	tuple;
 
 		/* Build a tuple descriptor for our result type */
@@ -265,33 +266,31 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
 			elog(ERROR, "return type must be a row type");
 
 		j = 0;
-		values[j++] = psprintf("%d", indexStat.version);
-		values[j++] = psprintf("%d", indexStat.level);
-		values[j++] = psprintf(INT64_FORMAT,
-			   (indexStat.root_pages +
-indexStat.leaf_pages +
-indexStat.internal_pages +
-indexStat.deleted_pages +
-indexStat.empty_pages) * BLCKSZ);
-		values[j++] = psprintf("%u", indexStat.root_blkno);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.internal_pages);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.leaf_pages);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.empty_pages);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.deleted_pages);
+		values[j++] = Int32GetDatum(indexStat.version);
+		values[j++] = Int32GetDatum(indexStat.level);
+		values[j++] = Int64GetDatum((indexStat.root_pages +
+	 indexStat.leaf_pages +
+	 indexStat.internal_pages +
+	 indexStat.deleted_pages +
+	 indexStat.empty_pages) * BLCKSZ);
+		values[j++] = Int32GetDatum(indexStat.root_blkno);
+		values[j++] = Int32GetDatum(indexStat.internal_pages);
+		values[j++] = Int32GetDatum(indexStat.leaf_pages);
+		values[j++] = Int32GetDatum(indexStat.empty_pages);
+		values[j++] = Int32GetDatum(indexStat.deleted_pages);
 		if (indexStat.max_avail > 0)
-			values[j++] = psprintf("%.2f",
-   100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
+			values[j++] = Float8GetDatum(100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
 		else
-			values[j++] = pstrdup("NaN");
+			values[j++] = CStringGetDatum("NaN");
 		if (indexStat.leaf_pages > 0)
-			values[j++] = psprintf("%.2f",
-   (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
+			values[j++] = Float8GetDatum((double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
 		else
-			values[j++] = pstrdup("NaN");
+			values[j++] = CStringGetDatum("NaN");
 
-		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
-	   values);
+		for (j = 0; j < 10; j++)
+			nulls[j] = false;
 
+		tuple = heap_form_tuple(tupleDesc, values, nulls);
 		result = HeapTupleGetDatum(tuple);
 	}
 
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index c3a8b1d..552f188 100644
--- a/contrib/pgstattuple/pgstattuple.c
+++ b/contrib/pgstattuple/pgstattuple.c
@@ -85,28 +85,20 @@ static Datum
 build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo)
 {
 #define NCOLUMNS	9
-#define NCHARS		32
 
 	HeapTuple	tuple;
-	char	   *values[NCOLUMNS];
-	char		values_buf[NCOLUMNS][NCHARS];
+	Datum		values[NCOLUMNS];
+	bool		nulls[NCOLUMNS];
 	int			i;
 	double		tuple_percent;
 	double		dead_tuple_percent;
 	double		free_percent;	/* free/reusable space in % */
 	TupleDesc	tupdesc;
-	AttInMetadata *attinmeta;
 
 	/* Build a tuple descriptor for our 

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-09 Thread Abhijit Menon-Sen
At 2015-05-09 02:20:51 +0200, and...@anarazel.de wrote:
>
> > + * Abhijit Menon-Sen 
> > + * Portions Copyright (c) 2001,2002 Tatsuo Ishii (from pgstattuple)
> 
> I think for new extension we don't really add authors and such anymore.

OK, I'll get rid of the boilerplate.

> Hm. I do wonder if this should really be called 'statbloat'. Perhaps
> it'd more appropriately be called pg_estimate_bloat or somesuch?

Since we've moved it into pgstattuple, perhaps pgstattuple_approximate()
or pgstattuple_estimated() would be a better name. I don't really care,
I'll change it to whatever people like.

> Also, is free_space really exact? The fsm isn't byte exact.

You're right, I'll fix that.

> Why go through C strings?  I personally think we really shouldn't add
> more callers to BuildTupleFromCStrings, it's such an absurd interface.

I just copied this more or less blindly from pgstattuple. I'll fix it
and submit a separate patch to fix the equivalent code in pgstattuple.
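
(For reference, the fix amounts to building Datums directly and calling
heap_form_tuple() instead of going through C strings. A simplified
sketch, not the exact code from the patch:)

	Datum		values[NCOLUMNS];
	bool		nulls[NCOLUMNS];

	memset(nulls, 0, sizeof(nulls));

	i = 0;
	values[i++] = Int64GetDatum(stat->table_len);
	values[i++] = Float8GetDatum(scanned_percent);
	/* ... and so on for the remaining columns ... */

	tuple = heap_form_tuple(tupdesc, values, nulls);
	return HeapTupleGetDatum(tuple);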

> I think it'd actually be fine to just say that the relation has to be
> a table or matview.

Good point. Agreed.

> I don't believe that rationale is really true these days. I'd much
> rather just rip this out here than copy the rather complex logic.

Yes, thanks, I very much agree that this isn't really worth copying.
I'll drop it in my next submission.

> I haven't checked, but I'm not sure that it's safe/meaningful to call
> PageGetHeapFreeSpace() on a new page.

OK, I'll check and fix if necessary.

Thanks for the feedback. I'll submit a revised patch shortly.

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-05-08 Thread Andres Freund
Hi,

On 2015-04-24 08:46:48 +0530, Abhijit Menon-Sen wrote:
> diff --git a/contrib/pgstattuple/pgstatbloat.c 
> b/contrib/pgstattuple/pgstatbloat.c
> new file mode 100644
> index 000..612e22b
> --- /dev/null
> +++ b/contrib/pgstattuple/pgstatbloat.c
> @@ -0,0 +1,389 @@
> +/*
> + * contrib/pgstattuple/pgstatbloat.c
> + *
> + * Abhijit Menon-Sen 
> + * Portions Copyright (c) 2001,2002  Tatsuo Ishii (from pgstattuple)

I think for new extension we don't really add authors and such anymore.

> + * Permission to use, copy, modify, and distribute this software and
> + * its documentation for any purpose, without fee, and without a
> + * written agreement is hereby granted, provided that the above
> + * copyright notice and this paragraph and the following two
> + * paragraphs appear in all copies.
> + *
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
> + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
> + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
> + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
> + * OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
> + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
> + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
> + */

Shouldn't be here in a contrib module.

> +PG_FUNCTION_INFO_V1(pgstatbloat);

> +CREATE FUNCTION pgstatbloat(IN reloid regclass,
> +OUT table_len BIGINT,   -- physical table length in bytes
> +OUT scanned_percent FLOAT8, -- what percentage of the table's 
> pages was scanned
> +OUT approx_tuple_count BIGINT,  -- estimated number of live tuples
> +OUT approx_tuple_len BIGINT,-- estimated total length in bytes 
> of live tuples
> +OUT approx_tuple_percent FLOAT8,-- live tuples in % (based on 
> estimate)
> +OUT dead_tuple_count BIGINT,-- exact number of dead tuples
> +OUT dead_tuple_len BIGINT,  -- exact total length in bytes of 
> dead tuples
> +OUT dead_tuple_percent FLOAT8,  -- dead tuples in % (based on 
> estimate)
> +OUT free_space BIGINT,  -- exact free space in bytes
> +OUT free_percent FLOAT8)-- free space in %

Hm. I do wonder if this should really be called 'statbloat'. Perhaps
it'd more appropriately be called pg_estimate_bloat or somesuch?

Also, is free_space really exact? The fsm isn't byte exact.

> +static Datum
> +build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo)
> +{
> +#define NCOLUMNS 10
> +#define NCHARS   32
> +
> + HeapTuple   tuple;
> + char   *values[NCOLUMNS];
> + charvalues_buf[NCOLUMNS][NCHARS];
> + int i;
> + double  tuple_percent;
> + double  dead_tuple_percent;
> + double  free_percent;   /* free/reusable space in % */
> + double  scanned_percent;
> + TupleDesc   tupdesc;
> + AttInMetadata *attinmeta;
> +
> + /* Build a tuple descriptor for our result type */
> + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
> + elog(ERROR, "return type must be a row type");
> +
> + /*
> +  * Generate attribute metadata needed later to produce tuples from raw C
> +  * strings
> +  */
> + attinmeta = TupleDescGetAttInMetadata(tupdesc);
> +
> + if (stat->table_len == 0)
> + {
> + tuple_percent = 0.0;
> + dead_tuple_percent = 0.0;
> + free_percent = 0.0;
> + }
> + else
> + {
> + tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
> + dead_tuple_percent = 100.0 * stat->dead_tuple_len / 
> stat->table_len;
> + free_percent = 100.0 * stat->free_space / stat->table_len;
> + }
> +
> + scanned_percent = 0.0;
> + if (stat->total_pages != 0)
> + {
> + scanned_percent = 100 * stat->scanned_pages / stat->total_pages;
> + }
> +
> + for (i = 0; i < NCOLUMNS; i++)
> + values[i] = values_buf[i];
> + i = 0;
> + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len);
> + snprintf(values[i++], NCHARS, "%.2f", scanned_percent);
> + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count);
> + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len);
> + snprintf(values[i++], NCHARS, "%.2f", tuple_percent);
> + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count);
> + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len);
> + snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent);
> + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space);
> + snprintf(values[

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-25 Thread Amit Kapila
On Fri, Apr 24, 2015 at 8:05 PM, Tomas Vondra 
wrote:
>
>
>
> On 04/24/15 14:58, Amit Kapila wrote:
>>
>> On Fri, Apr 24, 2015 at 8:46 AM, Abhijit Menon-Sen > > wrote:
>>  >
>>  > At 2015-04-24 08:35:40 +0530, amit.kapil...@gmail.com
>>  wrote:
>>  > >
>>  > > > Just stick a PG_RETURN_NULL() at the end?
>>  > >
>>  > > That should also work.
>>  >
>>  > OK, updated patch attached with just that one change.
>>  >
>>
>> Patch looks good to me, will mark it as Ready for Committer if Tomas
>> or anyone else doesn't have more to add in terms of review.
>
>
> FWIW, I'm OK with the patch as is. Your reviews were spot on.
>

Thanks, I have marked this patch as Ready For Committer.


With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-24 Thread Tomas Vondra



On 04/24/15 14:58, Amit Kapila wrote:

On Fri, Apr 24, 2015 at 8:46 AM, Abhijit Menon-Sen wrote:
 >
 > At 2015-04-24 08:35:40 +0530, amit.kapil...@gmail.com
 wrote:
 > >
 > > > Just stick a PG_RETURN_NULL() at the end?
 > >
 > > That should also work.
 >
 > OK, updated patch attached with just that one change.
 >

Patch looks good to me, will mark it as Ready for Committer if Tomas
or anyone else doesn't have more to add in terms of review.


FWIW, I'm OK with the patch as is. Your reviews were spot on.

regards

--
Tomas Vondra   http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-24 Thread Amit Kapila
On Fri, Apr 24, 2015 at 8:46 AM, Abhijit Menon-Sen 
wrote:
>
> At 2015-04-24 08:35:40 +0530, amit.kapil...@gmail.com wrote:
> >
> > > Just stick a PG_RETURN_NULL() at the end?
> >
> > That should also work.
>
> OK, updated patch attached with just that one change.
>

Patch looks good to me, will mark it as Ready for Committer if Tomas
or anyone else doesn't have more to add in terms of review.

With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-23 Thread Abhijit Menon-Sen
At 2015-04-24 08:35:40 +0530, amit.kapil...@gmail.com wrote:
>
> > Just stick a PG_RETURN_NULL() at the end?
> 
> That should also work.

OK, updated patch attached with just that one change.

I'm not doing anything about the rename. I don't know if it's possible
to make patch(1) rename a file at all (it wasn't, but maybe something
has changed recently?).

Note to friendly neighbourhood committers: the patch is expected to
rename pgstattuple--1.2.sql to pgstattuple--1.3.sql, which it does
if applied using git apply.

-- Abhijit
>From f67ddc68ab03fe8237058dcdca1e410ef9e95b5e Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Fri, 26 Dec 2014 12:37:13 +0530
Subject: Add pgstatbloat to pgstattuple

---
 contrib/pgstattuple/Makefile   |   4 +-
 contrib/pgstattuple/pgstatbloat.c  | 389 +
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql  |  18 +
 .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} |  18 +-
 contrib/pgstattuple/pgstattuple.control|   2 +-
 doc/src/sgml/pgstattuple.sgml  | 135 +++
 6 files changed, 562 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstatbloat.c
 create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql
 rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (73%)

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..d7d27a5 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c
new file mode 100644
index 000..612e22b
--- /dev/null
+++ b/contrib/pgstattuple/pgstatbloat.c
@@ -0,0 +1,389 @@
+/*
+ * contrib/pgstattuple/pgstatbloat.c
+ *
+ * Abhijit Menon-Sen 
+ * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pgstattuple)
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstatbloat);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here.
+ */
+typedef struct pgstatbloat_output_type
+{
+	uint64		table_len;
+	uint64		tuple_count;
+	uint64		misc_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		free_space;
+	uint64		total_pages;
+	uint64		scanned_pages;
+} pgstatbloat_output_type;
+
+static Datum build_output_type(pgstatbloat_output_type *stat,
+			   FunctionCallInfo fcinfo);
+static Datum pgstatbloat_heap(Relation rel, FunctionCallInfo fcinfo);
+
+/*
+ * build a pgstatbloat_output_type tuple
+ */
+static Datum
+build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+#define NCOLUMNS	10
+#define NCHARS		32
+
+	HeapTuple	tuple;
+	char	   *values[NCOLUMNS];
+	char		values_buf[NCOLUMNS][NCHARS];
+	int			i;
+	double		tuple_percent;
+	double		dead_tuple_percent;
+	double		free_percent;	/* free/reusable space in % */
+	double		scanned_percent;
+	TupleDesc	tupdesc;
+	AttInMetadata *attinmeta;
+
+	/* Build a tuple descriptor for our result type */
+	if (get_cal

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-23 Thread Amit Kapila
On Fri, Apr 24, 2015 at 8:04 AM, Abhijit Menon-Sen 
wrote:
>
> At 2015-04-24 07:22:27 +0530, amit.kapil...@gmail.com wrote:
> >
> > Few minor issues:
> > 1.
> > warning on windows
> >
> > \pgstatbloat.c(193): warning C4715: 'pgstatbloat' : not all control
paths
> > return a value
>
> This is because the function ends with an ereport(ERROR, …). What would
> you suggest here?

For the similar code in pgstattuple.c's pgstat_relation(), it simply
does return 0;

> Just stick a PG_RETURN_NULL() at the end?
>

That should also work.

> > 2.
> > Failed to install pgstattuple--1.3.sql
> >
> > You have to name sql file as pgstattuple--1.3.sql rather
> > than pgstattuple--1.2.sql
>
> How are you applying the patch? It already contains this:
>
> diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql
b/contrib/pgstattuple/pgstattuple--1.3.sql
> similarity index 73%
> rename from contrib/pgstattuple/pgstattuple--1.2.sql
> rename to contrib/pgstattuple/pgstattuple--1.3.sql
> index e5fa2f5..93593ee 100644
> --- a/contrib/pgstattuple/pgstattuple--1.2.sql
> +++ b/contrib/pgstattuple/pgstattuple--1.3.sql
>
> So it should do the right thing with git apply.
>

I was using patch -p1 ...


With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-23 Thread Abhijit Menon-Sen
At 2015-04-24 07:22:27 +0530, amit.kapil...@gmail.com wrote:
>
> Few minor issues:
> 1.
> warning on windows
> 
> \pgstatbloat.c(193): warning C4715: 'pgstatbloat' : not all control paths
> return a value

This is because the function ends with an ereport(ERROR, …). What would
you suggest here? Just stick a PG_RETURN_NULL() at the end?

> 2.
> Failed to install pgstattuple--1.3.sql
> 
> You have to name sql file as pgstattuple--1.3.sql rather
> than pgstattuple--1.2.sql

How are you applying the patch? It already contains this:

diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql 
b/contrib/pgstattuple/pgstattuple--1.3.sql
similarity index 73%
rename from contrib/pgstattuple/pgstattuple--1.2.sql
rename to contrib/pgstattuple/pgstattuple--1.3.sql
index e5fa2f5..93593ee 100644
--- a/contrib/pgstattuple/pgstattuple--1.2.sql
+++ b/contrib/pgstattuple/pgstattuple--1.3.sql

So it should do the right thing with git apply.

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-23 Thread Amit Kapila
On Wed, Apr 22, 2015 at 6:33 PM, Abhijit Menon-Sen 
wrote:
>
> At 2015-04-18 12:28:36 +0530, amit.kapil...@gmail.com wrote:
> >
> > I think you have missed addressing the point below:
> >
> > >> 4) prefetch
>
> Updated patch attached, as well as the diff against the earlier version
> just to make it easier to see the exact change I made (which is to copy
> the skip logic from lazy_scan_heap, as you suggested).
>

Few minor issues:
1.
warning on windows

\pgstatbloat.c(193): warning C4715: 'pgstatbloat' : not all control paths
return a value

2.
Failed to install pgstattuple--1.3.sql

You have to name sql file as pgstattuple--1.3.sql rather
than pgstattuple--1.2.sql


Other than above 2 points, patch looks good to me.



With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-22 Thread Abhijit Menon-Sen
At 2015-04-18 12:28:36 +0530, amit.kapil...@gmail.com wrote:
>
> I think you have missed addressing the point below:
> 
> >> 4) prefetch

Updated patch attached, as well as the diff against the earlier version
just to make it easier to see the exact change I made (which is to copy
the skip logic from lazy_scan_heap, as you suggested).

I'm not entirely convinced that this change is worthwhile, but it's easy
to make and difficult to argue against, so here it is. (I did test, and
it seems to work OK after the change.)
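
For reference, the borrowed logic boils down to skipping a run of
all-visible pages only if the run is at least SKIP_PAGES_THRESHOLD
blocks long, so as not to defeat the kernel's readahead. A simplified
sketch (the patch follows lazy_scan_heap more closely):

	if (blkno == next_not_all_visible_block)
	{
		/* look ahead for the next page that is not all-visible */
		for (next_not_all_visible_block++;
			 next_not_all_visible_block < nblocks;
			 next_not_all_visible_block++)
		{
			if (!visibilitymap_test(rel, next_not_all_visible_block,
									&vmbuffer))
				break;
		}

		/* skip the upcoming run only if it is long enough */
		skipping_all_visible_blocks =
			(next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD);
	}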

Amit, Tomas, thanks again for your review comments.

-- Abhijit
>From 3edb5426292d6097cb66339b865e99bf4f766646 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Fri, 26 Dec 2014 12:37:13 +0530
Subject: Add pgstatbloat to pgstattuple

---
 contrib/pgstattuple/Makefile   |   4 +-
 contrib/pgstattuple/pgstatbloat.c  | 387 +
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql  |  18 +
 .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} |  18 +-
 contrib/pgstattuple/pgstattuple.control|   2 +-
 doc/src/sgml/pgstattuple.sgml  | 135 +++
 6 files changed, 560 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstatbloat.c
 create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql
 rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (73%)

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..d7d27a5 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c
new file mode 100644
index 000..b19459a
--- /dev/null
+++ b/contrib/pgstattuple/pgstatbloat.c
@@ -0,0 +1,387 @@
+/*
+ * contrib/pgstattuple/pgstatbloat.c
+ *
+ * Abhijit Menon-Sen 
+ * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pgstattuple)
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstatbloat);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here.
+ */
+typedef struct pgstatbloat_output_type
+{
+	uint64		table_len;
+	uint64		tuple_count;
+	uint64		misc_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		free_space;
+	uint64		total_pages;
+	uint64		scanned_pages;
+} pgstatbloat_output_type;
+
+static Datum build_output_type(pgstatbloat_output_type *stat,
+			   FunctionCallInfo fcinfo);
+static Datum pgstatbloat_heap(Relation rel, FunctionCallInfo fcinfo);
+
+/*
+ * build a pgstatbloat_output_type tuple
+ */
+static Datum
+build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+#define NCOLUMNS	10
+#define NCHARS		32
+
+	HeapTuple	tuple;
+	char	   *values[NCOLUMNS];
+	char		values_buf[NCOLUMNS][NCHARS];
+	int			i;
+	double		tuple_percent;
+	double		dead_tuple_percent;
+	double		free_percent;	/* free/reusable space in % */
+	double		scanned_percent;
+	TupleDesc	tupdesc;
+	AttInMetadata *attinmeta;
+
+	/* Build a 

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-17 Thread Amit Kapila
On Fri, Apr 3, 2015 at 9:07 PM, Abhijit Menon-Sen 
wrote:
>
> At 2015-03-31 22:43:49 +0530, a...@2ndquadrant.com wrote:
> >
> > I'm just posting this WIP patch where I've renamed fastbloat to
> > pgstatbloat as suggested by Tomas, and added in the documentation, and
> > so on.
>
> Here's the revised version that also adds the count of RECENTLY_DEAD and
> INSERT/DELETE_IN_PROGRESS tuples to the call to vac_estimate_reltuples.
>

I think you have missed addressing the point below:

>> 4) prefetch
>>
>>fbstat_heap is using visibility map to skip fully-visible pages,
>>which is nice, but if we skip too many pages it breaks readahead
>>similarly to bitmap heap scan. I believe this is another place where
>>effective_io_concurrency (i.e. prefetch) would be appropriate.
>>

> Good point.  We can even think of using the technique used by Vacuum,
> which is to skip only when we can skip at least SKIP_PAGES_THRESHOLD
> pages.


Apart from this, one minor point:
+typedef struct pgstatbloat_output_type
+{
+ uint64 table_len;
+ uint64 tuple_count;
+ uint64 misc_count;


misc_count sounds out of place; maybe non_live_count?

With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-04-03 Thread Abhijit Menon-Sen
At 2015-03-31 22:43:49 +0530, a...@2ndquadrant.com wrote:
>
> I'm just posting this WIP patch where I've renamed fastbloat to
> pgstatbloat as suggested by Tomas, and added in the documentation, and
> so on.

Here's the revised version that also adds the count of RECENTLY_DEAD and
INSERT/DELETE_IN_PROGRESS tuples to the call to vac_estimate_reltuples.

Tomas, I agree that the build_output_tuple function could use cleaning
up, but it's the same as the corresponding code in pgstattuple, and it
seems to me reasonable to clean both up together in a separate patch.

-- Abhijit
>From d11b84627438fca12955dfad77042be0a27c9acc Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Fri, 26 Dec 2014 12:37:13 +0530
Subject: Add pgstatbloat to pgstattuple

---
 contrib/pgstattuple/Makefile   |   4 +-
 contrib/pgstattuple/pgstatbloat.c  | 340 +
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql  |  18 ++
 .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} |  18 +-
 contrib/pgstattuple/pgstattuple.control|   2 +-
 doc/src/sgml/pgstattuple.sgml  | 135 
 6 files changed, 513 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstatbloat.c
 create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql
 rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (73%)

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..d7d27a5 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c
new file mode 100644
index 000..2dd1900
--- /dev/null
+++ b/contrib/pgstattuple/pgstatbloat.c
@@ -0,0 +1,340 @@
+/*
+ * contrib/pgstattuple/pgstatbloat.c
+ *
+ * Abhijit Menon-Sen 
+ * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pgstattuple)
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstatbloat);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here.
+ */
+typedef struct pgstatbloat_output_type
+{
+	uint64		table_len;
+	uint64		tuple_count;
+	uint64		misc_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		free_space;
+	uint64		total_pages;
+	uint64		scanned_pages;
+} pgstatbloat_output_type;
+
+static Datum build_output_type(pgstatbloat_output_type *stat,
+			   FunctionCallInfo fcinfo);
+static Datum pgstatbloat_heap(Relation rel, FunctionCallInfo fcinfo);
+
+/*
+ * build a pgstatbloat_output_type tuple
+ */
+static Datum
+build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+#define NCOLUMNS	10
+#define NCHARS		32
+
+	HeapTuple	tuple;
+	char	   *values[NCOLUMNS];
+	char		values_buf[NCOLUMNS][NCHARS];
+	int			i;
+	double		tuple_percent;
+	double		dead_tuple_percent;
+	double		free_percent;	/* free/reusable space in % */
+	double		scanned_percent;
+	TupleDesc	tupdesc;
+	AttInMetadata *attinmeta;
+
+	/* Build a tuple 

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-03-31 Thread Abhijit Menon-Sen
Hi.

I'm just posting this WIP patch where I've renamed fastbloat to
pgstatbloat as suggested by Tomas, and added in the documentation, and
so on. I still have to incorporate Amit's comments about the estimation
of reltuples according to the way vacuum does it, and I expect to post
that tomorrow (I just need to test a little more).

In the meantime, if anyone else was having trouble installing the
extension due to the incorrect version in the control file, this is the
patch you should be using.

-- Abhijit
>From f809e070e8ea13b74c6206ca67a7eaf2a32e60fa Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen 
Date: Fri, 26 Dec 2014 12:37:13 +0530
Subject: Add pgstatbloat to pgstattuple

---
 contrib/pgstattuple/Makefile   |   4 +-
 contrib/pgstattuple/pgstatbloat.c  | 346 +
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql  |  18 ++
 .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} |  18 +-
 contrib/pgstattuple/pgstattuple.control|   2 +-
 doc/src/sgml/pgstattuple.sgml  | 135 
 6 files changed, 519 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstatbloat.c
 create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql
 rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (73%)

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..d7d27a5 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c
new file mode 100644
index 000..15c2cb9
--- /dev/null
+++ b/contrib/pgstattuple/pgstatbloat.c
@@ -0,0 +1,346 @@
+/*
+ * contrib/pgstattuple/pgstatbloat.c
+ *
+ * Abhijit Menon-Sen 
+ * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pg_stattuple)
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstatbloat);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here.
+ */
+typedef struct pgstatbloat_output_type
+{
+	uint64		table_len;
+	uint64		tuple_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		free_space;
+	uint64		total_pages;
+	uint64		scanned_pages;
+} pgstatbloat_output_type;
+
+static Datum build_output_type(pgstatbloat_output_type *stat,
+			   FunctionCallInfo fcinfo);
+static Datum fbstat_relation(Relation rel, FunctionCallInfo fcinfo);
+static Datum fbstat_heap(Relation rel, FunctionCallInfo fcinfo);
+
+/*
+ * build a pgstatbloat_output_type tuple
+ */
+static Datum
+build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+#define NCOLUMNS	10
+#define NCHARS		32
+
+	HeapTuple	tuple;
+	char	   *values[NCOLUMNS];
+	char		values_buf[NCOLUMNS][NCHARS];
+	int			i;
+	double		tuple_percent;
+	double		dead_tuple_percent;
+	double		free_percent;	/* free/reusable space in % */
+	double		scanned_percent;
+	TupleDesc	tupdesc;
+	AttInMetadata *attinmeta;
+
+	/* Build a tuple descriptor for our resu

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-03-03 Thread Amit Kapila
On Mon, Feb 23, 2015 at 7:11 AM, Tomas Vondra 
wrote:
> On 28.1.2015 05:03, Abhijit Menon-Sen wrote:
> > At 2015-01-27 17:00:27 -0600, jim.na...@bluetreble.com wrote:
> >>
> Otherwise, the code looks OK to me. Now, there are a few features I'd
> like to have for production use (to minimize the impact):
>
> 1) no index support :-(
>
>I'd like to see support for more relation types (at least btree
>indexes). Are there any plans for that? Do we have an idea on how to
>compute that?
>
> 2) sampling just a portion of the table
>
>For example, being able to sample just 5% of blocks, making it less
>obtrusive, especially on huge tables. Interestingly, there's a
>TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
>of the methods (e.g. functions behind SYSTEM sampling)?
>
> 3) throttling
>
>Another feature minimizing impact of running this on production might
>be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
>or something along those lines.
>

I think these features could be done separately if anybody is interested.
The patch in its proposed form seems useful to me.

> 4) prefetch
>
>fbstat_heap is using visibility map to skip fully-visible pages,
>which is nice, but if we skip too many pages it breaks readahead
>similarly to bitmap heap scan. I believe this is another place where
>effective_io_concurrency (i.e. prefetch) would be appropriate.
>

Good point.  We can even think of using the technique used by Vacuum,
which is to skip only when we can skip at least SKIP_PAGES_THRESHOLD
pages.
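
(Neither of these is in the posted patch yet; purely as an
illustration, a prefetch pass driven by a hypothetical window setting
might look like this:)

	BlockNumber	pblk;

	/* hypothetical: prefetch the next few blocks we intend to read */
	for (pblk = blkno + 1;
		 pblk < nblocks && pblk < blkno + 1 + prefetch_window;
		 pblk++)
	{
		if (!visibilitymap_test(rel, pblk, &vmbuffer))
			PrefetchBuffer(rel, MAIN_FORKNUM, pblk);
	}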

With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-03-03 Thread Amit Kapila
On Fri, Dec 26, 2014 at 1:08 PM, Abhijit Menon-Sen 
wrote:
>
> At 2014-09-25 15:40:11 +0530, a...@2ndquadrant.com wrote:
> >
> > All right, then I'll post a version that addresses Amit's other
> > points, adds a new file/function to pgstattuple, acquires content
> > locks, and uses HeapTupleSatisfiesVacuum, hint-bit setting and all.
>
> Sorry, I forgot to post this patch. It does what I listed above, and
> seems to work fine (it's still quite a lot faster than pgstattuple
> in many cases).
>

I think one of the comments is not handled in the latest patch:
5. Don't we need the handling for the empty page (PageIsEmpty()) case?

I think we should have similar handling for PageIsEmpty()
as you have done for PageIsNew() in your patch.

+ stat.tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
+  stat.tuple_count);

Here, for scanned tuples, you have used the live-tuple counter,
whereas I think we also need to count HEAPTUPLE_INSERT_IN_PROGRESS,
HEAPTUPLE_DELETE_IN_PROGRESS and HEAPTUPLE_RECENTLY_DEAD tuples.

Refer to the similar logic in Vacuum.
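
Roughly along these lines (a sketch only; counter names are
illustrative, not the ones in the patch):

	switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
	{
		case HEAPTUPLE_DEAD:
			dead_tuples++;		/* counted separately, as now */
			break;
		case HEAPTUPLE_LIVE:
		case HEAPTUPLE_RECENTLY_DEAD:
		case HEAPTUPLE_INSERT_IN_PROGRESS:
		case HEAPTUPLE_DELETE_IN_PROGRESS:
			/* all of these should feed the reltuples estimate */
			scanned_tuples++;
			break;
		default:
			break;
	}

	/* and later: */
	reltuples = vac_estimate_reltuples(rel, false, nblocks, scanned,
									   scanned_tuples);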

Although I am not in favor of using HeapTupleSatisfiesVacuum(), if you
want to use it then let's be consistent with what Vacuum does for the
estimation of tuples.


> A couple of remaining questions:
>
> 1. I didn't change the handling of LP_DEAD items, because the way it is
>now matches what pgstattuple counts. I'm open to changing it, though.
>Maybe it makes sense to count LP_DEAD items iff lp_len != 0? Or just
>leave it as it is? I think it doesn't matter much.
>
> 2. I changed the code to acquire the content locks on the buffer, as
>discussed, but it still uses HeapTupleSatisfiesVacuum. Amit suggested
>using HeapTupleSatisfiesVisibility, but it's not clear to me why that
>would be better. I welcome advice in this matter.
>

The reason to use HeapTupleSatisfiesVacuum() is that it helps us
with freezing and other similar things required by Vacuum.  It could
also be beneficial if we want to take specific action for the various
result codes of Vacuum.  Since this routine mostly gives an estimated
count, it might not matter much even if we use
HeapTupleSatisfiesVacuum(); however, it is better to be consistent
with pgstat_heap() in this case.
Do you have any reason for using HeapTupleSatisfiesVacuum() rather
than what is used in pgstat_heap()?

>(If anything, I should use HeapTupleIsSurelyDead, which doesn't set
>any hint bits, but which I earlier rejected as too conservative in
>its dead/not-dead decisions for this purpose.)
>
> (I've actually patched the pgstattuple.sgml docs as well, but I want to
> re-read that to make sure it's up to date, and didn't want to wait to
> post the code changes.)
>

Sure, post the same when it is ready.

With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-25 Thread Alvaro Herrera
Mark Kirkwood wrote:
> On 26/02/15 13:32, Simon Riggs wrote:
> >On 25 February 2015 at 23:30, Jim Nasby  wrote:
> >
> >>That said, I don't want to block this; I think it's useful. Though, perhaps
> >>it would be better as an extension instead of in contrib? I don't think it
> >>should be very version dependent?
> >
> >The whole point of this is to get it into contrib.
> >
> >It could have been published as an extension months ago.
> >
> 
> Is this intended to replace pg_freespacemap? Not trying to be overprotective
> or anything :-) but is this new extension/module better/faster/stronger/more
> accurate etc? If so then excellent! Otherwise I was wondering if people either a)
> didn't know about pg_freespacemap or b) consider that it is crap and won't
> use it.

It's a replacement for pgstattuple as far as I recall, not
pg_freespacemap.  The problem with pgstattuple is that it reads every
single page of the table; this fast tool skips reading pages that are
marked all-visible in the visibility map.  The disadvantage of
pg_freespacemap is that it doesn't have page data more recent than the
last vacuum, IIRC.

-- 
Álvaro Herrera    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-25 Thread Mark Kirkwood

On 26/02/15 13:32, Simon Riggs wrote:

On 25 February 2015 at 23:30, Jim Nasby  wrote:


That said, I don't want to block this; I think it's useful. Though, perhaps
it would be better as an extension instead of in contrib? I don't think it
should be very version dependent?


The whole point of this is to get it into contrib.

It could have been published as an extension months ago.



Is this intended to replace pg_freespacemap? Not trying to be 
overprotective or anything :-) but is this new extension/module 
better/faster/stronger/more accurate etc.? If so then excellent! Otherwise, I was 
wondering if people either a) didn't know about pg_freespacemap or b) 
consider that it is crap and won't use it.


Cheers

Mark




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-25 Thread Simon Riggs
On 25 February 2015 at 23:30, Jim Nasby  wrote:

> That said, I don't want to block this; I think it's useful. Though, perhaps
> it would be better as an extension instead of in contrib? I don't think it
> should be very version dependent?

The whole point of this is to get it into contrib.

It could have been published as an extension months ago.

-- 
 Simon Riggs   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, RemoteDBA, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-25 Thread Jim Nasby

On 2/25/15 2:56 PM, Tomas Vondra wrote:

On 24.2.2015 19:08, Jim Nasby wrote:

On 2/22/15 8:32 PM, Tomas Vondra wrote:

On 23.2.2015 03:20, Jim Nasby wrote:

On 2/22/15 5:41 PM, Tomas Vondra wrote:

Otherwise, the code looks OK to me. Now, there are a few features I'd
like to have for production use (to minimize the impact):

1) no index support:-(

  I'd like to see support for more relation types (at least btree
  indexes). Are there any plans for that? Do we have an idea on
how to
  compute that?


It'd be cleaner if we had an actual AM function for this, but see
below.


2) sampling just a portion of the table

  For example, being able to sample just 5% of blocks, making it
less
  obtrusive, especially on huge tables. Interestingly, there's a
  TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
  of the methods (e.g. functions behind SYSTEM sampling)?

3) throttling

  Another feature minimizing impact of running this on production
might
  be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
  or something along those lines.

4) prefetch

  fbstat_heap is using visibility map to skip fully-visible pages,
  which is nice, but if we skip too many pages it breaks readahead
  similarly to bitmap heap scan. I believe this is another place
where
  effective_io_concurrency (i.e. prefetch) would be appropriate.


All of those wishes are solved in one way or another by vacuum and/or
analyze. If we had a hook in the tuple scanning loop and at the end of
vacuum you could just piggy-back on it. But really all we'd need for
vacuum to be able to report this info is one more field in LVRelStats, a
call to GetRecordedFreeSpace for all-visible pages, and some logic to
deal with pages skipped because we couldn't get the vacuum lock.

Should we just add this to vacuum instead?


Possibly. I think the ultimate goal is to be able to get this info
easily and without disrupting the system performance too much (which is
difficult without sampling/throttling). If we can stuff that into
autovacuum reasonably, and then get the info from catalogs, I'm OK with
that.


Doing the counting in vacuum/analyze (auto or not) is quite easy, and it
would happen at the same time we're doing useful work. We would
automatically get the benefit of the throttling and sampling work that
those routines already do.


However I'm not sure putting this into autovacuum is actually possible,
because how do you merge data from multiple partial runs (when each of
them skipped different pages)?


ISTM that's just a form of sampling, no?


Maybe.

I was thinking about collecting the necessary info during the VACUUM
phase, and somehow keeping track of free space in the whole table. I
thought there would be trouble exactly because this phase only processes
pages that possibly need vacuuming (so it wouldn't be a truly random
sample, making the estimation tricky).


Well, all-visible pages can still be listed in the FSM, so we'd have at 
least that info. I don't think there can be dead tuples on an 
all-visible page; if so, that means free space is all we care about. So 
when pulling bloat info we'd just have to scan the VM and FSM; 
presumably that's pretty cheap.



But maybe that's not really true and it is possible to do that somehow.
For example what if we kept track of how much space each VACUUM freed,
and kept a running sum?


Well, then we'd also have to keep track of space we used. I don't think 
we want to do that.


On a completely unrelated note, the idea of logging details about vacuum 
runs in a table is appealing. Forcing users to rely on something like pgBadger 
for that data seems pretty silly to me. But that's obviously a 
completely separate discussion from this one.



It might also be done during the ANALYZE, but that seems a bit
complicated because that's based on a sample of rows, not pages.


Right, but you've got the page anyway so I doubt it'd cost much extra to 
measure the bloat on it. Might want to guard against counting a page 
twice, but...



Also,
the autovacuum_analyze_factor is 0.2 by default, so we could end up with up
to 20% bloat without knowing it (vacuum_factor=0.1 is not great either,
but it's better).


... I don't think it's practical to get this bloat measurement terribly 
precise, nor do I think we need to. Anyone who needs better than 20% 
accuracy can probably afford to fire off a manual vacuum at the same 
time (or run a scan-only version).



Besides, we don't need the same lock for figuring out bloat. We
could still measure bloat even if we can't vacuum the page, but I
think that's overkill. If we're skipping enough pages to mess with
the bloat measurement then we most likely need to teach vacuum how to
revisit pages.


Also, autovacuum is not the only place
where we free space - we'd have to handle HOT for example, I guess.


I wasn't thinking about trying to keep live bloat statistics, so HOT
wouldn't affect this.


I'm afraid

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-25 Thread Tomas Vondra
On 24.2.2015 19:08, Jim Nasby wrote:
> On 2/22/15 8:32 PM, Tomas Vondra wrote:
>> On 23.2.2015 03:20, Jim Nasby wrote:
>>> On 2/22/15 5:41 PM, Tomas Vondra wrote:
 Otherwise, the code looks OK to me. Now, there are a few features I'd
 like to have for production use (to minimize the impact):

 1) no index support:-(

  I'd like to see support for more relation types (at least btree
  indexes). Are there any plans for that? Do we have an idea on
 how to
  compute that?
>>>
>>> It'd be cleaner if we had an actual AM function for this, but see
>>> below.
>>>
 2) sampling just a portion of the table

  For example, being able to sample just 5% of blocks, making it
 less
  obtrusive, especially on huge tables. Interestingly, there's a
  TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
  of the methods (e.g. functions behind SYSTEM sampling)?

 3) throttling

  Another feature minimizing impact of running this on production
 might
  be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
  or something along those lines.

 4) prefetch

  fbstat_heap is using visibility map to skip fully-visible pages,
  which is nice, but if we skip too many pages it breaks readahead
  similarly to bitmap heap scan. I believe this is another place
 where
  effective_io_concurrency (i.e. prefetch) would be appropriate.
>>>
>>> All of those wishes are solved in one way or another by vacuum and/or
>>> analyze. If we had a hook in the tuple scanning loop and at the end of
>>> vacuum you could just piggy-back on it. But really all we'd need for
>>> vacuum to be able to report this info is one more field in LVRelStats, a
>>> call to GetRecordedFreeSpace for all-visible pages, and some logic to
>>> deal with pages skipped because we couldn't get the vacuum lock.
>>>
>>> Should we just add this to vacuum instead?
>>
>> Possibly. I think the ultimate goal is to be able to get this info
>> easily and without disrupting the system performance too much (which is
>> difficult without sampling/throttling). If we can stuff that into
>> autovacuum reasonably, and then get the info from catalogs, I'm OK with
>> that.
> 
> Doing the counting in vacuum/analyze (auto or not) is quite easy, and it
> would happen at the same time we're doing useful work. We would
> automatically get the benefit of the throttling and sampling work that
> those routines already do.
> 
>> However I'm not sure putting this into autovacuum is actually possible,
>> because how do you merge data from multiple partial runs (when each of
>> them skipped different pages)?
> 
> ISTM that's just a form of sampling, no?

Maybe.

I was thinking about collecting the necessary info during the VACUUM
phase, and somehow keeping track of free space in the whole table. I
thought there would be trouble exactly because this phase only processes
pages that possibly need vacuuming (so it wouldn't be a truly random
sample, making the estimation tricky).

But maybe that's not really true and it is possible to do that somehow.
For example what if we kept track of how much space each VACUUM freed,
and kept a running sum?

It might also be done during the ANALYZE, but that seems a bit
complicated because that's based on a sample of rows, not pages. Also,
the autovacuum_analyze_factor is 0.2 by default, so we could end up with up
to 20% bloat without knowing it (vacuum_factor=0.1 is not great either,
but it's better).


> Besides, we don't need the same lock for figuring out bloat. We
> could still measure bloat even if we can't vacuum the page, but I
> think that's overkill. If we're skipping enough pages to mess with
> the bloat measurement then we most likely need to teach vacuum how to
> revisit pages.
> 
>> Also, autovacuum is not the only place
>> where we free space - we'd have to handle HOT for example, I guess.
> 
> I wasn't thinking about trying to keep live bloat statistics, so HOT
> wouldn't affect this.

I'm afraid this might cause the estimate to drift over time, but I
guess it depends on the implementation - e.g. if we did this in ANALYZE
it'd be mostly immune to the problem, while collecting incremental info
during VACUUM might be affected.


Anyway, we don't have a patch trying to do that (certainly not in this
CF). I think it makes sense to add fastbloat() to pageinspect. Maybe
we'll get autovacuum-based solution in the future, but we don't have
that right now.

Actually, wouldn't that be a nice GSoC project? The scope seems about
right, not touching too many parts of the code base, etc.


-- 
Tomas Vondra    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-24 Thread Jim Nasby

On 2/22/15 8:32 PM, Tomas Vondra wrote:

On 23.2.2015 03:20, Jim Nasby wrote:

On 2/22/15 5:41 PM, Tomas Vondra wrote:

Otherwise, the code looks OK to me. Now, there are a few features I'd
like to have for production use (to minimize the impact):

1) no index support:-(

 I'd like to see support for more relation types (at least btree
 indexes). Are there any plans for that? Do we have an idea on how to
 compute that?


It'd be cleaner if we had an actual AM function for this, but see
below.


2) sampling just a portion of the table

 For example, being able to sample just 5% of blocks, making it less
 obtrusive, especially on huge tables. Interestingly, there's a
 TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
 of the methods (e.g. functions behind SYSTEM sampling)?

3) throttling

 Another feature minimizing impact of running this on production might
 be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
 or something along those lines.

4) prefetch

 fbstat_heap is using visibility map to skip fully-visible pages,
 which is nice, but if we skip too many pages it breaks readahead
 similarly to bitmap heap scan. I believe this is another place where
 effective_io_concurrency (i.e. prefetch) would be appropriate.


All of those wishes are solved in one way or another by vacuum and/or
analyze. If we had a hook in the tuple scanning loop and at the end of
vacuum you could just piggy-back on it. But really all we'd need for
vacuum to be able to report this info is one more field in LVRelStats, a
call to GetRecordedFreeSpace for all-visible pages, and some logic to
deal with pages skipped because we couldn't get the vacuum lock.

Should we just add this to vacuum instead?


Possibly. I think the ultimate goal is to be able to get this info
easily and without disrupting the system performance too much (which is
difficult without sampling/throttling). If we can stuff that into
autovacuum reasonably, and then get the info from catalogs, I'm OK with
that.


Doing the counting in vacuum/analyze (auto or not) is quite easy, and it 
would happen at the same time we're doing useful work. We would 
automatically get the benefit of the throttling and sampling work that 
those routines already do.



However I'm not sure putting this into autovacuum is actually possible,
because how do you merge data from multiple partial runs (when each of
them skipped different pages)?


ISTM that's just a form of sampling, no?

Besides, we don't need the same lock for figuring out bloat. We could 
still measure bloat even if we can't vacuum the page, but I think that's 
overkill. If we're skipping enough pages to mess with the bloat 
measurement then we most likely need to teach vacuum how to revisit pages.



Also, autovacuum is not the only place
where we free space - we'd have to handle HOT for example, I guess.


I wasn't thinking about trying to keep live bloat statistics, so HOT 
wouldn't affect this.

--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-22 Thread Tomas Vondra
On 23.2.2015 03:20, Jim Nasby wrote:
> On 2/22/15 5:41 PM, Tomas Vondra wrote:
>> Otherwise, the code looks OK to me. Now, there are a few features I'd
>> like to have for production use (to minimize the impact):
>>
>> 1) no index support:-(
>>
>> I'd like to see support for more relation types (at least btree
>> indexes). Are there any plans for that? Do we have an idea on how to
>> compute that?
> 
> It'd be cleaner if we had an actual AM function for this, but see
> below.
> 
>> 2) sampling just a portion of the table
>>
>> For example, being able to sample just 5% of blocks, making it less
>> obtrusive, especially on huge tables. Interestingly, there's a
>> TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
>> of the methods (e.g. functions behind SYSTEM sampling)?
>>
>> 3) throttling
>>
>> Another feature minimizing impact of running this on production might
>> be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
>> or something along those lines.
>>
>> 4) prefetch
>>
>> fbstat_heap is using visibility map to skip fully-visible pages,
>> which is nice, but if we skip too many pages it breaks readahead
>> similarly to bitmap heap scan. I believe this is another place where
>> effective_io_concurrency (i.e. prefetch) would be appropriate.
> 
> All of those wishes are solved in one way or another by vacuum and/or
> analyze. If we had a hook in the tuple scanning loop and at the end of
> vacuum you could just piggy-back on it. But really all we'd need for
> vacuum to be able to report this info is one more field in LVRelStats, a
> call to GetRecordedFreeSpace for all-visible pages, and some logic to
> deal with pages skipped because we couldn't get the vacuum lock.
> 
> Should we just add this to vacuum instead?

Possibly. I think the ultimate goal is to be able to get this info
easily and without disrupting the system performance too much (which is
difficult without sampling/throttling). If we can stuff that into
autovacuum reasonably, and then get the info from catalogs, I'm OK with
that.

However I'm not sure putting this into autovacuum is actually possible,
because how do you merge data from multiple partial runs (when each of
them skipped different pages)? Also, autovacuum is not the only place
where we free space - we'd have to handle HOT for example, I guess.


-- 
Tomas Vondra    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-22 Thread Jim Nasby

On 2/22/15 5:41 PM, Tomas Vondra wrote:

Otherwise, the code looks OK to me. Now, there are a few features I'd
like to have for production use (to minimize the impact):

1) no index support:-(

I'd like to see support for more relation types (at least btree
indexes). Are there any plans for that? Do we have an idea on how to
compute that?


It'd be cleaner if we had an actual AM function for this, but see below.


2) sampling just a portion of the table

For example, being able to sample just 5% of blocks, making it less
obtrusive, especially on huge tables. Interestingly, there's a
TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
of the methods (e.g. functions behind SYSTEM sampling)?

3) throttling

Another feature minimizing impact of running this on production might
be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
or something along those lines.

4) prefetch

fbstat_heap is using visibility map to skip fully-visible pages,
which is nice, but if we skip too many pages it breaks readahead
similarly to bitmap heap scan. I believe this is another place where
effective_io_concurrency (i.e. prefetch) would be appropriate.


All of those wishes are solved in one way or another by vacuum and/or 
analyze. If we had a hook in the tuple scanning loop and at the end of 
vacuum you could just piggy-back on it. But really all we'd need for 
vacuum to be able to report this info is one more field in LVRelStats, a 
call to GetRecordedFreeSpace for all-visible pages, and some logic to 
deal with pages skipped because we couldn't get the vacuum lock.


Should we just add this to vacuum instead?
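
To hand-wave a little, the vacuum-side change could look something like
this (nothing below exists today; the field and flag names are made up):

    /* hypothetical extra field in LVRelStats (vacuumlazy.c) */
    int64       vm_free_space;  /* FSM-recorded free space on skipped pages */

    /* in lazy_scan_heap(), where an all-visible block is being skipped */
    if (skipping_all_visible_block)
    {
        vacrelstats->vm_free_space += GetRecordedFreeSpace(onerel, blkno);
        continue;
    }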
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-02-22 Thread Tomas Vondra
Hi!

On 28.1.2015 05:03, Abhijit Menon-Sen wrote:
> At 2015-01-27 17:00:27 -0600, jim.na...@bluetreble.com wrote:
>>
>> It would be best to get this into a commit fest so it's not lost.
> 
> It's there already.
> 
> -- Abhijit
> 
> 

I looked at this patch today, so a few comments from me:

1) I believe the patch is slightly broken, because the version was
   bumped to 1.3 but only partially, so I get this on "make install"

$ make -j9 -s install
/usr/bin/install: cannot stat ‘./pgstattuple--1.3.sql’: No such
file or directory
../../src/makefiles/pgxs.mk:129: recipe for target 'install' failed
make[1]: *** [install] Error 1
Makefile:85: recipe for target 'install-pgstattuple-recurse' failed
make: *** [install-pgstattuple-recurse] Error 2
make: *** Waiting for unfinished jobs

   The problem seems to be that Makefile already lists --1.3.sql in the
   DATA section, but the file was not renamed (there's just --1.2.sql).

   After fixing that, it's also necessary to fix the version in the
   control file, otherwise the CREATE EXTENSION fails.

  default_version = '1.2' -> '1.3'

   At least that fixed the install for me.


2) Returning a Datum from fbstat_heap, which is a static function, seems a
   bit strange to me; I'd use the HeapTuple directly, but meh ...


3) The way the tuple is built - by first turning the values into strings
   and then turning them back into Datums by calling BuildTupleFromCStrings -
   is more serious, I guess. Why not just keep the Datums and call
   heap_form_tuple directly?

   I see the other functions in pgstattuple.c do use the string way, so
   maybe there's a reason for that? Or is this just to keep the code
   consistent in the extension?
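
   I mean something like this (just a sketch, with illustrative column
   assignments):

    TupleDesc   tupdesc;
    Datum       values[NCOLUMNS];
    bool        nulls[NCOLUMNS];
    HeapTuple   tuple;

    if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");
    tupdesc = BlessTupleDesc(tupdesc);

    memset(nulls, 0, sizeof(nulls));
    values[0] = Int64GetDatum((int64) stat->table_len);
    values[1] = Float8GetDatum(scanned_percent);    /* computed earlier */
    values[2] = Int64GetDatum((int64) stat->tuple_count);
    /* ... and so on for the remaining columns ... */

    tuple = heap_form_tuple(tupdesc, values, nulls);
    return HeapTupleGetDatum(tuple);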

4) I really doubt 'fastbloat' is a good name. I propose 'pgstatbloat'
   to make that consistent with the rest of pgstattuple functions. Or
   something similar, but 'fastbloat' is just plain confusing.


Otherwise, the code looks OK to me. Now, there are a few features I'd
like to have for production use (to minimize the impact):

1) no index support :-(

   I'd like to see support for more relation types (at least btree
   indexes). Are there any plans for that? Do we have an idea on how to
   compute that?

2) sampling just a portion of the table

   For example, being able to sample just 5% of blocks, making it less
   obtrusive, especially on huge tables. Interestingly, there's a
   TABLESAMPLE patch in this CF, so maybe it's possible to reuse some
   of the methods (e.g. functions behind SYSTEM sampling)?

3) throttling

   Another feature minimizing impact of running this on production might
   be some sort of throttling, e.g. saying 'limit the scan to 4 MB/s'
   or something along those lines.

4) prefetch

   fbstat_heap is using visibility map to skip fully-visible pages,
   which is nice, but if we skip too many pages it breaks readahead
   similarly to bitmap heap scan. I believe this is another place where
   effective_io_concurrency (i.e. prefetch) would be appropriate.

regards

-- 
Tomas Vondra    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-01-27 Thread Abhijit Menon-Sen
At 2015-01-27 17:00:27 -0600, jim.na...@bluetreble.com wrote:
>
> It would be best to get this into a commit fest so it's not lost.

It's there already.

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-01-27 Thread Jim Nasby

On 1/27/15 3:56 AM, Abhijit Menon-Sen wrote:

At 2015-01-26 18:47:29 -0600, jim.na...@bluetreble.com wrote:


Anything happen with this?


Nothing so far.


It would be best to get this into a commit fest so it's not lost.
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-01-27 Thread Abhijit Menon-Sen
At 2015-01-26 18:47:29 -0600, jim.na...@bluetreble.com wrote:
>
> Anything happen with this?

Nothing so far.

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2015-01-26 Thread Jim Nasby

On 12/26/14 1:38 AM, Abhijit Menon-Sen wrote:

At 2014-09-25 15:40:11 +0530,a...@2ndquadrant.com  wrote:

>
>All right, then I'll post a version that addresses Amit's other
>points, adds a new file/function to pgstattuple, acquires content
>locks, and uses HeapTupleSatisfiesVacuum, hint-bit setting and all.

Sorry, I forgot to post this patch. It does what I listed above, and
seems to work fine (it's still quite a lot faster than pgstattuple
in many cases).


Anything happen with this?
--
Jim Nasby, Data Architect, Blue Treble Consulting
Data in Trouble? Get it in Treble! http://BlueTreble.com




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-12-25 Thread Abhijit Menon-Sen
At 2014-09-25 15:40:11 +0530, a...@2ndquadrant.com wrote:
>
> All right, then I'll post a version that addresses Amit's other
> points, adds a new file/function to pgstattuple, acquires content
> locks, and uses HeapTupleSatisfiesVacuum, hint-bit setting and all.

Sorry, I forgot to post this patch. It does what I listed above, and
seems to work fine (it's still quite a lot faster than pgstattuple
in many cases).

A couple of remaining questions:

1. I didn't change the handling of LP_DEAD items, because the way it is
   now matches what pgstattuple counts. I'm open to changing it, though.
   Maybe it makes sense to count LP_DEAD items iff lp_len != 0? Or just
   leave it as it is? I think it doesn't matter much.

2. I changed the code to acquire the content locks on the buffer, as
   discussed, but it still uses HeapTupleSatisfiesVacuum. Amit suggested
   using HeapTupleSatisfiesVisibility, but it's not clear to me why that
   would be better. I welcome advice in this matter.

   (If anything, I should use HeapTupleIsSurelyDead, which doesn't set
   any hint bits, but which I earlier rejected as too conservative in
   its dead/not-dead decisions for this purpose.)

(I've actually patched the pgstattuple.sgml docs as well, but I want to
re-read that to make sure it's up to date, and didn't want to wait to
post the code changes.)

I also didn't change the name. I figure it's easy enough to change it
everywhere once all the remaining pieces are in place.

Comments welcome.

-- Abhijit
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..bae80c9 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o fastbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/fastbloat.c b/contrib/pgstattuple/fastbloat.c
new file mode 100644
index 000..b33db14
--- /dev/null
+++ b/contrib/pgstattuple/fastbloat.c
@@ -0,0 +1,367 @@
+/*
+ * contrib/pgstattuple/fastbloat.c
+ *
+ * Abhijit Menon-Sen 
+ * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pg_stattuple)
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(fastbloat);
+PG_FUNCTION_INFO_V1(fastbloatbyid);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here.
+ */
+typedef struct fastbloat_output_type
+{
+	uint64		table_len;
+	uint64		tuple_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		free_space;
+	uint64		total_pages;
+	uint64		scanned_pages;
+} fastbloat_output_type;
+
+static Datum build_output_type(fastbloat_output_type *stat,
+			   FunctionCallInfo fcinfo);
+static Datum fbstat_relation(Relation rel, FunctionCallInfo fcinfo);
+static Datum fbstat_heap(Relation rel, FunctionCallInfo fcinfo);
+
+/*
+ * build a fastbloat_output_type tuple
+ */
+static Datum
+build_output_type(fastbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+#define NCOLUMNS	10
+#define NCHARS		32
+
+	HeapTuple	tuple;
+	char	   *values[NCOLUMNS];
+	char		values_buf[NCOLUMNS][NCHARS];
+	int			i;
+	double		tuple_percent;
+	double		dead_tupl

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-26 Thread Amit Kapila
On Wed, Sep 24, 2014 at 2:26 PM, Abhijit Menon-Sen 
wrote:
>
> Hi Amit.
>
> Thanks for your comments, and I'm sorry it's taken me so long to
> respond.

No issues.

> At 2014-08-03 11:18:57 +0530, amit.kapil...@gmail.com wrote:

> > 7.
> > HeapTupleSatisfiesVacuumNoHint()
> > a. Why can't we use HeapTupleSatisfiesVisibility() with dirty snapshot
> > as we use for pgstattuple?
>
> Heavier locking. I tried to make do with the existing HTS* functions,
> but fastbloat was noticeably faster in tests with the current setup.

I am not sure why, for the purpose of this functionality, we need to
use a different HTS routine (HeapTupleSatisfiesVacuum) compared to
pgstat_heap().  Unless you have some specific purpose to achieve,
I think it is better to use HeapTupleSatisfiesVisibility().

> Maybe I'll call it not_too_slow_bloat().

How about pgfaststattuple() or pgquickstattuple() or pgfuzzystattuple()?

With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-25 Thread Andres Freund
On 2014-09-25 14:43:14 +0100, Simon Riggs wrote:
> On 25 September 2014 10:41, Andres Freund  wrote:
> > On 2014-09-25 10:24:39 +0530, Abhijit Menon-Sen wrote:
> >> At 2014-09-24 11:09:24 +0200, and...@2ndquadrant.com wrote:
> >> > I think it's completely unacceptable to copy a visibility routine.
> >>
> >> OK. Which visibility routine should I use, and should I try to create a
> >> variant that doesn't set hint bits?
> >
> > I've not yet followed your premise that you actually need one that
> > doesn't set hint bits...
> 
> Not least because I'm trying to solve a similar problem on another
> thread, so no need to make a special case here.

That's mostly unrelated though - Abhijit wants to avoid them because he
tried to avoid having *any* form of lock on the buffer. That's the
reason he tried to avoid hint bit setting. Since I don't believe that's
safe (at least there's by far not enough evidence about it), there's
simply no reason to avoid it.

Greetings,

Andres Freund

-- 
 Andres Freund http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-25 Thread Simon Riggs
On 25 September 2014 10:41, Andres Freund  wrote:
> On 2014-09-25 10:24:39 +0530, Abhijit Menon-Sen wrote:
>> At 2014-09-24 11:09:24 +0200, and...@2ndquadrant.com wrote:
>> > I think it's completely unacceptable to copy a visibility routine.
>>
>> OK. Which visibility routine should I use, and should I try to create a
>> variant that doesn't set hint bits?
>
> I've not yet followed your premise that you actually need one that
> doesn't set hint bits...

Not least because I'm trying to solve a similar problem on another
thread, so no need to make a special case here.

-- 
 Simon Riggs   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-25 Thread Abhijit Menon-Sen
At 2014-09-25 11:41:29 +0200, and...@2ndquadrant.com wrote:
>
> I've not yet followed your premise that you actually need one that
> doesn't set hint bits...

Oh.

All right, then I'll post a version that addresses Amit's other points,
adds a new file/function to pgstattuple, acquires content locks, and
uses HeapTupleSatisfiesVacuum, hint-bit setting and all. Maybe I'll
call it not_too_slow_bloat().

Thank you for the feedback.

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-25 Thread Andres Freund
On 2014-09-25 10:24:39 +0530, Abhijit Menon-Sen wrote:
> At 2014-09-24 11:09:24 +0200, and...@2ndquadrant.com wrote:
> > I think it's completely unacceptable to copy a visibility routine.
> 
> OK. Which visibility routine should I use, and should I try to create a
> variant that doesn't set hint bits?

I've not yet followed your premise that you actually need one that
doesn't set hint bits...

> I don't have any reasoning for why it's safe to not hold a content lock.
> If there is one, I need help to find it. If not, I'll acquire a content
> lock. (If anyone can explain why it isn't safe, I would appreciate it.)

I don't see why it'd be safe. Without the content lock you can come
across half written tuple headers and similar things. You can try to
argue that all the possible faults of that are harmless, but I think
that'd require a tremendous amount of code review and it'd be hard to
continue to guarantee. There's a reason we acquire locks, you know :)

I think it's saner to first get this working & committed properly *with*
the lock. If you afterwards have energy to improve it further, we can take
another look.

Greetings,

Andres Freund

-- 
 Andres Freund http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-24 Thread Abhijit Menon-Sen
At 2014-09-24 11:09:24 +0200, and...@2ndquadrant.com wrote:
>
> Why not add it to the stattuple extension, but as an independent
> function (and file)?

Thanks, that's a good idea. I'll do that.

> I think it's completely unacceptable to copy a visibility routine.

OK. Which visibility routine should I use, and should I try to create a
variant that doesn't set hint bits?

I don't have any reasoning for why it's safe to not hold a content lock.
If there is one, I need help to find it. If not, I'll acquire a content
lock. (If anyone can explain why it isn't safe, I would appreciate it.)

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-24 Thread Andres Freund
Hi,

On 2014-09-24 14:26:37 +0530, Abhijit Menon-Sen wrote:
> Thanks for your comments, and I'm sorry it's taken me so long to
> respond.
> 
> At 2014-08-03 11:18:57 +0530, amit.kapil...@gmail.com wrote:
> >
> > After looking at code, I also felt that it is better to add this as a
> > version of pg_stattuple.
> 
> I started off trying to do that, but now I'm afraid I strongly disagree.
> First, pg_stattuple works on relations and indexes, whereas fastbloat
> only works on relations (because of the VM/FSM use). Second, the code
> began to look hideous when mushed together, with too many ifs to avoid
> locking I didn't need and so on. The logic is just too different.

Why not add it to the stattuple extension, but as an independent
function (and file)? I don't really see a need for a completely new
extension; a separate extension seems wasteful.

> > 7.
> > HeapTupleSatisfiesVacuumNoHint()
> > a. Why can't we use HeapTupleSatisfiesVisibility() with dirty snapshot
> > as we use for pgstattuple?

I think it's completely unacceptable to copy a visibility routine. And I
don't believe for a second that avoiding hint bit setting makes it legal
to not acquire a content lock for this. What's your reasoning for that
being safe? If you argue that all possible corruption has limited impact
you need to do that *much* more explicitly and verbosely.

Greetings,

Andres Freund




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-09-24 Thread Abhijit Menon-Sen
Hi Amit.

Thanks for your comments, and I'm sorry it's taken me so long to
respond.

At 2014-08-03 11:18:57 +0530, amit.kapil...@gmail.com wrote:
>
> After looking at code, I also felt that it is better to add this as a
> version of pg_stattuple.

I started off trying to do that, but now I'm afraid I strongly disagree.
First, pg_stattuple works on relations and indexes, whereas fastbloat
only works on relations (because of the VM/FSM use). Second, the code
began to look hideous when mushed together, with too many ifs to avoid
locking I didn't need and so on. The logic is just too different.

> Is this assumption based on the reasoning that if the visibility map bit
> of a page is set, then there is a high chance that vacuum would have
> pruned the dead tuples and updated the FSM with the free space?

Right. I'm not convinced an explanation of the VM/FSM belongs in the
fastbloat documentation, but I'll see if I can make it clearer.

> 1. compilation errors […]
> I think this is not a proper multi-line comment. […]
> It is better to have CHECK_FOR_INTERRUPTS() in above loop. […]
> Incase of new page don't you need to account for freespace. […]
> 5. Don't we need the handling for empty page (PageIsEmpty()) case?

Thanks, have fixed, will push updates soon.

> What is the reason for not counting ItemIdIsDead() case for
> accounting of dead tuples.

Will reconsider and fix.

> 7.
> HeapTupleSatisfiesVacuumNoHint()
> a. Why can't we use HeapTupleSatisfiesVisibility() with dirty snapshot
> as we use for pgstattuple?

Heavier locking. I tried to make do with the existing HTS* functions,
but fastbloat was noticeably faster in tests with the current setup.

> b. If we want to use HeapTupleSatisfiesVacuum(), why can't we add one
> parameter to function which can avoid setting of hintbit.

This is certainly a possibility, and as you say it would be better in
terms of maintenance. I didn't do it that way because I wanted to keep
the extension self-contained rather than make it depend on core changes.
If there's consensus on adding a nohint parameter, sure, I can do that.
(But then fastbloat won't work on current versions, which would suck.)
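
For concreteness, the change under discussion would be something like this
(the extra parameter does not exist today; it's only the suggestion):

    /* current declaration in tqual.h */
    extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup,
                                                TransactionId OldestXmin,
                                                Buffer buffer);

    /* proposed: let callers opt out of setting hint bits */
    extern HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup,
                                                TransactionId OldestXmin,
                                                Buffer buffer,
                                                bool set_hint_bits);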

In the meantime, I'll merge the later updates from HTSVacuum into my
code. Thanks for the heads-up.

> function header of vac_estimate_reltuples() suggests that it is
> used by VACUUM and ANALYZE, I think it will be better to update
> the comment w.r.t new usage as well.

OK.

> 10. I think it should generate resource file as is done for other
> modules if you want to keep it as a separate module.

Thanks, will do.

> Do you really need to specify examples in README.

I don't *need* to, but I like it.

-- Abhijit




Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-08-02 Thread Amit Kapila
On Thu, Apr 3, 2014 at 3:11 AM, Abhijit Menon-Sen 
wrote:
>
> This is a follow-up to the thread at
> http://www.postgresql.org/message-id/4eb5fa1b.1090...@2ndquadrant.com
>
> A quick summary: that thread proposed adding a relation_free_space()
> function to the pageinspect extension.

> Various review comments were
> received, among which was the suggestion that the code belonged in
> pg_stattuple as a faster way to calculate free_percent.

You haven't mentioned why you didn't follow that approach.  After looking
at the code, I also felt that it is better to add this as a version of
pg_stattuple.

> ===
>
> I've attached an extension that produces largely pgstattuple-compatible
> numbers for a table without doing a full-table scan.
>
> It scans through the table, skipping blocks that have their visibility
> map bit set. For such pages, it gets the free space from the free space
> map, and assumes that all remaining space on the page is taken by live
> tuples. It scans other pages tuple-by-tuple and counts live and dead
> tuples and free space.

Is this assumption based on the reasoning that if the visibility map bit of
a page is set, then there is a high chance that vacuum would have pruned
the dead tuples and updated the FSM with the free space?
In any case, I think it would be better if you update the README and/or
code comments to mention the reason for this assumption.


1. compilation errors
1>contrib\fastbloat\fastbloat.c(450): error C2198: 'MultiXactIdIsRunning' :
too few arguments for call
1>contrib\fastbloat\fastbloat.c(467): error C2198: 'MultiXactIdIsRunning' :
too few arguments for call

Recent commit 05315 added a new parameter to this API, so this usage
of the API needs to be updated accordingly.

2.
/* Returns a tuple with live/dead tuple statistics for the named table.
 */

I think this is not a proper multi-line comment.

3.
fbstat_heap()
{
..
for (blkno = 0; blkno < nblocks; blkno++)
{
..
}

It is better to have CHECK_FOR_INTERRUPTS() in the above loop.
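
For example (placement is illustrative):

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        /* allow a long scan to be cancelled */
        CHECK_FOR_INTERRUPTS();
        ...
    }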

4.
if (PageIsNew(page))
{
ReleaseBuffer(buf);
continue;
}

In case of a new page, don't you need to account for free space?
Why can't we safely assume that all the space in the page is
free and add it to the free space counter?

5. Don't we need the handling for empty page (PageIsEmpty()) case?

6.
ItemIdIsDead(itemid))
{
continue;
}

What is the reason for not counting the ItemIdIsDead() case in the
accounting of dead tuples?

I think for vacuum it is okay to skip that case, because the same
is counted via the heap_page_prune() call.

7.
HeapTupleSatisfiesVacuumNoHint()
a. Why can't we use HeapTupleSatisfiesVisibility() with a dirty snapshot,
as we do for pgstattuple?  I think it would have the advantage of
keeping the code for fastbloat similar to pgstattuple.
b. If we want to use HeapTupleSatisfiesVacuum(), why can't we
add one parameter to the function to avoid setting hint bits?
Are you worried about the performance impact it might cause in
other code paths?  If yes, which path are you mainly worried about?
c. I agree that there is an advantage to avoiding the hint-bit code, but
as this operation will not be frequent, I am not sure it's a good idea
to add a separate version of the tuple-visibility function.

In general, I think the main advantage of this patch comes from the
fact that it can skip scanning pages based on the visibility map, so
it seems to me that it is better if we keep the rest of the code
similar to pgstattuple.


8.
HeapTupleSatisfiesVacuumNoHint(HeapTuple htup, TransactionId OldestXmin)

Some new code has been added to the corresponding function
HeapTupleSatisfiesVacuum(), so if we want to use this copy, those
changes need to be carried over to this function as well.  I also
think it is better to move this into tqual.c along with the other
similar functions.

9.
* This routine is shared by VACUUM and ANALYZE.
 */
double
vac_estimate_reltuples(Relation relation, bool is_analyze,
   BlockNumber total_pages,
   BlockNumber scanned_pages,
   double scanned_tuples)

The function header of vac_estimate_reltuples() says that it is
used by VACUUM and ANALYZE; I think it would be better to update
the comment with respect to the new usage as well.

10. I think it should generate a resource file, as is done for other
modules, if you want to keep it as a separate module.
Example:
MODULE_big = btree_gin
OBJS = btree_gin.o $(WIN32RES)

11.
README.txt
> Here is an example comparing the output of fastbloat and pgstattuple for
the same table:

Do you really need to include examples in the README?  I think it would be
better to put the examples in the documentation (.sgml).



With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-04-02 Thread Robert Haas
On Wed, Apr 2, 2014 at 5:41 PM, Abhijit Menon-Sen  wrote:
> I've attached an extension that produces largely pgstattuple-compatible
> numbers for a table without doing a full-table scan.
>
> It scans through the table, skipping blocks that have their visibility
> map bit set. For such pages, it gets the free space from the free space
> map, and assumes that all remaining space on the page is taken by live
> tuples. It scans other pages tuple-by-tuple and counts live and dead
> tuples and free space.

That's clever.  I think it might underestimate free space relative to
tuples because the free space map isn't guaranteed to be completely
correct.  But I guess you knew that already...

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company




[HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

2014-04-02 Thread Abhijit Menon-Sen
This is a follow-up to the thread at
http://www.postgresql.org/message-id/4eb5fa1b.1090...@2ndquadrant.com

A quick summary: that thread proposed adding a relation_free_space()
function to the pageinspect extension. Various review comments were
received, among which was the suggestion that the code belonged in
pg_stattuple as a faster way to calculate free_percent.

===

I've attached an extension that produces largely pgstattuple-compatible
numbers for a table without doing a full-table scan.

It scans through the table, skipping blocks that have their visibility
map bit set. For such pages, it gets the free space from the free space
map, and assumes that all remaining space on the page is taken by live
tuples. It scans other pages tuple-by-tuple and counts live and dead
tuples and free space.
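
In outline, the per-block logic looks roughly like this (a simplified
sketch of the attached fastbloat.c; locking details and the final
vac_estimate_reltuples() call for the live-tuple estimate are omitted):

    Buffer  vmbuffer = InvalidBuffer;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        CHECK_FOR_INTERRUPTS();

        if (visibilitymap_test(rel, blkno, &vmbuffer))
        {
            /*
             * All-visible page: take its free space from the FSM and
             * assume the rest of the block is live tuple data, without
             * reading the page at all.
             */
            Size    freespace = GetRecordedFreeSpace(rel, blkno);

            stat.free_space += freespace;
            stat.tuple_len += BLCKSZ - freespace;
            continue;
        }

        /* otherwise read the page and classify each tuple */
        buf = ReadBuffer(rel, blkno);
        page = BufferGetPage(buf);
        stat.free_space += PageGetHeapFreeSpace(page);
        /* ... walk the line pointers, HeapTupleSatisfiesVacuum, count ... */
        ReleaseBuffer(buf);
        scanned++;
    }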

Here's a comparison of fastbloat and pgstattuple output on a 50-million
row table with some holes created with a single big DELETE statement:

ams=# select * from fastbloat('x');
 table_len  | scanned_percent | approx_tuple_count | approx_tuple_len | 
approx_tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | 
free_space | free_percent 
+-++--+--+--++++--
 6714761216 |  17 |   41318301 |   5483815648 | 
   81.67 |  8681708 | 258624 |  16.55 |   
80972912 | 1.21
(1 row)

Time: 639.455 ms

ams=# select * from pgstattuple('x');
 table_len  | tuple_count | tuple_len  | tuple_percent | dead_tuple_count | 
dead_tuple_len | dead_tuple_percent | free_space | free_percent 
+-++---+--++++--
 6714761216 |41318292 | 5288741376 | 78.76 |  8681708 | 
258624 |  16.55 |   91810372 | 1.37
(1 row)

Time: 15610.651 ms

In the above, the table_len is nblocks*BLCKSZ, and the dead_tuple_count,
dead_tuple_len, dead_tuple_percent, free_space, and free_percent are all
exact. scanned_percent shows the percentage of pages that were scanned
tuple-by-tuple (the others having been skipped based on the VM bit).
The live tuple count, size, and percentage are all estimates.

The approx_tuple_count is calculated using vac_estimate_reltuples based
on the pages/tuples that were scanned. The approx_tuple_len is the exact
size of the live tuples on scanned pages, plus the approximate size from
skipped pages (BLCKSZ-GetRecordedFreeSpace()). This is an overestimate,
because it's counting the line pointer array as live tuple space.

Even in the worst case, when every page has dead tuples, fastbloat is
marginally faster than pgstattuple. The same table as the first example,
but with every even-numbered row deleted:

ams=# select * from fastbloat('x');
 table_len  | scanned_percent | approx_tuple_count | approx_tuple_len | 
approx_tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | 
free_space | free_percent 
+-++--+--+--++++--
 6714761216 | 100 |   20659150 |   2644371200 | 
   39.38 | 20659142 | 2644370176 |  39.38 | 
1203068996 |17.92
(1 row)

Time: 8924.511 ms

ams=# select * from pgstattuple('x');
 table_len  | tuple_count | tuple_len  | tuple_percent | dead_tuple_count | 
dead_tuple_len | dead_tuple_percent | free_space | free_percent 
+-++---+--++++--
 6714761216 |20659150 | 2644371200 | 39.38 | 20659142 | 
2644370176 |  39.38 | 1203068996 |17.92
(1 row)

Time: 13338.712 ms

Since the code depends on the visibility map to determine which pages to
skip, it does not support indexes (which have no visibility map).

(Just drop the attached files into contrib/fastbloat, and "make install"
should just work. Then just "create extension fastbloat".)

Questions and suggestions welcome.

-- Abhijit
/*
 * contrib/fastbloat/fastbloat.c
 *
 * Abhijit Menon-Sen 
 * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pg_stattuple)
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose, without fee, and without a
 * written agreement is hereby granted, provided that the above
 * copyright notice and this paragraph and the following two
 * paragraphs appear in all copies.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
 * DOCUMENTATIO