/*
 * pg_stat_autovacuum
 *
 * This program is open source, licensed under the PostgreSQL license.
 * For license terms, see the LICENSE file.
 *
 */

#include "postgres.h"

#include "funcapi.h"
#include "miscadmin.h"
#include "commands/vacuum.h"
#include "postmaster/autovacuum.h"
#include "storage/ipc.h"
#include "storage/shm_mq.h"
#include "storage/shm_toc.h"
#include "utils/guc.h"
#include "utils/timestamp.h"

PG_MODULE_MAGIC;

/* Number of columns in every row returned by pg_stat_autovacuum() */
#define		PGSA_NB_COLS		15
/*
 * Value stored in pgsaWorker.pid when the slot is not owned by any worker.
 * Parenthesized so the negative literal stays a single operand wherever the
 * macro is expanded.
 */
#define		PGSA_FREE_ENTRY		(-1)
/* Value stored in pgsaWorker.h when the slot has no dynamic shmem segment */
#define		PGSA_NO_HANDLE		0
/* Magic number passed to shm_toc_create() / shm_toc_attach() */
#define		PGSA_MAGIC			0x79fb2448


/*
 * State recorded for one table in a worker's to-do list.
 *
 * One pgsaTable is allocated per listed table inside the worker's dynamic
 * shared memory segment (see pgsa_list_tables_hook) and is updated by the
 * begin-table and end-table hooks.
 */
typedef struct pgsaTable
{
	Oid				relid; /* OID of a table the worker expects to work on */
	TimestampTz		tsstart; /* set by the begin-table hook; 0 until then */
	TimestampTz		tsstop; /* set by the end-table hook; 0 until then */
	int				vacoptions; /* bitmask of VacuumOption; 0 until begin-table */
	VacuumParams	params; /* copy of the parameters given to begin-table */
	bool			skipped; /* has the worker skipped this table */
	bool			done; /* work done for this table */
	bool			cancelled; /* has autovacuum cancelled work on this table */
} pgsaTable;

/*
 * Per-worker slot kept in the main shared memory area.
 *
 * There are max_workers of these.  A slot is free when pid is
 * PGSA_FREE_ENTRY; otherwise it describes the autovacuum worker with that
 * process id.
 */
typedef struct pgsaWorker
{
	LWLock		   *lock; /* Protects the fields below */
	int				pid; /* PID of the worker, or PGSA_FREE_ENTRY (a signed
						  * sentinel, hence int rather than Oid) */
	Oid				dbid; /* OID of the database the worker is connected to */
	Oid				curTable; /* OID of the table the worker is working on */
	dsm_handle		h; /* handle of the segment holding the pgsaTable
						* entries for this worker, or PGSA_NO_HANDLE */
	int				ntables; /* number of tables in dynshm segment */
} pgsaWorker;

/*--- Function prototypes and file-level state --- */

/* Module load / unload entry points */
void	_PG_init(void);
void	_PG_fini(void);

/* Shared memory lifecycle callbacks */
static void pgsa_shmem_startup(void);
static void pgsa_shmem_shutdown(int code, Datum arg);

/* NOTE(review): declared here but neither defined nor called in this file */
Size AutoVacuumShmemSize(void);
PG_FUNCTION_INFO_V1(pg_stat_autovacuum);

/* Internal helpers */
static Size pgsa_memsize(void);
static void pgsa_init_table(pgsaTable *table, Oid relid);
static bool pgsa_map_or_attach(int numworker, dsm_segment **seg, shm_toc **toc);
static pgsaTable *pgsa_get_table(int numworker, Oid relid);

/* Autovacuum hook implementations installed by _PG_init() */
static void pgsa_list_tables_hook(int pid, Oid dbid, List *table_oids);
static void pgsa_begin_table_hook(int pid, Oid dbid, Oid relid, int vacoptions,
		VacuumParams params);
static void pgsa_end_table_hook(int pid, Oid dbid, Oid relid, bool cancelled);
static void pgsa_database_finished_hook(int pid);

/* Hook values that were in place before this module installed its own */
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
static autovacuum_list_tables_hook_type prev_list_hook = NULL;
static autovacuum_begin_table_hook_type prev_begin_table_hook = NULL;
static autovacuum_end_table_hook_type prev_end_table_hook = NULL;
static autovacuum_database_finished_hook_type prev_database_finished_hook = NULL;

static pgsaWorker *workers; /* array of max_workers slots in shared memory */
static int max_workers; /* copy of autovacuum_max_workers GUC value */

void
_PG_init(void)
{
	if (!process_shared_preload_libraries_in_progress)
	{
		elog(ERROR, "This module can only be loaded via shared_preload_libraries");
		return;
	}

	/* Need the maximum number of autovacuum workers now to ask for shmem */
	max_workers = atoi(GetConfigOption("autovacuum_max_workers", false, false));
	RequestAddinShmemSpace(pgsa_memsize());
	RequestNamedLWLockTranche("pg_stat_autovacuum", max_workers);

	/* install hooks */
	prev_shmem_startup_hook = shmem_startup_hook;
	shmem_startup_hook = pgsa_shmem_startup;

	prev_list_hook = autovacuum_list_tables_hook;
	autovacuum_list_tables_hook = pgsa_list_tables_hook;

	prev_begin_table_hook = autovacuum_begin_table_hook;
	autovacuum_begin_table_hook = pgsa_begin_table_hook;

	prev_end_table_hook = autovacuum_end_table_hook;
	autovacuum_end_table_hook = pgsa_end_table_hook;

	prev_database_finished_hook = autovacuum_database_finished_hook;
	autovacuum_database_finished_hook = pgsa_database_finished_hook;
}

void
_PG_fini(void)
{
	/* uninstall hooks */
	autovacuum_list_tables_hook = prev_list_hook;
	autovacuum_begin_table_hook = prev_begin_table_hook;
	autovacuum_end_table_hook = prev_end_table_hook;
	autovacuum_database_finished_hook = prev_database_finished_hook;
}

static void
pgsa_shmem_startup(void)
{
	bool found;
	int i;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	/* Reset in case this is a restart within the postmaster */
	workers = NULL;

	/* Create or attach to the shared memory state */
	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);

	workers = ShmemInitStruct("pg_stat_autovacuum",
			sizeof(pgsaWorker) * max_workers,
			&found);

	if (!found)
	{
		/* First time through */
		LWLockPadded *locks = GetNamedLWLockTranche("pg_stat_autovacuum");
		for (i=0; i<max_workers; i++)
			workers[i].lock = &(locks[i]).lock;

	}

	for (i=0; i<max_workers; i++)
	{
		workers[i].pid = PGSA_FREE_ENTRY;
		workers[i].dbid = InvalidOid;
		workers[i].curTable = InvalidOid;
	}

	LWLockRelease(AddinShmemInitLock);

	if (!IsUnderPostmaster)
		on_shmem_exit(pgsa_shmem_shutdown, (Datum) 0);
}

/*
 * Shared memory exit callback registered by pgsa_shmem_startup().
 * Currently a placeholder: both arguments are ignored and nothing is done.
 */
static void
pgsa_shmem_shutdown(int code, Datum arg)
{
	/* intentionally empty */
}


/*
 * Amount of main shared memory this module asks for: one pgsaWorker per
 * autovacuum worker, rounded up with MAXALIGN.
 */
static Size
pgsa_memsize(void)
{
	Size		nbytes = sizeof(pgsaWorker) * max_workers;

	return MAXALIGN(nbytes);
}

/*
 * Put a pgsaTable into its pristine state for the given relation: no
 * options or parameters known, no timestamps, and none of the status flags
 * set.
 */
static void
pgsa_init_table(pgsaTable *table, Oid relid)
{
	memset(&table->params, 0, sizeof(table->params));

	table->relid = relid;
	table->vacoptions = 0;

	table->tsstart = 0;
	table->tsstop = 0;

	table->skipped = false;
	table->done = false;
	table->cancelled = false;
}

/*
 * Locate the dynamic shared memory segment of a worker slot and its table
 * of contents.
 *
 * An existing mapping in this process is used when there is one; otherwise
 * the segment is attached.  On success *seg and *toc are set.  On failure a
 * WARNING is emitted, *toc is set to NULL and any segment attached here has
 * already been detached, so callers must test *toc before using either
 * output.
 *
 * Returns true only if the segment was attached by this call and is still
 * attached, i.e. the caller is responsible for calling dsm_detach(*seg).
 * Returns false if an existing mapping was used or if the call failed.
 *
 * Caller must hold the lwlock of that worker.
 */
static bool
pgsa_map_or_attach(int numworker, dsm_segment **seg, shm_toc **toc)
{
	bool		attached = false; /* must the caller detach from segment */

	Assert(workers[numworker].h != PGSA_NO_HANDLE);

	/* Callers detect failure through *toc, so never leave it unset */
	*toc = NULL;

	/* first, try to find a mapping */
	*seg = dsm_find_mapping(workers[numworker].h);

	if (*seg == NULL)
	{
		/*
		 * Could not find a mapping, must be a different backend calling the
		 * SRF function. Try to attach to the handle.
		 */
		*seg = dsm_attach(workers[numworker].h);
		if (*seg == NULL)
		{
			/* dsm_handle is an unsigned 32-bit value, hence %u */
			elog(WARNING, "Could not attach to dsm with handle %u",
				 workers[numworker].h);
			return false;
		}
		attached = true;
	}

	*toc = shm_toc_attach(PGSA_MAGIC, dsm_segment_address(*seg));
	if (*toc == NULL)
	{
		elog(WARNING, "Could not retrieve toc");

		if (attached)
			dsm_detach(*seg);
		*seg = NULL;
		return false;
	}

	return attached;
}

/*
 * Return a pointer to the pgsaTable for relid stored in the dynamic shared
 * memory of the specified worker, or NULL if it cannot be provided.
 *
 * The returned pointer points into the segment, so it is only valid while
 * the segment stays mapped in this process.  For that reason a pointer is
 * returned only when the segment was already mapped here; if it had to be
 * attached just for this call it is detached again and NULL is returned
 * rather than handing back a pointer into unmapped memory.
 *
 * Caller must hold the lwlock of that pgsaWorker.
 */
static pgsaTable *
pgsa_get_table(int numworker, Oid relid)
{
	pgsaTable  *match = NULL;
	dsm_segment *seg = NULL;
	shm_toc    *toc = NULL;
	bool		attached;
	int			i;

	Assert(workers[numworker].ntables > 0);
	Assert(workers[numworker].h != PGSA_NO_HANDLE);

	/* Same conditions as the assertions, for builds without assertions */
	if (workers[numworker].ntables <= 0 ||
		workers[numworker].h == PGSA_NO_HANDLE)
		return NULL;

	attached = pgsa_map_or_attach(numworker, &seg, &toc);

	if (toc == NULL)
	{
		Assert(!attached);
		return NULL;
	}

	if (attached)
	{
		/* Temporary attachment: nothing found here could outlive it */
		dsm_detach(seg);
		return NULL;
	}

	/* Tables are stored under keys 0 .. ntables-1 */
	for (i = 0; i < workers[numworker].ntables; i++)
	{
		pgsaTable  *candidate = shm_toc_lookup(toc, i);

		if (candidate != NULL && candidate->relid == relid)
		{
			match = candidate;
			break;
		}
	}

	return match;
}

static void
pgsa_list_tables_hook(int pid, Oid dbid, List *table_oids)
{
	ListCell *cell;
	bool found = false;
	int i,j;
	int ntables;
	shm_toc_estimator e;
	shm_toc *toc;
	Size segsize;
	pgsaTable *table;

	ntables = list_length(table_oids);

	i=0;
	while (i<max_workers && !found)
	{
		LWLockAcquire(workers[i].lock, LW_EXCLUSIVE);

		if (workers[i].pid == PGSA_FREE_ENTRY)
		{
			found = true;
			workers[i].pid = pid;
			workers[i].dbid = dbid;
			workers[i].ntables = ntables;

			if (ntables > 0)
			{
				dsm_segment *seg;
				shm_toc_initialize_estimator(&e);

				shm_toc_estimate_chunk(&e, sizeof(int));

				/* number of tables the worker will try to work on */
				shm_toc_estimate_keys(&e, ntables);

				for(j=0; j<ntables; j++)
					shm_toc_estimate_chunk(&e, sizeof(pgsaTable));
				segsize = shm_toc_estimate(&e);

				seg = dsm_create(shm_toc_estimate(&e), 0);

				/* keep this segment until the worker has finished his work */
				dsm_pin_mapping(seg);

				toc = shm_toc_create(PGSA_MAGIC, dsm_segment_address(seg),
						segsize);

				j = 0;
				foreach(cell, table_oids)
				{
					Oid	relid = lfirst_oid(cell);

					table = shm_toc_allocate(toc, sizeof(pgsaTable));

					pgsa_init_table(table, relid);

					shm_toc_insert(toc, j++, table);
				}
				Assert(j == ntables);

				workers[i].h = dsm_segment_handle(seg);
			}
			else
				workers[i].h = PGSA_NO_HANDLE;
		}

		LWLockRelease(workers[i].lock);
		i++;
	}
	Assert(found);
}

/*
 * Hook called when a worker starts working on a table.
 *
 * Runs the previously installed hook, if any, then finds the slot owned by
 * pid, records relid as its current table and stores the options,
 * parameters and start timestamp in the matching pgsaTable when that entry
 * can be reached.
 */
static void
pgsa_begin_table_hook(int pid, Oid dbid, Oid relid, int vacoptions,
		VacuumParams params)
{
	bool		found = false;
	int			i;

	if (prev_begin_table_hook)
		prev_begin_table_hook(pid, dbid, relid, vacoptions, params);

	for (i = 0; i < max_workers && !found; i++)
	{
		LWLockAcquire(workers[i].lock, LW_EXCLUSIVE);

		if (workers[i].pid == pid)
		{
			pgsaTable  *table;

			found = true;
			workers[i].curTable = relid;
			table = pgsa_get_table(i, relid);

			/* the entry may be unreachable, e.g. dynshm could not be mapped */
			if (table)
			{
				table->vacoptions = vacoptions;
				table->params = params;
				table->tsstart = GetCurrentTimestamp();
			}
		}

		LWLockRelease(workers[i].lock);
	}
	Assert(found);
}

/*
 * Hook called when a worker is finished with a table.
 *
 * Runs the previously installed hook, if any, then finds the slot owned by
 * pid and updates the matching pgsaTable: it is flagged as skipped when it
 * is not the table the worker was recorded as working on, flagged cancelled
 * or done depending on the cancelled argument, and given a stop timestamp.
 * The slot's current table is cleared in every case.
 */
static void
pgsa_end_table_hook(int pid, Oid dbid, Oid relid, bool cancelled)
{
	bool		found = false;
	int			i;

	if (prev_end_table_hook)
		prev_end_table_hook(pid, dbid, relid, cancelled);

	for (i = 0; i < max_workers && !found; i++)
	{
		LWLockAcquire(workers[i].lock, LW_EXCLUSIVE);

		if (workers[i].pid == pid)
		{
			pgsaTable  *table;

			found = true;
			table = pgsa_get_table(i, relid);

			if (table)
			{
				/* begin-table was never called for this relation */
				if (workers[i].curTable != relid)
					table->skipped = true;

				if (cancelled)
					table->cancelled = true;
				else
					table->done = true;

				table->tsstop = GetCurrentTimestamp();
			}
			workers[i].curTable = InvalidOid;
		}

		LWLockRelease(workers[i].lock);
	}
	Assert(found);
}

static void
pgsa_database_finished_hook(int pid)
{
	int i;
	bool found = false;

	i=0;
	while (i<max_workers && !found)
	{
		LWLockAcquire(workers[i].lock, LW_EXCLUSIVE);

		if (workers[i].pid == pid)
		{
			found = true;
			workers[i].pid = PGSA_FREE_ENTRY;
			workers[i].dbid = InvalidOid;
			workers[i].curTable = InvalidOid;
			workers[i].ntables = 0;

			if (workers[i].h != PGSA_NO_HANDLE)
				workers[i].h = PGSA_NO_HANDLE;
		}

		LWLockRelease(workers[i].lock);
		i++;
	}
	Assert(found);
}

/*
 * List of autovacuum activity
 */
Datum
pg_stat_autovacuum(PG_FUNCTION_ARGS)
{
	ReturnSetInfo	*rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	MemoryContext	per_query_ctx;
	MemoryContext	oldcontext;
	TupleDesc		tupdesc;
	Tuplestorestate	*tupstore;
	Datum		values[PGSA_NB_COLS];
	bool		nulls[PGSA_NB_COLS];
	int i;


	/* check to see if caller supports us returning a tuplestore */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not " \
							"allowed in this context")));

	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	tupstore = tuplestore_begin_heap(true, false, work_mem);
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = tupstore;
	rsinfo->setDesc = tupdesc;

	MemoryContextSwitchTo(oldcontext);

	for (i=0; i<max_workers; i++)
	{
		dsm_segment *seg;
		shm_toc *toc;
		int j=0;
		int t;

		LWLockAcquire(workers[i].lock, LW_SHARED);

		/* Only show active workers */
		if (workers[i].pid == PGSA_FREE_ENTRY)
		{
			LWLockRelease(workers[i].lock);
			continue;
		}

		if (workers[i].ntables > 0)
		{
			bool attached;

			attached = pgsa_map_or_attach(i, &seg, &toc);

			if (!toc)
			{
				Assert(!attached);

				LWLockRelease(workers[i].lock);
				continue;
			}

			for(t=0; t<workers[i].ntables; t++)
			{
				pgsaTable *table;

				memset(values, 0, sizeof(values));
				memset(nulls, 0, sizeof(nulls));
				j=0;

				table = shm_toc_lookup(toc, t);

				values[j++] = Int32GetDatum(workers[i].pid);

				if (workers[i].dbid == InvalidOid)
					nulls[j++] = true;
				else
					values[j++] = ObjectIdGetDatum(workers[i].dbid);

				if (workers[i].curTable == InvalidOid)
					nulls[j++] = true;
				else
					values[j++] = ObjectIdGetDatum(table->relid);

				values[j++] = Int32GetDatum(t+1);
				values[j++] = BoolGetDatum(table->relid == workers[i].curTable);
				values[j++] = BoolGetDatum(table->skipped);
				values[j++] = BoolGetDatum(table->done);
				values[j++] = BoolGetDatum(table->cancelled);
				if (table->tsstart != 0)
					values[j++] = TimestampTzGetDatum(table->tsstart);
				else
					nulls[j++] = true;
				if (table->tsstop != 0)
					values[j++] = TimestampTzGetDatum(table->tsstop);
				else
					nulls[j++] = true;
				/* do we know these options yet? */
				if (table->vacoptions == 0)
				{
					nulls[j++] = true; /* do_vacuum  */
					nulls[j++] = true; /* do_analyze  */
					nulls[j++] = true; /* do_freeze  */
					nulls[j++] = true; /* do_skiptoast  */
					nulls[j++] = true; /* is_wraparound  */
				}
				else
				{
					values[j++] = BoolGetDatum(table->vacoptions &
							VACOPT_VACUUM);
					values[j++] = BoolGetDatum(table->vacoptions &
							VACOPT_ANALYZE);
					values[j++] = BoolGetDatum(table->vacoptions &
							VACOPT_FREEZE);
					values[j++] = BoolGetDatum(table->vacoptions &
							VACOPT_SKIPTOAST);
					values[j++] = BoolGetDatum(table->params.is_wraparound);
				}

				Assert(j == PGSA_NB_COLS);
				tuplestore_putvalues(tupstore, tupdesc, values, nulls);
			}

			if (attached)
				dsm_detach(seg);
		}
		else
		{
				memset(values, 0, sizeof(values));
				memset(nulls, 0, sizeof(nulls));

				j=0;
				values[j++] = Int32GetDatum(workers[i].pid);
				if (workers[i].dbid == InvalidOid)
					nulls[j++] = true;
				else
					values[j++] = ObjectIdGetDatum(workers[i].dbid);
				nulls[j++] = true; /* relid */
				values[j++] = Int32GetDatum(0); /* pos */
				nulls[j++] = true; /* working */
				nulls[j++] = true; /* skipped */
				nulls[j++] = true; /* finished */
				nulls[j++] = true; /* cancelled  */
				nulls[j++] = true; /* do_vacuum  */
				nulls[j++] = true; /* do_analyze  */
				nulls[j++] = true; /* do_freeze  */
				nulls[j++] = true; /* do_skiptoast  */
				nulls[j++] = true; /* is_wraparound  */

				Assert(j == PGSA_NB_COLS);
				tuplestore_putvalues(tupstore, tupdesc, values, nulls);
		}

		LWLockRelease(workers[i].lock);
	}

	/* clean up and return the tuplestore */
	tuplestore_donestoring(tupstore);

	return (Datum) 0;
}
