On 2013-06-28 21:47:47 +0200, Andres Freund wrote:
> So, from what I gather there's a slight leaning towards *not* storing
> the relation's oid in the WAL. Which means the concerns about the
> uniqueness issues with the syscaches need to be addressed. So far I know
> of three solutions:
> 1) develop a custom caching/mapping module
> 2) Make sure InvalidOid's (the only possible duplicate) can't end up the
>    syscache by adding a hook that prevents that on the catcache level
> 3) Make sure that there can't be any duplicates by storing the oid of
>    the relation in a mapped relations relfilenode

So, here's 4 patches:
1) add RelationMapFilenodeToOid()
2) Add pg_class index on (reltablespace, relfilenode)
3a) Add custom cache that maps from filenode to oid
3b) Add catcache 'filter' that ensures the cache stays unique and use
    that for the mapping
4) Add pg_relation_by_filenode() and use it in a regression test

3b) adds an optional 'filter' attribute to struct cachedesc in
    syscache.c which is then passed to catcache.c. If it's existant
    catcache.c uses it - after checking for a match in the cache - to
    check whether the queried-for value possibly should end up in the
    cache. If not it stores a whiteout entry as currently already done
    for nonexistant entries.
    It also reorders some catcache.h struct attributes to make sure
    we're not growing them. Might make sense to apply that
    independently, those are rather heavily used.

I slightly prefer 3b) because it's smaller, what's your opinions?

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
>From cbedebac6a8d449a5127befe1525230c2132e06f Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 11 Jun 2013 23:25:26 +0200
Subject: [PATCH] wal_decoding: Add RelationMapFilenodeToOid function to
 relmapper.c

This function maps (reltablespace, relfilenode) to the table oid and thus acts
as a reverse of RelationMapOidToFilenode.
---
 src/backend/utils/cache/relmapper.c | 53 +++++++++++++++++++++++++++++++++++++
 src/include/utils/relmapper.h       |  2 ++
 2 files changed, 55 insertions(+)

diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 2c7d9f3..039aa29 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -180,6 +180,59 @@ RelationMapOidToFilenode(Oid relationId, bool shared)
 	return InvalidOid;
 }
 
+/* RelationMapFilenodeToOid
+ *
+ * Do the reverse of the normal direction of mapping done in
+ * RelationMapOidToFilenode.
+ *
+ * This is not supposed to be used during normal running but rather for
+ * information purposes when looking at the filesystem or the xlog.
+ *
+ * Returns InvalidOid if the OID is not know which can easily happen if the
+ * filenode is not of a relation that is nailed or shared or if it simply
+ * doesn't exists anywhere.
+ */
+Oid
+RelationMapFilenodeToOid(Oid filenode, bool shared)
+{
+	const RelMapFile *map;
+	int32		i;
+
+	/* If there are active updates, believe those over the main maps */
+	if (shared)
+	{
+		map = &active_shared_updates;
+		for (i = 0; i < map->num_mappings; i++)
+		{
+			if (filenode == map->mappings[i].mapfilenode)
+				return map->mappings[i].mapoid;
+		}
+		map = &shared_map;
+		for (i = 0; i < map->num_mappings; i++)
+		{
+			if (filenode == map->mappings[i].mapfilenode)
+				return map->mappings[i].mapoid;
+		}
+	}
+	else
+	{
+		map = &active_local_updates;
+		for (i = 0; i < map->num_mappings; i++)
+		{
+			if (filenode == map->mappings[i].mapfilenode)
+				return map->mappings[i].mapoid;
+		}
+		map = &local_map;
+		for (i = 0; i < map->num_mappings; i++)
+		{
+			if (filenode == map->mappings[i].mapfilenode)
+				return map->mappings[i].mapoid;
+		}
+	}
+
+	return InvalidOid;
+}
+
 /*
  * RelationMapUpdateMap
  *
diff --git a/src/include/utils/relmapper.h b/src/include/utils/relmapper.h
index 8f0b438..071bc98 100644
--- a/src/include/utils/relmapper.h
+++ b/src/include/utils/relmapper.h
@@ -36,6 +36,8 @@ typedef struct xl_relmap_update
 
 extern Oid	RelationMapOidToFilenode(Oid relationId, bool shared);
 
+extern Oid	RelationMapFilenodeToOid(Oid relationId, bool shared);
+
 extern void RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared,
 					 bool immediate);
 
-- 
1.8.3.251.g1462b67

>From fc6022fcc9ba8394069870b0b2b0e32a4a648c70 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 7 Jul 2013 18:38:56 +0200
Subject: [PATCH] Add index on pg_class(reltablespace, relfilenode)

Used by RelidByRelfilenode either via relfilenodemap.c or via a special
syscache.

Needs a CATVERSION bump.
---
 src/include/catalog/indexing.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 19268fb..4860e98 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -106,6 +106,8 @@ DECLARE_UNIQUE_INDEX(pg_class_oid_index, 2662, on pg_class using btree(oid oid_o
 #define ClassOidIndexId  2662
 DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, on pg_class using btree(relname name_ops, relnamespace oid_ops));
 #define ClassNameNspIndexId  2663
+DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops));
+#define ClassTblspcRelfilenodeIndexId  3455
 
 DECLARE_UNIQUE_INDEX(pg_collation_name_enc_nsp_index, 3164, on pg_collation using btree(collname name_ops, collencoding int4_ops, collnamespace oid_ops));
 #define CollationNameEncNspIndexId 3164
-- 
1.8.3.251.g1462b67

>From 4019f9556f3708c2d7515ce1d6e1f42c1b724e89 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 7 Jul 2013 18:24:41 +0200
Subject: [PATCH] Introduce a new relfilenodemap cache that maps filenodes to
 oids

To make invalidations work hook into relcache invalidations.
---
 src/backend/utils/adt/dbsize.c           |   1 +
 src/backend/utils/cache/Makefile         |   3 +-
 src/backend/utils/cache/inval.c          |   2 +-
 src/backend/utils/cache/relfilenodemap.c | 261 +++++++++++++++++++++++++++++++
 src/include/utils/relfilenodemap.h       |  18 +++
 5 files changed, 283 insertions(+), 2 deletions(-)
 create mode 100644 src/backend/utils/cache/relfilenodemap.c
 create mode 100644 src/include/utils/relfilenodemap.h

diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 34482ab..c101fee 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -28,6 +28,7 @@
 #include "utils/builtins.h"
 #include "utils/numeric.h"
 #include "utils/rel.h"
+#include "utils/relfilenodemap.h"
 #include "utils/relmapper.h"
 #include "utils/syscache.h"
 
diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile
index 32d722e..a943f8e 100644
--- a/src/backend/utils/cache/Makefile
+++ b/src/backend/utils/cache/Makefile
@@ -13,6 +13,7 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \
-	relmapper.o spccache.o syscache.o lsyscache.o typcache.o ts_cache.o
+	relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \
+	typcache.o ts_cache.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 3356d0f..080f223 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -178,7 +178,7 @@ static int	maxSharedInvalidMessagesArray;
  */
 
 #define MAX_SYSCACHE_CALLBACKS 32
-#define MAX_RELCACHE_CALLBACKS 5
+#define MAX_RELCACHE_CALLBACKS 6
 
 static struct SYSCACHECALLBACK
 {
diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c
new file mode 100644
index 0000000..9f543cd
--- /dev/null
+++ b/src/backend/utils/cache/relfilenodemap.c
@@ -0,0 +1,261 @@
+/*-------------------------------------------------------------------------
+ *
+ * relfilenodemap.c
+ *	  relfilenode to oid mapping cache.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/cache/relfilenode.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_tablespace.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/hsearch.h"
+#include "utils/inval.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+#include "utils/relfilenodemap.h"
+#include "utils/relmapper.h"
+
+/* Hash table for informations about each relfilenode <-> oid pair */
+static HTAB *RelfilenodeMapHash = NULL;
+
+/* built first time through in InitializeRelfilenodeMap */
+ScanKeyData relfilenode_skey[2];
+
+typedef struct
+{
+	Oid			reltablespace;
+	Oid			relfilenode;
+} RelfilenodeMapKey;
+
+typedef struct
+{
+	RelfilenodeMapKey key;	/* lookup key - must be first */
+	Oid			relid;			/* pg_class.oid */
+} RelfilenodeMapEntry;
+
+/*
+ * RelfilenodeMapInvalidateCallback
+ *		Flush mapping entries when pg_class is updated in a relevant fashion.
+ */
+static void
+RelfilenodeMapInvalidateCallback(Datum arg, Oid relid)
+{
+	HASH_SEQ_STATUS status;
+	RelfilenodeMapEntry *entry;
+
+	/* not active or deleted */
+	if (RelfilenodeMapHash == NULL)
+		return;
+
+	/* delete entire cache */
+	if (relid == InvalidOid)
+	{
+		hash_destroy(RelfilenodeMapHash);
+		RelfilenodeMapHash = NULL;
+		return;
+	}
+
+	hash_seq_init(&status, RelfilenodeMapHash);
+	while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL)
+	{
+		/*
+		 * Note that there might be multiple entries for one oid at the same
+		 * time while we're processing invalidations.
+		 */
+		if (entry->relid == relid)
+		{
+			if (hash_search(RelfilenodeMapHash,
+							(void *) &entry->key,
+							HASH_REMOVE,
+							NULL) == NULL)
+				elog(ERROR, "hash table corrupted");
+		}
+	}
+}
+
+static void
+InitializeRelfilenodeMap(void)
+{
+	HASHCTL		ctl;
+	static bool	initial_init_done = false;
+	int i;
+
+	/* Make sure we've initialized CacheMemoryContext. */
+	if (CacheMemoryContext == NULL)
+		CreateCacheMemoryContext();
+
+	/* Initialize the hash table. */
+	MemSet(&ctl, 0, sizeof(ctl));
+	ctl.keysize = sizeof(RelfilenodeMapKey);
+	ctl.entrysize = sizeof(RelfilenodeMapEntry);
+	ctl.hash = tag_hash;
+	ctl.hcxt = CacheMemoryContext;
+
+	RelfilenodeMapHash =
+		hash_create("RelfilenodeMap cache", 1024, &ctl,
+					HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
+	/*
+	 * For complete resets we simply delete the entire hash, but there's no
+	 * need to do the other stuff multiple times. Especially the initialization
+	 * of the relcche invalidation should only be done once.
+	 */
+	if (initial_init_done)
+		return;
+
+	/* build skey */
+	MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey));
+
+	for (i = 0; i < 2; i++)
+	{
+		fmgr_info_cxt(F_OIDEQ,
+					  &relfilenode_skey[i].sk_func,
+					  CacheMemoryContext);
+		relfilenode_skey[i].sk_strategy = BTEqualStrategyNumber;
+		relfilenode_skey[i].sk_subtype = InvalidOid;
+		relfilenode_skey[i].sk_collation = InvalidOid;
+	}
+
+	relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace;
+	relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode;
+
+	/* Watch for invalidation events. */
+	CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback,
+								  (Datum) 0);
+	initial_init_done = true;
+}
+
+/*
+ * Map a relation's (tablespace, filenode) to a relation's oid and cache the
+ * result.
+ *
+ * Instead of DEFAULTTABLESPACE_OID InvalidOid/0 can be passed as
+ * tablespace. The table identified by the parameter pair can be a shared,
+ * nailed or normal relation.
+ *
+ * Returns InvalidOid if no relation mapping the criteria could be found.
+ */
+Oid
+RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
+{
+	RelfilenodeMapKey key;
+	RelfilenodeMapEntry *entry;
+	bool found;
+	SysScanDesc scandesc;
+	Relation relation;
+	HeapTuple ntp;
+	ScanKeyData skey[2];
+
+	if (RelfilenodeMapHash == NULL)
+		InitializeRelfilenodeMap();
+
+	/*
+	 * relations in the default tablespace are stored with InvalidOid as
+	 * pg_class.reltablespace.
+	 */
+	if (reltablespace == DEFAULTTABLESPACE_OID)
+		reltablespace = 0;
+
+	MemSet(&key, 0, sizeof(key));
+	key.reltablespace = reltablespace;
+	key.relfilenode = relfilenode;
+
+	/*
+	 * Check cache and enter entry if nothing could be found. Even if no target
+	 * relation can be found lateron we store the negative match and return a
+	 * InvalidOid from cache. That's not really necessary for performance since
+	 * querinyg invalid values isn't supposed to be a frequent thing, but it's
+	 * simpler implementation wise this way.
+	 */
+	entry = hash_search(RelfilenodeMapHash,
+						(void *) &key,
+						HASH_ENTER,
+						&found);
+
+	if (found)
+		return entry->relid;
+
+	/* ok, no previous cache entry, do it the hard way */
+
+	/* check shared tables */
+	if (reltablespace == GLOBALTABLESPACE_OID)
+	{
+		entry->relid = RelationMapFilenodeToOid(relfilenode, true);
+		return entry->relid;
+	}
+
+	/* check plain relations by looking in pg_class */
+	relation = heap_open(RelationRelationId, AccessShareLock);
+
+	/* copy scankey to local copy, it will be modified during the scan */
+	memcpy(skey, relfilenode_skey, sizeof(skey));
+
+	/* set scan arguments */
+	skey[0].sk_argument = ObjectIdGetDatum(reltablespace);
+	skey[1].sk_argument = ObjectIdGetDatum(relfilenode);
+
+	scandesc = systable_beginscan(relation,
+								  ClassTblspcRelfilenodeIndexId,
+								  true,
+								  NULL,
+								  2,
+								  skey);
+
+	found = false;
+
+	while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
+	{
+		bool isnull;
+
+		if (found)
+			elog(ERROR, "duplicate in GetOidByFilenode");
+		found = true;
+
+#ifdef USE_ASSERT_CHECKING
+		if (assert_enabled)
+		{
+			Oid check;
+			check = fastgetattr(ntp, Anum_pg_class_reltablespace,
+								RelationGetDescr(relation),
+								&isnull);
+
+			/*
+			 * reltablespace is already set to InvalidOid above if we're
+			 * looking for DEFAULTTABLESPACE_OID.
+			 */
+			if (isnull || check != reltablespace)
+				elog(ERROR, "borked reltablespace lookup");
+
+			check = fastgetattr(ntp, Anum_pg_class_relfilenode,
+								RelationGetDescr(relation),
+								&isnull);
+
+			if (isnull || check != relfilenode)
+				elog(ERROR, "borked relfilenode lookup");
+		}
+#endif
+		entry->relid = HeapTupleGetOid(ntp);
+	}
+
+	systable_endscan(scandesc);
+	heap_close(relation, AccessShareLock);
+
+	/* check for nailed tables, those will not have been in pg_class */
+	if (!found)
+		entry->relid = RelationMapFilenodeToOid(relfilenode, false);
+
+	return entry->relid;
+}
diff --git a/src/include/utils/relfilenodemap.h b/src/include/utils/relfilenodemap.h
new file mode 100644
index 0000000..cdb9bbf
--- /dev/null
+++ b/src/include/utils/relfilenodemap.h
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ *
+ * relfilenodemap.h
+ *	  relfilenode to oid mapping cache.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/relfilenodemap.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RELFILENODEMAP_H
+#define RELFILENODEMAP_H
+
+Oid RelidByRelfilenode(Oid reltablespace, Oid relfilenode);
+
+#endif   /* RELFILENODEMAP_H */
-- 
1.8.3.251.g1462b67

>From ed21c52a5c3ed5837fda5a16871be7505e6bb02d Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Sun, 7 Jul 2013 18:39:43 +0200
Subject: [PATCH] Add syscache for filenode to oid mapping

This cache is problematic because formallyindexes used by syscaches needs to be
unique, this one is not. This is "just" because of 0/InvalidOid are stored in
pg_class.relfilenode for nailed/shared catalog relations. Even if we should
never query those values we need to make sure nothing bad can happen in that
case. So add a 'filter' to the syscache infrastructure that allows to specify a
function which prevents tuples from ending up in a catcache.
---
 src/backend/utils/cache/catcache.c | 25 ++++++++++++++++-----
 src/backend/utils/cache/relcache.c | 45 +++++++++++++++++++++++++++++++++++++
 src/backend/utils/cache/syscache.c | 46 +++++++++++++++++++++++++++++++++++++-
 src/include/utils/catcache.h       | 21 ++++++++++-------
 src/include/utils/relcache.h       |  6 +++++
 src/include/utils/syscache.h       |  1 +
 6 files changed, 129 insertions(+), 15 deletions(-)

diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index d12da76..536de2d 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -726,7 +726,8 @@ InitCatCache(int id,
 			 Oid indexoid,
 			 int nkeys,
 			 const int *key,
-			 int nbuckets)
+			 int nbuckets,
+			 PGFunction filter)
 {
 	CatCache   *cp;
 	MemoryContext oldcxt;
@@ -787,6 +788,7 @@ InitCatCache(int id,
 	cp->cc_indexoid = indexoid;
 	cp->cc_relisshared = false; /* temporary */
 	cp->cc_tupdesc = (TupleDesc) NULL;
+	cp->cc_filter = filter;
 	cp->cc_ntup = 0;
 	cp->cc_nbuckets = nbuckets;
 	cp->cc_nkeys = nkeys;
@@ -1162,6 +1164,18 @@ SearchCatCache(CatCache *cache,
 		}
 	}
 
+	ct = NULL;
+
+	if (cache->cc_filter != NULL &&
+		DatumGetBool(DirectFunctionCall2(cache->cc_filter,
+										 PointerGetDatum(cache),
+										 PointerGetDatum(&cur_skey))))
+	{
+		CACHE2_elog(DEBUG2, "SearchCatCache(%s): filtering lookup",
+					cache->cc_relname);
+		goto create_negative;
+	}
+
 	/*
 	 * Tuple was not found in cache, so we have to try to retrieve it directly
 	 * from the relation.  If found, we will add it to the cache; if not
@@ -1186,8 +1200,6 @@ SearchCatCache(CatCache *cache,
 								  cache->cc_nkeys,
 								  cur_skey);
 
-	ct = NULL;
-
 	while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
 	{
 		ct = CatalogCacheCreateEntry(cache, ntp,
@@ -1204,10 +1216,11 @@ SearchCatCache(CatCache *cache,
 
 	heap_close(relation, AccessShareLock);
 
+create_negative:
 	/*
-	 * If tuple was not found, we need to build a negative cache entry
-	 * containing a fake tuple.  The fake tuple has the correct key columns,
-	 * but nulls everywhere else.
+	 * If tuple was not found or filtered, we need to build a negative cache
+	 * entry containing a fake tuple.  The fake tuple has the correct key
+	 * columns, but nulls everywhere else.
 	 *
 	 * In bootstrap mode, we don't build negative entries, because the cache
 	 * invalidation mechanism isn't alive and can't clear them if the tuple
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 66fb63b..c1eb3c4 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -4859,3 +4859,48 @@ unlink_initfile(const char *initfilename)
 			elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
 	}
 }
+
+Oid
+RelidByRelfilenode(Oid reltablespace, Oid relfilenode)
+{
+	Oid			lookup_tablespace;
+	Oid			heaprel;
+	HeapTuple	tuple;
+
+	if (reltablespace == 0)
+		reltablespace = DEFAULTTABLESPACE_OID;
+
+	/* in global tablespace, has to be a shared table */
+	if (reltablespace == GLOBALTABLESPACE_OID)
+	{
+		heaprel = RelationMapFilenodeToOid(relfilenode, true);
+	}
+	else
+	{
+		/*
+		 * relations in the default tablespace are stored with InvalidOid as
+		 * pg_class."reltablespace".
+		 */
+		if (reltablespace == DEFAULTTABLESPACE_OID)
+			lookup_tablespace = InvalidOid;
+		else
+			lookup_tablespace = reltablespace;
+
+
+		tuple = SearchSysCache2(RELFILENODE,
+								lookup_tablespace,
+								relfilenode);
+		/* ok, found it */
+		if (HeapTupleIsValid(tuple))
+		{
+			heaprel = HeapTupleHeaderGetOid(tuple->t_data);
+			ReleaseSysCache(tuple);
+		}
+		/* has to be nonexistant or a nailed table, but not shared */
+		else
+		{
+			heaprel = RelationMapFilenodeToOid(relfilenode, false);
+		}
+	}
+	return heaprel;
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 1ff2f2b..0b3e901 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -110,8 +110,11 @@ struct cachedesc
 	int			nkeys;			/* # of keys needed for cache lookup */
 	int			key[4];			/* attribute numbers of key attrs */
 	int			nbuckets;		/* number of hash buckets for this cache */
+	PGFunction	filter;			/* optional filter to guarantee uniqueness */
 };
 
+static Datum filter_filenode_syscache(PG_FUNCTION_ARGS);
+
 static const struct cachedesc cacheinfo[] = {
 	{AggregateRelationId,		/* AGGFNOID */
 		AggregateFnoidIndexId,
@@ -598,6 +601,18 @@ static const struct cachedesc cacheinfo[] = {
 		},
 		64
 	},
+	{RelationRelationId,		/* RELFILENODE */
+		ClassTblspcRelfilenodeIndexId,
+		2,
+		{
+			Anum_pg_class_reltablespace,
+			Anum_pg_class_relfilenode,
+			0,
+			0
+		},
+		1024,
+		filter_filenode_syscache
+	},
 	{RelationRelationId,		/* RELNAMENSP */
 		ClassNameNspIndexId,
 		2,
@@ -834,7 +849,8 @@ InitCatalogCache(void)
 										 cacheinfo[cacheId].indoid,
 										 cacheinfo[cacheId].nkeys,
 										 cacheinfo[cacheId].key,
-										 cacheinfo[cacheId].nbuckets);
+										 cacheinfo[cacheId].nbuckets,
+										 cacheinfo[cacheId].filter);
 		if (!PointerIsValid(SysCache[cacheId]))
 			elog(ERROR, "could not initialize cache %u (%d)",
 				 cacheinfo[cacheId].reloid, cacheId);
@@ -1208,3 +1224,31 @@ oid_compare(const void *a, const void *b)
 		return 0;
 	return (oa > ob) ? 1 : -1;
 }
+
+/*
+ * Filter away lookups with a InvalidOid relfilenode - those are nailed &
+ * shared relations and are managed via relmapper not via pg_class. Because of
+ * that InvalidOid is stored in pg_class.relfilenode for those making the index
+ * not unique. Make it unique by filtering away those rows.
+ *
+ * Arguments passed:
+ *  0: catcache
+ *  1: *ScanKey[4] identifying the to-be-filtered key
+ *
+ * Return true if the argument should be considered *nonexistant*.
+ */
+static Datum
+filter_filenode_syscache(PG_FUNCTION_ARGS)
+{
+	CatCache   *cache = (CatCache *) PG_GETARG_POINTER(0);
+	ScanKeyData	*skeys  = (ScanKeyData *) PG_GETARG_POINTER(1);
+
+	if (cache->cc_nkeys != 2)
+		elog(ERROR, "invalid parameter");
+
+	/* filter away if relfilenode == 0 */
+	if (DatumGetObjectId(skeys[1].sk_argument) == InvalidOid)
+		PG_RETURN_BOOL(true);
+
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index b6e1c97..a671d31 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -37,21 +37,22 @@
 typedef struct catcache
 {
 	int			id;				/* cache identifier --- see syscache.h */
+	int			cc_ntup;		/* # of tuples currently in this cache */
+	int			cc_nbuckets;	/* # of hash buckets in this cache */
+	int			cc_nkeys;		/* # of keys (1..CATCACHE_MAXKEYS) */
 	slist_node	cc_next;		/* list link */
 	const char *cc_relname;		/* name of relation the tuples come from */
 	Oid			cc_reloid;		/* OID of relation the tuples come from */
 	Oid			cc_indexoid;	/* OID of index matching cache keys */
 	bool		cc_relisshared; /* is relation shared across databases? */
-	TupleDesc	cc_tupdesc;		/* tuple descriptor (copied from reldesc) */
-	int			cc_ntup;		/* # of tuples currently in this cache */
-	int			cc_nbuckets;	/* # of hash buckets in this cache */
-	int			cc_nkeys;		/* # of keys (1..CATCACHE_MAXKEYS) */
+	bool		cc_isname[CATCACHE_MAXKEYS];	/* flag "name" key columns */
 	int			cc_key[CATCACHE_MAXKEYS];		/* AttrNumber of each key */
 	PGFunction	cc_hashfunc[CATCACHE_MAXKEYS];	/* hash function for each key */
 	ScanKeyData cc_skey[CATCACHE_MAXKEYS];		/* precomputed key info for
 												 * heap scans */
-	bool		cc_isname[CATCACHE_MAXKEYS];	/* flag "name" key columns */
+	TupleDesc	cc_tupdesc;		/* tuple descriptor (copied from reldesc) */
 	dlist_head	cc_lists;		/* list of CatCList structs */
+	PGFunction	cc_filter;		/* optional filter to achieve uniqueness */
 #ifdef CATCACHE_STATS
 	long		cc_searches;	/* total # searches against this cache */
 	long		cc_hits;		/* # of matches against existing entry */
@@ -74,6 +75,9 @@ typedef struct catctup
 {
 	int			ct_magic;		/* for identifying CatCTup entries */
 #define CT_MAGIC   0x57261502
+
+	uint32		hash_value;		/* hash value for this tuple's keys */
+
 	CatCache   *my_cache;		/* link to owning catcache */
 
 	/*
@@ -107,7 +111,6 @@ typedef struct catctup
 	int			refcount;		/* number of active references */
 	bool		dead;			/* dead but not yet removed? */
 	bool		negative;		/* negative cache entry? */
-	uint32		hash_value;		/* hash value for this tuple's keys */
 	HeapTupleData tuple;		/* tuple management header */
 } CatCTup;
 
@@ -116,6 +119,9 @@ typedef struct catclist
 {
 	int			cl_magic;		/* for identifying CatCList entries */
 #define CL_MAGIC   0x52765103
+
+	uint32		hash_value;		/* hash value for lookup keys */
+
 	CatCache   *my_cache;		/* link to owning catcache */
 
 	/*
@@ -144,7 +150,6 @@ typedef struct catclist
 	bool		dead;			/* dead but not yet removed? */
 	bool		ordered;		/* members listed in index order? */
 	short		nkeys;			/* number of lookup keys specified */
-	uint32		hash_value;		/* hash value for lookup keys */
 	HeapTupleData tuple;		/* header for tuple holding keys */
 	int			n_members;		/* number of member tuples */
 	CatCTup    *members[1];		/* members --- VARIABLE LENGTH ARRAY */
@@ -166,7 +171,7 @@ extern void AtEOXact_CatCache(bool isCommit);
 
 extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid,
 			 int nkeys, const int *key,
-			 int nbuckets);
+			 int nbuckets, PGFunction filter);
 extern void InitCatCachePhase2(CatCache *cache, bool touch_index);
 
 extern HeapTuple SearchCatCache(CatCache *cache,
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 8ac2549..46172f3 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -110,6 +110,12 @@ extern void RelationCacheInitFilePreInvalidate(void);
 extern void RelationCacheInitFilePostInvalidate(void);
 extern void RelationCacheInitFileRemove(void);
 
+/*
+ * Mapping from relfilenode to oid
+ */
+extern  Oid RelidByRelfilenode(Oid reltablespace, Oid relfilenode);
+
+
 /* should be used only by relcache.c and catcache.c */
 extern bool criticalRelcachesBuilt;
 
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index e41b3d2..66d5684 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -75,6 +75,7 @@ enum SysCacheIdentifier
 	PROCNAMEARGSNSP,
 	PROCOID,
 	RANGETYPE,
+	RELFILENODE,
 	RELNAMENSP,
 	RELOID,
 	RULERELNAME,
-- 
1.8.3.251.g1462b67

>From 564907fdb433947ed6f596cf37e983788dfbdeab Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 11 Jun 2013 23:25:26 +0200
Subject: [PATCH] wal_decoding: Add pg_relation_by_filenode to lookup up a
 relation by (tablespace, filenode)

This requires the previously added RELFILENODE syscache and the added
RelationMapFilenodeToOid function added in previous two commits.
---
 doc/src/sgml/func.sgml                    | 23 ++++++++++++++++++++++-
 src/backend/utils/adt/dbsize.c            | 27 +++++++++++++++++++++++++++
 src/include/catalog/pg_proc.h             |  2 ++
 src/include/utils/builtins.h              |  2 ++
 src/test/regress/expected/alter_table.out | 18 ++++++++++++++++++
 src/test/regress/sql/alter_table.sql      | 14 ++++++++++++++
 6 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5765ddf..d25e796 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -15739,7 +15739,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
 
    <para>
     The functions shown in <xref linkend="functions-admin-dblocation"> assist
-    in identifying the specific disk files associated with database objects.
+    in identifying the specific disk files associated with database objects or doing the reverse.
    </para>
 
    <indexterm>
@@ -15748,6 +15748,9 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
    <indexterm>
     <primary>pg_relation_filepath</primary>
    </indexterm>
+   <indexterm>
+    <primary>pg_relation_by_filenode</primary>
+   </indexterm>
 
    <table id="functions-admin-dblocation">
     <title>Database Object Location Functions</title>
@@ -15776,6 +15779,15 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
         File path name of the specified relation
        </entry>
       </row>
+      <row>
+       <entry>
+        <literal><function>pg_relation_by_filenode(<parameter>tablespace</parameter> <type>oid</type>, <parameter>filenode</parameter> <type>oid</type>)</function></literal>
+        </entry>
+       <entry><type>regclass</type></entry>
+       <entry>
+        Find the associated relation of a filenode
+       </entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -15799,6 +15811,15 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     the relation.
    </para>
 
+   <para>
+    <function>pg_relation_by_filenode</> is the reverse of
+    <function>pg_relation_filenode</>. Given a <quote>tablespace</> OID and
+    a <quote>filenode</> it returns the associated relation. The default
+    tablespace for user tables can be replaced with 0. Check the
+    documentation of <function>pg_relation_filenode</> for an explanation why
+    this cannot always easily answered by querying <structname>pg_class</>.
+   </para>
+
   </sect2>
 
   <sect2 id="functions-admin-genfile">
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
index 34482ab..988a8ff 100644
--- a/src/backend/utils/adt/dbsize.c
+++ b/src/backend/utils/adt/dbsize.c
@@ -756,6 +756,33 @@ pg_relation_filenode(PG_FUNCTION_ARGS)
 }
 
 /*
+ * Get the relation via (reltablespace, relfilenode)
+ *
+ * This is expected to be used when somebody wants to match an individual file
+ * on the filesystem back to its table. Thats not trivially possible via
+ * pg_class because that doesn't contain the relfilenodes of shared and nailed
+ * tables.
+ *
+ * We don't fail but return NULL if we cannot find a mapping.
+ *
+ * Instead of knowing DEFAULTTABLESPACE_OID you can pass 0.
+ */
+Datum
+pg_relation_by_filenode(PG_FUNCTION_ARGS)
+{
+	Oid			reltablespace = PG_GETARG_OID(0);
+	Oid			relfilenode = PG_GETARG_OID(1);
+	Oid			heaprel = InvalidOid;
+
+	heaprel = RelidByRelfilenode(reltablespace, relfilenode);
+
+	if (!OidIsValid(heaprel))
+		PG_RETURN_NULL();
+	else
+		PG_RETURN_OID(heaprel);
+}
+
+/*
  * Get the pathname (relative to $PGDATA) of a relation
  *
  * See comments for pg_relation_filenode.
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 90aff3d..3856f57 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3448,6 +3448,8 @@ DATA(insert OID = 2998 ( pg_indexes_size		PGNSP PGUID 12 1 0 0 0 f f f f t f v 1
 DESCR("disk space usage for all indexes attached to the specified table");
 DATA(insert OID = 2999 ( pg_relation_filenode	PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 26 "2205" _null_ _null_ _null_ _null_ pg_relation_filenode _null_ _null_ _null_ ));
 DESCR("filenode identifier of relation");
+DATA(insert OID = 3454 ( pg_relation_by_filenode PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 2205 "26 26" _null_ _null_ _null_ _null_ pg_relation_by_filenode _null_ _null_ _null_ ));
+DESCR("filenode identifier of relation");
 DATA(insert OID = 3034 ( pg_relation_filepath	PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 25 "2205" _null_ _null_ _null_ _null_ pg_relation_filepath _null_ _null_ _null_ ));
 DESCR("file path of relation");
 
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 667c58b..ddbedea 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -459,8 +459,10 @@ extern Datum pg_size_pretty(PG_FUNCTION_ARGS);
 extern Datum pg_size_pretty_numeric(PG_FUNCTION_ARGS);
 extern Datum pg_table_size(PG_FUNCTION_ARGS);
 extern Datum pg_indexes_size(PG_FUNCTION_ARGS);
+extern Datum pg_relation_by_filenode(PG_FUNCTION_ARGS);
 extern Datum pg_relation_filenode(PG_FUNCTION_ARGS);
 extern Datum pg_relation_filepath(PG_FUNCTION_ARGS);
+extern Datum pg_relation_is_scannable(PG_FUNCTION_ARGS);
 
 /* genfile.c */
 extern bytea *read_binary_file(const char *filename,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index 18daf95..2dfe113 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -2305,3 +2305,21 @@ Check constraints:
 
 DROP TABLE alter2.tt8;
 DROP SCHEMA alter2;
+-- Check that we map relation oids to filenodes and back correctly.
+-- Don't display all the mappings so the test output doesn't change
+-- all the time, but make sure we actually do test some values.
+SELECT
+    SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped,
+    count(*) > 200 have_mappings
+FROM (
+    SELECT
+        oid, reltablespace, relfilenode, relname,
+        pg_relation_by_filenode(reltablespace, pg_relation_filenode(oid)) mapped_oid
+    FROM pg_class
+    WHERE relkind IN ('r', 'i', 'S', 't', 'm')
+    ) mapped;
+ incorrectly_mapped | have_mappings 
+--------------------+---------------
+                  0 | t
+(1 row)
+
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index dcf8121..12b1338 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1544,3 +1544,17 @@ ALTER TABLE IF EXISTS tt8 SET SCHEMA alter2;
 
 DROP TABLE alter2.tt8;
 DROP SCHEMA alter2;
+
+-- Check that we map relation oids to filenodes and back correctly.
+-- Don't display all the mappings so the test output doesn't change
+-- all the time, but make sure we actually do test some values.
+SELECT
+    SUM((mapped_oid != oid OR mapped_oid IS NULL)::int) incorrectly_mapped,
+    count(*) > 200 have_mappings
+FROM (
+    SELECT
+        oid, reltablespace, relfilenode, relname,
+        pg_relation_by_filenode(reltablespace, pg_relation_filenode(oid)) mapped_oid
+    FROM pg_class
+    WHERE relkind IN ('r', 'i', 'S', 't', 'm')
+    ) mapped;
-- 
1.8.3.251.g1462b67

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to