Right now Postgres determines whether update operation touch index or not based only on set of the affected columns. But in case of functional indexes such policy quite frequently leads to unnecessary index updates. For example, functional index are widely use for indexing JSON data: info->>'name'.

JSON data may contain multiple attributes and only few of them may be affected by update. Moreover, index is used to build for immutable attributes (like "id", "isbn", "name",...).

Functions like (info->>'name') are named "surjective" ni mathematics.
I have strong feeling that most of functional indexes are based on surjective functions. For such indexes current Postgresql index update policy is very inefficient. It cause disabling of hot updates
and so leads to significant degrade of performance.

Without this patch Postgres is slower than Mongo on YCSB benchmark with (50% update,50 % select) workload.
And after applying this patch Postgres beats Mongo at all workloads.

My proposal is to check value of function for functional indexes instead of just comparing set of effected attributes. Obviously, for some complex functions it may have negative effect on update speed. This is why I have added "surjective" option to index. By default it is switched on for all functional indexes (based on my assumption that most functions used in functional indexes are surjective). But it is possible to explicitly disable it and make decision weather index
needs to be updated or not only based on set of effected attributes.


--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 6d1f22f..37fc407 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -130,6 +130,15 @@ static relopt_bool boolRelOpts[] =
 	},
 	{
 		{
+			"surjective",
+			"Reevaluate functional index expression on update to check if its values is changed",
+			RELOPT_KIND_INDEX,
+			AccessExclusiveLock
+		},
+		true
+	},
+	{
+		{
 			"security_barrier",
 			"View acts as a row security barrier",
 			RELOPT_KIND_VIEW,
@@ -1301,7 +1310,7 @@ fillRelOptions(void *rdopts, Size basesize,
 				break;
 			}
 		}
-		if (validate && !found)
+		if (validate && !found && options[i].gen->kinds != RELOPT_KIND_INDEX)
 			elog(ERROR, "reloption \"%s\" not found in parse table",
 				 options[i].gen->name);
 	}
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e890e08..3525e3c 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -56,6 +56,7 @@
 #include "access/xlogutils.h"
 #include "catalog/catalog.h"
 #include "catalog/namespace.h"
+#include "catalog/index.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
@@ -73,7 +74,9 @@
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 #include "utils/tqual.h"
-
+#include "utils/memutils.h"
+#include "nodes/execnodes.h"
+#include "executor/executor.h"
 
 /* GUC variable */
 bool		synchronize_seqscans = true;
@@ -4199,6 +4202,7 @@ l2:
 
 	if (use_hot_update)
 	{
+		elog(DEBUG1, "Use hot update");
 		/* Mark the old tuple as HOT-updated */
 		HeapTupleSetHotUpdated(&oldtup);
 		/* And mark the new tuple as heap-only */
@@ -4436,6 +4440,73 @@ HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols,
 								attnum - FirstLowInvalidHeapAttributeNumber);
 	}
 
+	if (hot_result && relation->rd_surjective)
+	{
+		ListCell       *l;
+		List	       *indexoidlist = RelationGetIndexList(relation);
+		EState         *estate = CreateExecutorState();
+		ExprContext    *econtext = GetPerTupleExprContext(estate);
+		TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(relation));
+		Datum	   	    old_values[INDEX_MAX_KEYS];
+		bool		    old_isnull[INDEX_MAX_KEYS];
+		Datum	   	    new_values[INDEX_MAX_KEYS];
+		bool		    new_isnull[INDEX_MAX_KEYS];
+
+		econtext->ecxt_scantuple = slot;
+
+		foreach(l, indexoidlist)
+		{
+			Oid		    indexOid = lfirst_oid(l);
+			Relation    indexDesc = index_open(indexOid, AccessShareLock);
+			IndexInfo  *indexInfo = BuildIndexInfo(indexDesc);
+			int         i;
+
+			if (indexInfo->ii_Expressions && indexInfo->ii_Surjective)
+			{
+				ResetExprContext(econtext);
+				ExecStoreTuple(oldtup, slot, InvalidBuffer, false);
+				FormIndexDatum(indexInfo,
+							   slot,
+							   estate,
+							   old_values,
+							   old_isnull);
+
+				ExecStoreTuple(newtup, slot, InvalidBuffer, false);
+				FormIndexDatum(indexInfo,
+							   slot,
+							   estate,
+							   new_values,
+							   new_isnull);
+
+				for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+				{
+					if (old_isnull[i] != new_isnull[i])
+					{
+						hot_result = false;
+						break;
+					}
+					else if (!old_isnull[i])
+					{
+						Form_pg_attribute att = RelationGetDescr(indexDesc)->attrs[i];
+						if (!datumIsEqual(old_values[i], new_values[i], att->attbyval, att->attlen))
+						{
+							hot_result = false;
+							break;
+						}
+
+					}
+				}
+			}
+			index_close(indexDesc, AccessShareLock);
+		}
+		ExecDropSingleTupleTableSlot(slot);
+		FreeExecutorState(estate);
+	}
+
+	*satisfies_hot = hot_result;
+	*satisfies_key = key_result;
+	*satisfies_id = id_result;
+
 	return modified;
 }
 
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 2328b92..91217fa 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -26,6 +26,7 @@
 #include "access/amapi.h"
 #include "access/multixact.h"
 #include "access/relscan.h"
+#include "access/reloptions.h"
 #include "access/sysattr.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
@@ -86,6 +87,13 @@ typedef struct
 				tups_inserted;
 } v_i_state;
 
+/* options common to all indexes */
+typedef struct
+{
+	int32		vl_len_;
+	bool        surjective;
+} index_options;
+
 /* non-export function prototypes */
 static bool relationHasPrimaryKey(Relation rel);
 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
@@ -1676,6 +1684,40 @@ BuildIndexInfo(Relation index)
 		ii->ii_ExclusionStrats = NULL;
 	}
 
+	if (ii->ii_Expressions)
+	{
+		HeapTuple       tuple;
+		Datum           reloptions;
+		bool            isnull;
+
+		ii->ii_Surjective = true; /* by default functional index is considered as surjective */
+
+		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(RelationGetRelid(index)));
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "cache lookup failed for relation %u", RelationGetRelid(index));
+
+		reloptions = SysCacheGetAttr(RELOID, tuple,
+									 Anum_pg_class_reloptions, &isnull);
+		if (!isnull)
+		{
+			static const relopt_parse_elt tab[] = {
+				{"surjective", RELOPT_TYPE_BOOL, offsetof(index_options, surjective)}
+			};
+			int                       numoptions;
+			relopt_value *options = parseRelOptions(reloptions, false,
+													RELOPT_KIND_INDEX,
+													&numoptions);
+			if (numoptions != 0)
+			{
+				index_options optstruct;
+				fillRelOptions((void *)&optstruct, sizeof(bool), options, numoptions, false, tab, lengthof(tab));
+				ii->ii_Surjective = optstruct.surjective;
+				pfree(options);
+			}
+		}
+		ReleaseSysCache(tuple);
+	}
+
 	/* other info */
 	ii->ii_Unique = indexStruct->indisunique;
 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index c2e8361..d3b75f6 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -4854,6 +4854,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
 	List	   *newindexoidlist;
 	Oid			relpkindex;
 	Oid			relreplindex;
+	bool        surjective = false;
 	ListCell   *l;
 	MemoryContext oldcxt;
 
@@ -4963,9 +4964,15 @@ restart:
 			}
 		}
 
-		/* Collect all attributes used in expressions, too */
-		pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
-
+		if (indexInfo->ii_Expressions && indexInfo->ii_Surjective)
+		{
+			surjective = true;
+		}
+		else
+		{
+			/* Collect all attributes used in expressions, too */
+			pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
+		}
 		/* Collect all attributes in the index predicate, too */
 		pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
 
@@ -5010,6 +5017,8 @@ restart:
 	bms_free(relation->rd_idattr);
 	relation->rd_idattr = NULL;
 
+	relation->rd_surjective = surjective;
+
 	/*
 	 * Now save copies of the bitmaps in the relcache entry.  We intentionally
 	 * set rd_indexattr last, because that's the one that signals validity of
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 2abd087..f404888 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -1647,11 +1647,11 @@ psql_completion(const char *text, int start, int end)
 		COMPLETE_WITH_CONST("(");
 	/* ALTER INDEX <foo> SET|RESET ( */
 	else if (Matches5("ALTER", "INDEX", MatchAny, "RESET", "("))
-		COMPLETE_WITH_LIST3("fillfactor", "fastupdate",
-							"gin_pending_list_limit");
+		COMPLETE_WITH_LIST4("fillfactor", "fastupdate",
+							"gin_pending_list_limit", "surjective");
 	else if (Matches5("ALTER", "INDEX", MatchAny, "SET", "("))
-		COMPLETE_WITH_LIST3("fillfactor =", "fastupdate =",
-							"gin_pending_list_limit =");
+		COMPLETE_WITH_LIST4("fillfactor =", "fastupdate =",
+							"gin_pending_list_limit =", "surjective = ");
 
 	/* ALTER LANGUAGE <name> */
 	else if (Matches3("ALTER", "LANGUAGE", MatchAny))
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index 91b2cd7..1fba0f0 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -51,6 +51,7 @@ typedef enum relopt_kind
 	RELOPT_KIND_PARTITIONED = (1 << 11),
 	/* if you add a new kind, make sure you update "last_default" too */
 	RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PARTITIONED,
+	RELOPT_KIND_INDEX = RELOPT_KIND_BTREE|RELOPT_KIND_HASH|RELOPT_KIND_GIN|RELOPT_KIND_SPGIST,
 	/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
 	RELOPT_KIND_MAX = (1 << 30)
 } relopt_kind;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index d33392f..f67c9f0 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -147,6 +147,7 @@ typedef struct IndexInfo
 	bool		ii_ReadyForInserts;
 	bool		ii_Concurrent;
 	bool		ii_BrokenHotChain;
+	bool        ii_Surjective;
 	void	   *ii_AmCache;
 	MemoryContext ii_Context;
 } IndexInfo;
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 8476896..147ba89 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -125,6 +125,8 @@ typedef struct RelationData
 	List	   *rd_fkeylist;	/* list of ForeignKeyCacheInfo (see below) */
 	bool		rd_fkeyvalid;	/* true if list has been computed */
 
+	bool        rd_surjective;   /* relation contains functional index with surjective function */
+
 	MemoryContext rd_partkeycxt;	/* private memory cxt for the below */
 	struct PartitionKeyData *rd_partkey;		/* partition key, or NULL */
 	MemoryContext rd_pdcxt;		/* private context for partdesc */
diff --git a/src/test/regress/expected/func_index.out b/src/test/regress/expected/func_index.out
new file mode 100644
index 0000000..c57a46a
--- /dev/null
+++ b/src/test/regress/expected/func_index.out
@@ -0,0 +1,29 @@
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=false);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=true);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+DEBUG:  Use hot update
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+DEBUG:  Use hot update
+set client_min_messages=notice;
+drop table keyvalue;
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name'));
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+DEBUG:  Use hot update
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+DEBUG:  Use hot update
+set client_min_messages=notice;
+drop table keyvalue;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 1f8f098..06fd9aa 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -79,7 +79,7 @@ ignore: random
 # ----------
 # Another group of parallel tests
 # ----------
-test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts delete
+test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index func_index update namespace prepared_xacts delete
 
 # ----------
 # Another group of parallel tests
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 04206c3..4f8b460 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -98,6 +98,7 @@ test: portals
 test: arrays
 test: btree_index
 test: hash_index
+test: func_index
 test: update
 test: delete
 test: namespace
diff --git a/src/test/regress/sql/func_index.sql b/src/test/regress/sql/func_index.sql
new file mode 100644
index 0000000..4923382
--- /dev/null
+++ b/src/test/regress/sql/func_index.sql
@@ -0,0 +1,29 @@
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=false);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name')) with (surjective=true);
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+
+create table keyvalue(id integer primary key, info jsonb);
+create index nameindex on keyvalue((info->>'name'));
+set client_min_messages=debug1;
+insert into keyvalue values (1, '{"name": "john", "data": "some data"}');
+update keyvalue set info='{"name": "john", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some other data"}' where id=1;
+update keyvalue set info='{"name": "smith", "data": "some more data"}' where id=1;
+set client_min_messages=notice;
+drop table keyvalue;
+
+
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to