Robert, thanks for your review.

> For me, the line you removed from dml.out causes the regression tests to fail.
>
Fixed. Why did I remove this line??

> I don't understand what this is going for:
>
> +       /*
> +        * To boost up trusted procedure checks on db_procedure object
> +        * class, we also confirm the decision when user calls a procedure
> +        * labeled as 'tcontext'.
> +        */
>
> Can you explain?
>
Yes. It also caches the expected security label for the case where a
client labeled as "scontext" tries to execute a procedure labeled as
"tcontext", to reduce the number of system call invocations in
fmgr_hook and needs_fmgr_hook.
If the expected security label is not the same as "scontext", it means
the procedure acts as a trusted procedure that switches the security
label of the client during its execution; like a security definer
function.
The pair of security labels is the only factor that determines whether
the procedure is a trusted procedure or not. Thus, it is suitable for
caching in the userspace avc.

As an aside, the reason why we don't cache the default security label
assigned to newly created named objects (such as tables, ...) is that
selinux allows an exceptional default security label to be set up for
a particular name, so it does not fit the avc structure.
(I'm waiting for this interface to be included in libselinux.)

> sepgsql_avc_check_perms_label has a formatting error on the line that
> says "result = false".  It's not indented correctly.
>
OK, I fixed it.

> Several functions do this: sepgsql_avc_check_valid(); do { ... } while
> (!sepgsql_avc_check_valid);  I don't understand why we need a loop
> there.
>
It prevents inconsistent access control decisions caused by a
concurrent security policy reload.
I want the following steps to be executed atomically:
 1) Look up the object class number on the kernel side
 2) Look up the permission bits on the kernel side
 3) Ask the kernel side for its access control decision.

selinux_status_updated() returns 1 if any selinux status on the
kernel side (one that requires flushing userspace caches) has changed
since the last invocation.
In this case, we retry the whole process from the beginning to
ensure that the whole access control decision is made by either the
old or the new policy.
Thus, I enclosed these blocks in a do {...} while() loop.

> The comment for sepgql_avc_check_perms_label uses the word "elsewhere"
> when it really means "otherwise".
>
OK, I fixed it.

> Changing the calling sequence of sepgsql_get_label() would perhaps be
> better separated out into its own patch.
>
OK, I reverted it.

Thanks,
-- 
KaiGai Kohei <kai...@kaigai.gr.jp>
 configure.in               |    4 +-
 contrib/sepgsql/Makefile   |    2 +-
 contrib/sepgsql/dml.c      |   59 +++---
 contrib/sepgsql/hooks.c    |   64 +++---
 contrib/sepgsql/proc.c     |   68 ++-----
 contrib/sepgsql/relation.c |   69 +++----
 contrib/sepgsql/schema.c   |   39 ++--
 contrib/sepgsql/selinux.c  |    2 +-
 contrib/sepgsql/sepgsql.h  |   18 ++-
 contrib/sepgsql/uavc.c     |  511 ++++++++++++++++++++++++++++++++++++++++++++
 doc/src/sgml/sepgsql.sgml  |   12 +-
 11 files changed, 649 insertions(+), 199 deletions(-)

diff --git a/configure.in b/configure.in
index a844afc..b444358 100644
--- a/configure.in
+++ b/configure.in
@@ -964,8 +964,8 @@ fi
 
 # for contrib/sepgsql
 if test "$with_selinux" = yes; then
-  AC_CHECK_LIB(selinux, selinux_sepgsql_context_path, [],
-               [AC_MSG_ERROR([library 'libselinux', version 2.0.93 or newer, is required for SELinux support])])
+  AC_CHECK_LIB(selinux, selinux_status_open, [],
+               [AC_MSG_ERROR([library 'libselinux', version 2.0.99 or newer, is required for SELinux support])])
 fi
 
 # for contrib/uuid-ossp
diff --git a/contrib/sepgsql/Makefile b/contrib/sepgsql/Makefile
index 1978ccf..e273d8f 100644
--- a/contrib/sepgsql/Makefile
+++ b/contrib/sepgsql/Makefile
@@ -1,7 +1,7 @@
 # contrib/sepgsql/Makefile
 
 MODULE_big = sepgsql
-OBJS = hooks.o selinux.o label.o dml.o \
+OBJS = hooks.o selinux.o uavc.o label.o dml.o \
 	schema.o relation.o proc.o
 DATA_built = sepgsql.sql
 REGRESS = label dml misc
diff --git a/contrib/sepgsql/dml.c b/contrib/sepgsql/dml.c
index 22666b7..3199337 100644
--- a/contrib/sepgsql/dml.c
+++ b/contrib/sepgsql/dml.c
@@ -150,12 +150,11 @@ check_relation_privileges(Oid relOid,
 						  uint32 required,
 						  bool abort)
 {
-	char		relkind = get_rel_relkind(relOid);
-	char	   *scontext = sepgsql_get_client_label();
-	char	   *tcontext;
+	ObjectAddress	object;
 	char	   *audit_name;
 	Bitmapset  *columns;
 	int			index;
+	char		relkind = get_rel_relkind(relOid);
 	bool		result = true;
 
 	/*
@@ -184,45 +183,43 @@ check_relation_privileges(Oid relOid,
 	/*
 	 * Check permissions on the relation
 	 */
-	tcontext = sepgsql_get_label(RelationRelationId, relOid, 0);
-	audit_name = getObjectDescriptionOids(RelationRelationId, relOid);
+	object.classId = RelationRelationId;
+	object.objectId = relOid;
+	object.objectSubId = 0;
+	audit_name = getObjectDescription(&object);
 	switch (relkind)
 	{
 		case RELKIND_RELATION:
-			result = sepgsql_check_perms(scontext,
-										 tcontext,
-										 SEPG_CLASS_DB_TABLE,
-										 required,
-										 audit_name,
-										 abort);
+			result = sepgsql_avc_check_perms(&object,
+											 SEPG_CLASS_DB_TABLE,
+											 required,
+											 audit_name,
+											 abort);
 			break;
 
 		case RELKIND_SEQUENCE:
 			Assert((required & ~SEPG_DB_TABLE__SELECT) == 0);
 
 			if (required & SEPG_DB_TABLE__SELECT)
-				result = sepgsql_check_perms(scontext,
-											 tcontext,
-											 SEPG_CLASS_DB_SEQUENCE,
-											 SEPG_DB_SEQUENCE__GET_VALUE,
-											 audit_name,
-											 abort);
+				result = sepgsql_avc_check_perms(&object,
+												 SEPG_CLASS_DB_SEQUENCE,
+												 SEPG_DB_SEQUENCE__GET_VALUE,
+												 audit_name,
+												 abort);
 			break;
 
 		case RELKIND_VIEW:
-			result = sepgsql_check_perms(scontext,
-										 tcontext,
-										 SEPG_CLASS_DB_VIEW,
-										 SEPG_DB_VIEW__EXPAND,
-										 audit_name,
-										 abort);
+			result = sepgsql_avc_check_perms(&object,
+											 SEPG_CLASS_DB_VIEW,
+											 SEPG_DB_VIEW__EXPAND,
+											 audit_name,
+											 abort);
 			break;
 
 		default:
 			/* nothing to be checked */
 			break;
 	}
-	pfree(tcontext);
 	pfree(audit_name);
 
 	/*
@@ -242,7 +239,6 @@ check_relation_privileges(Oid relOid,
 	{
 		AttrNumber	attnum;
 		uint32		column_perms = 0;
-		ObjectAddress object;
 
 		if (bms_is_member(index, selected))
 			column_perms |= SEPG_DB_COLUMN__SELECT;
@@ -258,20 +254,17 @@ check_relation_privileges(Oid relOid,
 
 		/* obtain column's permission */
 		attnum = index + FirstLowInvalidHeapAttributeNumber;
-		tcontext = sepgsql_get_label(RelationRelationId, relOid, attnum);
 
 		object.classId = RelationRelationId;
 		object.objectId = relOid;
 		object.objectSubId = attnum;
 		audit_name = getObjectDescription(&object);
 
-		result = sepgsql_check_perms(scontext,
-									 tcontext,
-									 SEPG_CLASS_DB_COLUMN,
-									 column_perms,
-									 audit_name,
-									 abort);
-		pfree(tcontext);
+		result = sepgsql_avc_check_perms(&object,
+										 SEPG_CLASS_DB_COLUMN,
+										 column_perms,
+										 audit_name,
+										 abort);
 		pfree(audit_name);
 
 		if (!result)
diff --git a/contrib/sepgsql/hooks.c b/contrib/sepgsql/hooks.c
index 7797ccb..ca6ce99 100644
--- a/contrib/sepgsql/hooks.c
+++ b/contrib/sepgsql/hooks.c
@@ -184,9 +184,7 @@ sepgsql_exec_check_perms(List *rangeTabls, bool abort)
 static bool
 sepgsql_needs_fmgr_hook(Oid functionId)
 {
-	char	   *old_label;
-	char	   *new_label;
-	char	   *function_label;
+	ObjectAddress	object;
 
 	if (next_needs_fmgr_hook &&
 		(*next_needs_fmgr_hook) (functionId))
@@ -198,14 +196,8 @@ sepgsql_needs_fmgr_hook(Oid functionId)
 	 * functions as trusted-procedure, if the security policy has a rule that
 	 * switches security label of the client on execution.
 	 */
-	old_label = sepgsql_get_client_label();
-	new_label = sepgsql_proc_get_domtrans(functionId);
-	if (strcmp(old_label, new_label) != 0)
-	{
-		pfree(new_label);
+	if (sepgsql_avc_trusted_proc(functionId) != NULL)
 		return true;
-	}
-	pfree(new_label);
 
 	/*
 	 * Even if not a trusted-procedure, this function should not be inlined
@@ -213,17 +205,15 @@ sepgsql_needs_fmgr_hook(Oid functionId)
 	 * that it shall be actually failed later because of same reason with
 	 * ACL_EXECUTE.
 	 */
-	function_label = sepgsql_get_label(ProcedureRelationId, functionId, 0);
-	if (sepgsql_check_perms(sepgsql_get_client_label(),
-							function_label,
-							SEPG_CLASS_DB_PROCEDURE,
-							SEPG_DB_PROCEDURE__EXECUTE,
-							NULL, false) != true)
-	{
-		pfree(function_label);
+	object.classId = ProcedureRelationId;
+	object.objectId = functionId;
+	object.objectSubId = 0;
+	if (!sepgsql_avc_check_perms(&object,
+								 SEPG_CLASS_DB_PROCEDURE,
+								 SEPG_DB_PROCEDURE__EXECUTE,
+								 SEPGSQL_AVC_NOAUDIT, false))
 		return true;
-	}
-	pfree(function_label);
+
 	return false;
 }
 
@@ -251,33 +241,31 @@ sepgsql_fmgr_hook(FmgrHookEventType event,
 			if (!stack)
 			{
 				MemoryContext oldcxt;
-				const char *cur_label = sepgsql_get_client_label();
 
 				oldcxt = MemoryContextSwitchTo(flinfo->fn_mcxt);
 				stack = palloc(sizeof(*stack));
 				stack->old_label = NULL;
-				stack->new_label = sepgsql_proc_get_domtrans(flinfo->fn_oid);
+				stack->new_label = sepgsql_avc_trusted_proc(flinfo->fn_oid);
 				stack->next_private = 0;
 
 				MemoryContextSwitchTo(oldcxt);
 
-				if (strcmp(cur_label, stack->new_label) != 0)
-				{
-					/*
-					 * process:transition permission between old and new
-					 * label, when user tries to switch security label of the
-					 * client on execution of trusted procedure.
-					 */
-					sepgsql_check_perms(cur_label, stack->new_label,
-										SEPG_CLASS_PROCESS,
-										SEPG_PROCESS__TRANSITION,
-										NULL, true);
-				}
+				/*
+				 * process:transition permission between old and new label,
+				 * when user tries to switch security label of the client
+				 * on execution of trusted procedure.
+				 */
+				if (stack->new_label)
+					sepgsql_avc_check_perms_label(stack->new_label,
+												  SEPG_CLASS_PROCESS,
+												  SEPG_PROCESS__TRANSITION,
+												  NULL, true);
 
 				*private = PointerGetDatum(stack);
 			}
 			Assert(!stack->old_label);
-			stack->old_label = sepgsql_set_client_label(stack->new_label);
+			if (stack->new_label)
+				stack->old_label = sepgsql_set_client_label(stack->new_label);
 
 			if (next_fmgr_hook)
 				(*next_fmgr_hook) (event, flinfo, &stack->next_private);
@@ -290,7 +278,8 @@ sepgsql_fmgr_hook(FmgrHookEventType event,
 			if (next_fmgr_hook)
 				(*next_fmgr_hook) (event, flinfo, &stack->next_private);
 
-			sepgsql_set_client_label(stack->old_label);
+			if (stack->old_label)
+				sepgsql_set_client_label(stack->old_label);
 			stack->old_label = NULL;
 			break;
 
@@ -433,6 +422,9 @@ _PG_init(void)
 				 errmsg("SELinux: failed to get server security label: %m")));
 	sepgsql_set_client_label(context);
 
+	/* Initialize userspace access vector cache */
+	sepgsql_avc_init();
+
 	/* Security label provider hook */
 	register_label_provider(SEPGSQL_LABEL_TAG,
 							sepgsql_object_relabel);
diff --git a/contrib/sepgsql/proc.c b/contrib/sepgsql/proc.c
index 3b8bf23..9630d45 100644
--- a/contrib/sepgsql/proc.c
+++ b/contrib/sepgsql/proc.c
@@ -96,64 +96,30 @@ sepgsql_proc_post_create(Oid functionId)
 void
 sepgsql_proc_relabel(Oid functionId, const char *seclabel)
 {
-	char	   *scontext = sepgsql_get_client_label();
-	char	   *tcontext;
-	char	   *audit_name;
+	ObjectAddress	object;
+	char		   *audit_name;
 
-	audit_name = getObjectDescriptionOids(ProcedureRelationId, functionId);
+	object.classId = ProcedureRelationId;
+	object.objectId = functionId;
+	object.objectSubId = 0;
+	audit_name = getObjectDescription(&object);
 
 	/*
 	 * check db_procedure:{setattr relabelfrom} permission
 	 */
-	tcontext = sepgsql_get_label(ProcedureRelationId, functionId, 0);
-	sepgsql_check_perms(scontext,
-						tcontext,
-						SEPG_CLASS_DB_PROCEDURE,
-						SEPG_DB_PROCEDURE__SETATTR |
-						SEPG_DB_PROCEDURE__RELABELFROM,
-						audit_name,
-						true);
-	pfree(tcontext);
-
+	sepgsql_avc_check_perms(&object,
+							SEPG_CLASS_DB_PROCEDURE,
+							SEPG_DB_PROCEDURE__SETATTR |
+							SEPG_DB_PROCEDURE__RELABELFROM,
+							audit_name,
+							true);
 	/*
 	 * check db_procedure:{relabelto} permission
 	 */
-	sepgsql_check_perms(scontext,
-						seclabel,
-						SEPG_CLASS_DB_PROCEDURE,
-						SEPG_DB_PROCEDURE__RELABELTO,
-						audit_name,
-						true);
+	sepgsql_avc_check_perms_label(seclabel,
+								  SEPG_CLASS_DB_PROCEDURE,
+								  SEPG_DB_PROCEDURE__RELABELTO,
+								  audit_name,
+								  true);
 	pfree(audit_name);
 }
-
-/*
- * sepgsql_proc_get_domtrans
- *
- * It computes security label of the client that shall be applied when
- * the current client invokes the supplied function.
- * This computed label is either same or different from the current one.
- * If security policy informed the function is a trusted-procedure,
- * we need to switch security label of the client during execution of
- * the function.
- *
- * Also note that the translated label shall be allocated using palloc().
- * So, need to switch memory context, if you want to hold the string in
- * someone except for CurrentMemoryContext.
- */
-char *
-sepgsql_proc_get_domtrans(Oid functionId)
-{
-	char	   *scontext = sepgsql_get_client_label();
-	char	   *tcontext;
-	char	   *ncontext;
-
-	tcontext = sepgsql_get_label(ProcedureRelationId, functionId, 0);
-
-	ncontext = sepgsql_compute_create(scontext,
-									  tcontext,
-									  SEPG_CLASS_PROCESS);
-	pfree(tcontext);
-
-	return ncontext;
-}
diff --git a/contrib/sepgsql/relation.c b/contrib/sepgsql/relation.c
index 963cfdf..0767382 100644
--- a/contrib/sepgsql/relation.c
+++ b/contrib/sepgsql/relation.c
@@ -79,10 +79,8 @@ void
 sepgsql_attribute_relabel(Oid relOid, AttrNumber attnum,
 						  const char *seclabel)
 {
-	char	   *scontext = sepgsql_get_client_label();
-	char	   *tcontext;
-	char	   *audit_name;
 	ObjectAddress object;
+	char		 *audit_name;
 
 	if (get_rel_relkind(relOid) != RELKIND_RELATION)
 		ereport(ERROR,
@@ -97,26 +95,20 @@ sepgsql_attribute_relabel(Oid relOid, AttrNumber attnum,
 	/*
 	 * check db_column:{setattr relabelfrom} permission
 	 */
-	tcontext = sepgsql_get_label(RelationRelationId, relOid, attnum);
-	sepgsql_check_perms(scontext,
-						tcontext,
-						SEPG_CLASS_DB_COLUMN,
-						SEPG_DB_COLUMN__SETATTR |
-						SEPG_DB_COLUMN__RELABELFROM,
-						audit_name,
-						true);
-
+	sepgsql_avc_check_perms(&object,
+							SEPG_CLASS_DB_COLUMN,
+							SEPG_DB_COLUMN__SETATTR |
+							SEPG_DB_COLUMN__RELABELFROM,
+							audit_name,
+							true);
 	/*
 	 * check db_column:{relabelto} permission
 	 */
-	sepgsql_check_perms(scontext,
-						seclabel,
-						SEPG_CLASS_DB_COLUMN,
-						SEPG_DB_PROCEDURE__RELABELTO,
-						audit_name,
-						true);
-
-	pfree(tcontext);
+	sepgsql_avc_check_perms_label(seclabel,
+								  SEPG_CLASS_DB_COLUMN,
+								  SEPG_DB_PROCEDURE__RELABELTO,
+								  audit_name,
+								  true);
 	pfree(audit_name);
 }
 
@@ -227,8 +219,7 @@ out:
 void
 sepgsql_relation_relabel(Oid relOid, const char *seclabel)
 {
-	char	   *scontext = sepgsql_get_client_label();
-	char	   *tcontext;
+	ObjectAddress	object;
 	char	   *audit_name;
 	char		relkind;
 	uint16_t	tclass = 0;
@@ -246,31 +237,27 @@ sepgsql_relation_relabel(Oid relOid, const char *seclabel)
 				 errmsg("cannot set security labels on relations except "
 						"for tables, sequences or views")));
 
-	audit_name = getObjectDescriptionOids(RelationRelationId, relOid);
+	object.classId = RelationRelationId;
+	object.objectId = relOid;
+	object.objectSubId = 0;
+	audit_name = getObjectDescription(&object);
 
 	/*
 	 * check db_xxx:{setattr relabelfrom} permission
 	 */
-	tcontext = sepgsql_get_label(RelationRelationId, relOid, 0);
-
-	sepgsql_check_perms(scontext,
-						tcontext,
-						tclass,
-						SEPG_DB_TABLE__SETATTR |
-						SEPG_DB_TABLE__RELABELFROM,
-						audit_name,
-						true);
-
+	sepgsql_avc_check_perms(&object,
+							tclass,
+							SEPG_DB_TABLE__SETATTR |
+							SEPG_DB_TABLE__RELABELFROM,
+							audit_name,
+							true);
 	/*
 	 * check db_xxx:{relabelto} permission
 	 */
-	sepgsql_check_perms(scontext,
-						seclabel,
-						tclass,
-						SEPG_DB_TABLE__RELABELTO,
-						audit_name,
-						true);
-
-	pfree(tcontext);
+	sepgsql_avc_check_perms_label(seclabel,
+								  tclass,
+								  SEPG_DB_TABLE__RELABELTO,
+								  audit_name,
+								  true);
 	pfree(audit_name);
 }
diff --git a/contrib/sepgsql/schema.c b/contrib/sepgsql/schema.c
index 0de8997..aae68ef 100644
--- a/contrib/sepgsql/schema.c
+++ b/contrib/sepgsql/schema.c
@@ -65,35 +65,30 @@ sepgsql_schema_post_create(Oid namespaceId)
 void
 sepgsql_schema_relabel(Oid namespaceId, const char *seclabel)
 {
-	char	   *scontext = sepgsql_get_client_label();
-	char	   *tcontext;
-	char	   *audit_name;
+	ObjectAddress	object;
+	char		   *audit_name;
 
-	audit_name = getObjectDescriptionOids(NamespaceRelationId, namespaceId);
+	object.classId = NamespaceRelationId;
+	object.objectId = namespaceId;
+	object.objectSubId = 0;
+	audit_name = getObjectDescription(&object);
 
 	/*
 	 * check db_schema:{setattr relabelfrom} permission
 	 */
-	tcontext = sepgsql_get_label(NamespaceRelationId, namespaceId, 0);
-
-	sepgsql_check_perms(scontext,
-						tcontext,
-						SEPG_CLASS_DB_SCHEMA,
-						SEPG_DB_SCHEMA__SETATTR |
-						SEPG_DB_SCHEMA__RELABELFROM,
-						audit_name,
-						true);
-
+	sepgsql_avc_check_perms(&object,
+							SEPG_CLASS_DB_SCHEMA,
+							SEPG_DB_SCHEMA__SETATTR |
+							SEPG_DB_SCHEMA__RELABELFROM,
+							audit_name,
+							true);
 	/*
 	 * check db_schema:{relabelto} permission
 	 */
-	sepgsql_check_perms(scontext,
-						seclabel,
-						SEPG_CLASS_DB_SCHEMA,
-						SEPG_DB_SCHEMA__RELABELTO,
-						audit_name,
-						true);
-
-	pfree(tcontext);
+	sepgsql_avc_check_perms_label(seclabel,
+								  SEPG_CLASS_DB_SCHEMA,
+								  SEPG_DB_SCHEMA__RELABELTO,
+								  audit_name,
+								  true);
 	pfree(audit_name);
 }
diff --git a/contrib/sepgsql/selinux.c b/contrib/sepgsql/selinux.c
index 1f5a97e..d693d63 100644
--- a/contrib/sepgsql/selinux.c
+++ b/contrib/sepgsql/selinux.c
@@ -642,7 +642,7 @@ bool
 sepgsql_getenforce(void)
 {
 	if (sepgsql_mode == SEPGSQL_MODE_DEFAULT &&
-		security_getenforce() > 0)
+		selinux_status_getenforce() > 0)
 		return true;
 
 	return false;
diff --git a/contrib/sepgsql/sepgsql.h b/contrib/sepgsql/sepgsql.h
index 71688ab..35b500c 100644
--- a/contrib/sepgsql/sepgsql.h
+++ b/contrib/sepgsql/sepgsql.h
@@ -15,6 +15,7 @@
 #include "fmgr.h"
 
 #include <selinux/selinux.h>
+#include <selinux/avc.h>
 
 /*
  * SE-PostgreSQL Label Tag
@@ -245,6 +246,22 @@ extern bool sepgsql_check_perms(const char *scontext,
 					uint32 required,
 					const char *audit_name,
 					bool abort);
+/*
+ * uavc.c
+ */
+#define SEPGSQL_AVC_NOAUDIT			((void *)(-1))
+extern bool sepgsql_avc_check_perms_label(const char *tcontext,
+										  uint16 tclass,
+										  uint32 required,
+										  const char *audit_name,
+										  bool abort);
+extern bool sepgsql_avc_check_perms(const ObjectAddress *tobject,
+									uint16 tclass,
+									uint32 required,
+									const char *audit_name,
+									bool abort);
+extern char *sepgsql_avc_trusted_proc(Oid functionId);
+extern void sepgsql_avc_init(void);
 
 /*
  * label.c
@@ -286,6 +303,5 @@ extern void sepgsql_relation_relabel(Oid relOid, const char *seclabel);
  */
 extern void sepgsql_proc_post_create(Oid functionId);
 extern void sepgsql_proc_relabel(Oid functionId, const char *seclabel);
-extern char *sepgsql_proc_get_domtrans(Oid functionId);
 
 #endif   /* SEPGSQL_H */
diff --git a/contrib/sepgsql/uavc.c b/contrib/sepgsql/uavc.c
new file mode 100644
index 0000000..bcf0d4c
--- /dev/null
+++ b/contrib/sepgsql/uavc.c
@@ -0,0 +1,511 @@
+/* -------------------------------------------------------------------------
+ *
+ * contrib/sepgsql/uavc.c
+ *
+ * Implementation of userspace access vector cache; that enables to cache
+ * access control decisions recently used, and reduce number of kernel
+ * invocations to avoid unnecessary performance hit.
+ *
+ * Copyright (c) 2011, PostgreSQL Global Development Group
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "catalog/pg_proc.h"
+#include "commands/seclabel.h"
+#include "storage/ipc.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+
+#include "sepgsql.h"
+
+/*
+ * avc_cache
+ *
+ * It enables to cache access control decision (and behavior on execution of
+ * trusted procedure, db_procedure class only) for a particular pair of
+ * security labels and object class in userspace.
+ */
+typedef struct
+{
+	uint32		hash;		/* hash value of this cache entry */
+	char	   *scontext;	/* security context of the subject */
+	char	   *tcontext;	/* security context of the target */
+	uint16		tclass;		/* object class of the target */
+
+	uint32		allowed;	/* permissions to be allowed */
+	uint32		auditallow;	/* permissions to be audited on allowed */
+	uint32		auditdeny;	/* permissions to be audited on denied */
+
+	bool		permissive;	/* true, if permissive rule */
+	bool		hot_cache;	/* true, if recently referenced */
+	bool		tcontext_is_valid;
+							/* true, if tcontext is valid */
+	char	   *ncontext;	/* temporary scontext on execution of trusted
+							 * procedure, or NULL otherwise */
+} avc_cache;
+
+/*
+ * Declaration of static variables
+ */
+#define AVC_NUM_SLOTS		512
+#define AVC_NUM_RECLAIM		16
+#define AVC_DEF_THRESHOLD	384
+
+static MemoryContext	avc_mem_cxt;
+static List	   *avc_slots[AVC_NUM_SLOTS];	/* avc's hash buckets */
+static int		avc_num_caches;	/* number of caches currently used */
+static int		avc_lru_hint;	/* index of the buckets to be reclaimed next */
+static int		avc_threshold;	/* threshold to launch cache-reclaiming  */
+static char	   *avc_unlabeled;	/* system 'unlabeled' label */
+
+/*
+ * Hash function
+ */
+static uint32
+sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
+{
+	return hash_any((const unsigned char *)scontext, strlen(scontext))
+		^ hash_any((const unsigned char *)tcontext, strlen(tcontext))
+		^ tclass;
+}
+
+/*
+ * Reset all the avc caches
+ */
+static void
+sepgsql_avc_reset(void)
+{
+	MemoryContextReset(avc_mem_cxt);
+
+	memset(avc_slots, 0, sizeof(List *) * AVC_NUM_SLOTS);
+	avc_num_caches = 0;
+	avc_lru_hint = 0;
+	avc_unlabeled = NULL;
+}
+
+/*
+ * Reclaim caches recently unreferenced
+ */	
+static void
+sepgsql_avc_reclaim(void)
+{
+	ListCell   *cell;
+	ListCell   *next;
+	ListCell   *prev;
+	int			index;
+
+	while (avc_num_caches >= avc_threshold - AVC_NUM_RECLAIM)
+	{
+		index = avc_lru_hint;
+
+		prev = NULL;
+		for (cell = list_head(avc_slots[index]); cell; cell = next)
+		{
+			avc_cache  *cache = lfirst(cell);
+
+			next = lnext(cell);
+			if (!cache->hot_cache)
+			{
+				avc_slots[index]
+					= list_delete_cell(avc_slots[index], cell, prev);
+
+				pfree(cache->scontext);
+				pfree(cache->tcontext);
+				if (cache->ncontext)
+					pfree(cache->ncontext);
+				pfree(cache);
+
+				avc_num_caches--;
+			}
+			else
+			{
+				cache->hot_cache = false;
+				prev = cell;
+			}
+		}
+		avc_lru_hint = (avc_lru_hint + 1) % AVC_NUM_SLOTS;
+	}
+}
+
+/*
+ * sepgsql_avc_check_valid
+ *
+ * It checks whether the cached entries are still valid, or not.
+ * If security policy has been reloaded since last reference of access
+ * vector cache, we have to release all the entries, because they are
+ * no longer valid.
+ */
+static bool
+sepgsql_avc_check_valid(void)
+{
+	if (selinux_status_updated() > 0)
+	{
+		sepgsql_avc_reset();
+
+		return false;
+	}
+	return true;
+}
+
+/*
+ * sepgsql_avc_unlabeled
+ *
+ * It returns an alternative label to be applied when no label or invalid 
+ * label would be assigned on objects.
+ */
+static char *
+sepgsql_avc_unlabeled(void)
+{
+	if (!avc_unlabeled)
+	{
+		security_context_t	unlabeled;
+
+		if (security_get_initial_context_raw("unlabeled", &unlabeled) < 0)
+			ereport(ERROR,
+                    (errcode(ERRCODE_INTERNAL_ERROR),
+                     errmsg("SELinux: failed to get initial security label: %m")));
+		PG_TRY();
+		{
+			avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, unlabeled);
+		}
+		PG_CATCH();
+		{
+			freecon(unlabeled);
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+
+		freecon(unlabeled);
+	}
+	return avc_unlabeled;
+}
+
+/*
+ * sepgsql_avc_compute 
+ *
+ * A fallback path, when cache mishit. It asks SELinux its access control
+ * decision for the supplied pair of security context and object class.
+ */
+static avc_cache *
+sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
+{
+	char		   *ucontext = NULL;
+	char		   *ncontext = NULL;
+	MemoryContext	oldctx;
+	avc_cache	   *cache;
+	uint32			hash;
+	int				index;
+	struct av_decision	avd;
+
+	hash = sepgsql_avc_hash(scontext, tcontext, tclass);
+	index = hash % AVC_NUM_SLOTS;
+
+	/*
+	 * Validation check of the supplied security context.
+	 * Because it always invoke system-call, frequent check should be avoided.
+	 * Unless security policy is reloaded, validation status shall be kept, so
+	 * we also cache whether the supplied security context was valid, or not.
+	 */
+	if (security_check_context_raw((security_context_t)tcontext) != 0)
+		ucontext = sepgsql_avc_unlabeled();
+
+	/*
+	 * Ask SELinux its access control decision
+	 */
+	if (!ucontext)
+		sepgsql_compute_avd(scontext, tcontext, tclass, &avd);
+	else
+		sepgsql_compute_avd(scontext, ucontext, tclass, &avd);
+
+	/*
+	 * To boost up trusted procedure checks on db_procedure object
+	 * class, we also confirm the decision when user calls a procedure
+	 * labeled as 'tcontext'.
+	 */
+	if (tclass == SEPG_CLASS_DB_PROCEDURE)
+	{
+		if (!ucontext)
+			ncontext = sepgsql_compute_create(scontext, tcontext,
+											  SEPG_CLASS_PROCESS);
+		else
+			ncontext = sepgsql_compute_create(scontext, ucontext,
+											  SEPG_CLASS_PROCESS);
+		if (strcmp(scontext, ncontext) == 0)
+		{
+			pfree(ncontext);
+			ncontext = NULL;
+		}
+	}
+
+	/*
+	 * Set up an avc_cache object
+	 */
+	oldctx = MemoryContextSwitchTo(avc_mem_cxt);
+
+	cache = palloc0(sizeof(avc_cache));
+
+	cache->hash	= hash;
+	cache->scontext = pstrdup(scontext);
+	cache->tcontext = pstrdup(tcontext);
+	cache->tclass = tclass;
+
+	cache->allowed = avd.allowed;
+	cache->auditallow = avd.auditallow;
+	cache->auditdeny = avd.auditdeny;
+	cache->hot_cache = true;
+	if (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE)
+		cache->permissive = true;
+	if (!ucontext)
+		cache->tcontext_is_valid = true;
+	if (ncontext)
+		cache->ncontext = pstrdup(ncontext);
+
+	avc_num_caches++;
+
+	if (avc_num_caches > avc_threshold)
+		sepgsql_avc_reclaim();
+
+	avc_slots[index] = lcons(cache, avc_slots[index]);
+
+	MemoryContextSwitchTo(oldctx);
+
+	return cache;
+}
+
+/*
+ * sepgsql_avc_lookup
+ *
+ * It looks up a cache entry that matches the supplied pair of security
+ * contexts and object class. If not found, it tries to create
+ * a new cache entry.
+ */
+static avc_cache *
+sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
+{
+	avc_cache  *cache;
+	ListCell   *cell;
+	uint32		hash;
+	int			index;
+
+	hash = sepgsql_avc_hash(scontext, tcontext, tclass);
+	index = hash % AVC_NUM_SLOTS;
+
+	foreach (cell, avc_slots[index])
+	{
+		cache = lfirst(cell);
+
+		if (cache->hash == hash &&
+			cache->tclass == tclass &&
+			strcmp(cache->tcontext, tcontext) == 0 &&
+			strcmp(cache->scontext, scontext) == 0)
+		{
+			cache->hot_cache = true;
+			return cache;
+		}
+	}
+	/* not found, so insert a new cache */
+	return sepgsql_avc_compute(scontext, tcontext, tclass);
+}
+
+/*
+ * sepgsql_avc_check_perms(_label)
+ *
+ * It returns 'true', if the security policy suggested to allow the required
+ * permissions. Otherwise, it returns 'false' or raises an error according
+ * to the 'abort' argument.
+ * The 'tobject' and 'tclass' identify the target object being referenced,
+ * and 'required' is a bitmask of permissions (SEPG_*__*) defined for each
+ * object classes.
+ * The 'audit_name' is the object name (optional). If SEPGSQL_AVC_NOAUDIT
+ * was supplied, it means to skip all the audit messages.
+ */
+bool
+sepgsql_avc_check_perms_label(const char *tcontext,
+							  uint16 tclass, uint32 required,
+							  const char *audit_name, bool abort)
+{
+	char *scontext = sepgsql_get_client_label();
+	avc_cache  *cache;
+	uint32		denied;
+	uint32		audited;
+	bool		result;
+
+	sepgsql_avc_check_valid();
+	do {
+		result = true;
+
+		/*
+		 * If target object is unlabeled, we assume it has
+		 * system 'unlabeled' security context instead.
+		 */
+		if (tcontext)
+			cache = sepgsql_avc_lookup(scontext, tcontext, tclass);
+		else
+			cache = sepgsql_avc_lookup(scontext,
+									   sepgsql_avc_unlabeled(), tclass);
+
+		denied = required & ~cache->allowed;
+
+		/*
+		 * Compute permissions to be audited
+		 */
+		if (sepgsql_get_debug_audit())
+			audited = (denied ? (denied & ~0) : (required & ~0));
+		else
+			audited = denied ? (denied & cache->auditdeny)
+							 : (required & cache->auditallow);
+
+		if (denied)
+		{
+			/*
+			 * In permissive mode or permissive domain, violated permissions
+			 * shall be audited on the log files at once, and implicitly
+			 * allowed them to avoid flood of access denied logs, because
+			 * the purpose of permissive mode/domain is to collect violation
+			 * log to fix up security policy itself.
+			 */
+			if (!sepgsql_getenforce() || cache->permissive)
+				cache->allowed |= required;
+			else
+				result = false;
+		}
+	} while (!sepgsql_avc_check_valid());
+
+	/*
+	 * In the case when we have something auditable actions here,
+	 * sepgsql_audit_log shall be called with text representation of
+	 * security labels for both of subject and object.
+	 * It records this access violation, so DBA will be able to find
+	 * out unexpected security problems later.
+	 */
+	if (audited != 0 &&
+		audit_name != SEPGSQL_AVC_NOAUDIT &&
+		sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
+	{
+		sepgsql_audit_log(!!denied,
+						  cache->scontext,
+						  cache->tcontext_is_valid ?
+						  cache->tcontext : sepgsql_avc_unlabeled(),
+						  cache->tclass,
+						  audited,
+						  audit_name);
+	}
+
+	if (abort && !result)
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 errmsg("SELinux: security policy violation")));
+
+	return result;
+}
+
+bool
+sepgsql_avc_check_perms(const ObjectAddress *tobject,
+						uint16 tclass, uint32 required,
+						const char *audit_name, bool abort)
+{
+	char   *tcontext = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
+	bool	rc;
+
+	rc = sepgsql_avc_check_perms_label(tcontext,
+									   tclass, required,
+									   audit_name, abort);
+	if (tcontext)
+		pfree(tcontext);
+
+	return rc;
+}
+
+/*
+ * sepgsql_avc_trusted_proc
+ *
+ * It returns a security label to be switched on execution of the supplied
+ * procedure, if it was configured as a trusted procedure. Otherwise, NULL
+ * shall be returned.
+ */
+char *
+sepgsql_avc_trusted_proc(Oid functionId)
+{
+	char		   *scontext = sepgsql_get_client_label();
+	char		   *tcontext;
+	ObjectAddress	tobject;
+	avc_cache	   *cache;
+
+	tobject.classId = ProcedureRelationId;
+	tobject.objectId = functionId;
+	tobject.objectSubId = 0;
+	tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);
+
+	sepgsql_avc_check_valid();
+	do {
+		if (tcontext)
+			cache = sepgsql_avc_lookup(scontext, tcontext,
+									   SEPG_CLASS_DB_PROCEDURE);
+		else
+			cache = sepgsql_avc_lookup(scontext, sepgsql_avc_unlabeled(),
+									   SEPG_CLASS_DB_PROCEDURE);
+	} while (!sepgsql_avc_check_valid());
+
+	return cache->ncontext;
+}
+
+/*
+ * sepgsql_avc_exit
+ *
+ * It cleans up userspace avc stuff on process exit
+ */
+static void
+sepgsql_avc_exit(int code, Datum arg)
+{
+	selinux_status_close();
+}
+
+/*
+ * sepgsql_avc_init
+ *
+ * It shall be invoked once from the _PG_init routine to initialize
+ * userspace access vector cache stuff.
+ */
+void
+sepgsql_avc_init(void)
+{
+	int	rc;
+
+	/*
+	 * All the avc stuff shall be allocated on avc_mem_cxt
+	 */
+	avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
+										"userspace access vector cache",
+										ALLOCSET_DEFAULT_MINSIZE,
+										ALLOCSET_DEFAULT_INITSIZE,
+										ALLOCSET_DEFAULT_MAXSIZE);
+	memset(avc_slots, 0, sizeof(avc_slots));
+	avc_num_caches = 0;
+	avc_lru_hint = 0;
+	avc_threshold = AVC_DEF_THRESHOLD;
+
+	/*
+	 * SELinux allows to mmap(2) its kernel status page in read-only mode
+	 * to inform userspace applications its status updating (such as
+	 * policy reloading) without system-call invocations.
+	 * This feature is only supported in Linux-2.6.38 or later, however,
+	 * libselinux provides a fallback mode to know its status using
+	 * netlink sockets.
+	 */
+	rc = selinux_status_open(1);
+	if (rc < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("SELinux: could not open selinux status : %m")));
+	else if (rc > 0)
+		ereport(LOG,
+				(errmsg("SELinux: kernel status page uses fallback mode")));
+
+	/*
+	 * To close selinux status page on process exit
+	 */
+	on_proc_exit(sepgsql_avc_exit, 0);
+}
diff --git a/doc/src/sgml/sepgsql.sgml b/doc/src/sgml/sepgsql.sgml
index fc37988..0a02edb 100644
--- a/doc/src/sgml/sepgsql.sgml
+++ b/doc/src/sgml/sepgsql.sgml
@@ -64,7 +64,7 @@
     or higher with <productname>SELinux</productname> enabled.  It is not
     available on any other platform, and must be explicitly enabled using
     <literal>--with-selinux</>.  You will also need <productname>libselinux</>
-    2.0.93 or higher and <productname>selinux-policy</> 3.9.13 or higher
+    2.0.99 or higher and <productname>selinux-policy</> 3.9.13 or higher
     (some distributions may backport the necessary rules into older policy
     versions).
   </para>
@@ -474,16 +474,6 @@ postgres=# SELECT cid, cname, show_credit(cid) FROM customer;
 
   <variablelist>
    <varlistentry>
-    <term>Userspace access vector cache</term>
-    <listitem>
-     <para>
-      <productname>sepgsql</> does not yet support an access vector cache.
-      This would likely improve performance.
-     </para>
-    </listitem>
-   </varlistentry>
-
-   <varlistentry>
     <term>Data Definition Language (DDL) Permissions</term>
     <listitem>
      <para>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to