While working on 1aebfbea83c, I noticed that the new multivariate MCV stats feature suffers from the same problem, and also the original problems that were fixed in e2d4ef8de8 and earlier --- namely that a user can see values in the MCV lists that they shouldn't see (values from tables that they don't have privileges on).
I think there are 2 separate issues here: 1). The table pg_statistic_ext is accessible to anyone, so any user can see the MCV lists of any table. I think we should give this the same treatment as pg_statistic, and hide it behind a security barrier view, revoking public access from the table. 2). The multivariate MCV stats planner code can be made to invoke user-defined operators, so a user can create a leaky operator and use it to reveal data values from the MCV lists even if they have no permissions on the table. Attached is a draft patch to fix (2), which hooks into statext_is_compatible_clause(). I haven't thought much about (1). There are some questions about what exactly the view should look like. Probably it should translate table oids to names, like pg_stats does, but should it also translate column indexes to names? That could get fiddly. Should it unpack MCV items? I'll raise this as an open item for PG12. Regards, Dean
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c new file mode 100644 index ac0ae52..14a7041 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -23,6 +23,7 @@ #include "catalog/indexing.h" #include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" +#include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" @@ -753,7 +754,8 @@ choose_best_statistics(List *stats, Bitm * attribute numbers from all compatible clauses (recursively). */ static bool -statext_is_compatible_clause_internal(Node *clause, Index relid, Bitmapset **attnums) +statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, + Index relid, Bitmapset **attnums) { /* Look inside any binary-compatible relabeling (as in examine_variable) */ if (IsA(clause, RelabelType)) @@ -784,6 +786,7 @@ statext_is_compatible_clause_internal(No /* (Var op Const) or (Const op Var) */ if (is_opclause(clause)) { + RangeTblEntry *rte = root->simple_rte_array[relid]; OpExpr *expr = (OpExpr *) clause; Var *var; bool varonleft = true; @@ -826,9 +829,24 @@ statext_is_compatible_clause_internal(No return false; } + /* + * If there are any securityQuals on the RTE from security barrier + * views or RLS policies, then the user may not have access to all the + * table's data, and we must check that the operator is leak-proof. + * + * If the operator is leaky, then we must ignore this clause for the + * purposes of estimating with MCV lists, otherwise the operator might + * reveal values from the MCV list that the user doesn't have + * permission to see. + */ + if (rte->securityQuals != NIL && + !get_func_leakproof(get_opcode(expr->opno))) + return false; + var = (varonleft) ? linitial(expr->args) : lsecond(expr->args); - return statext_is_compatible_clause_internal((Node *) var, relid, attnums); + return statext_is_compatible_clause_internal(root, (Node *) var, + relid, attnums); } /* AND/OR/NOT clause */ @@ -859,7 +877,8 @@ statext_is_compatible_clause_internal(No * Had we found incompatible clause in the arguments, treat the * whole clause as incompatible. */ - if (!statext_is_compatible_clause_internal((Node *) lfirst(lc), + if (!statext_is_compatible_clause_internal(root, + (Node *) lfirst(lc), relid, attnums)) return false; } @@ -879,7 +898,8 @@ statext_is_compatible_clause_internal(No if (!IsA(nt->arg, Var)) return false; - return statext_is_compatible_clause_internal((Node *) (nt->arg), relid, attnums); + return statext_is_compatible_clause_internal(root, (Node *) (nt->arg), + relid, attnums); } return false; @@ -902,9 +922,12 @@ statext_is_compatible_clause_internal(No * complex cases, for example (Var op Var). */ static bool -statext_is_compatible_clause(Node *clause, Index relid, Bitmapset **attnums) +statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid, + Bitmapset **attnums) { + RangeTblEntry *rte = root->simple_rte_array[relid]; RestrictInfo *rinfo = (RestrictInfo *) clause; + Oid userid; if (!IsA(rinfo, RestrictInfo)) return false; @@ -917,8 +940,43 @@ statext_is_compatible_clause(Node *claus if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) return false; - return statext_is_compatible_clause_internal((Node *) rinfo->clause, - relid, attnums); + /* Check the clause and determine what attributes it references. */ + if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause, + relid, attnums)) + return false; + + /* + * Check that the user has permission to read all these attributes. Use + * checkAsUser if it's set, in case we're accessing the table via a view. + */ + userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); + + if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK) + { + /* Don't have table privilege, must check individual columns */ + if (bms_is_member(InvalidAttrNumber, *attnums)) + { + /* Have a whole-row reference, must have access to all columns */ + if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT, + ACLMASK_ALL) != ACLCHECK_OK) + return false; + } + else + { + /* Check the columns referenced by the clause */ + int attnum = -1; + + while ((attnum = bms_next_member(*attnums, attnum)) >= 0) + { + if (pg_attribute_aclcheck(rte->relid, attnum, userid, + ACL_SELECT) != ACLCHECK_OK) + return false; + } + } + } + + /* If we reach here, the clause is OK */ + return true; } /* @@ -1020,7 +1078,7 @@ statext_mcv_clauselist_selectivity(Plann Bitmapset *attnums = NULL; if (!bms_is_member(listidx, *estimatedclauses) && - statext_is_compatible_clause(clause, rel->relid, &attnums)) + statext_is_compatible_clause(root, clause, rel->relid, &attnums)) { list_attnums[listidx] = attnums; clauses_attnums = bms_add_members(clauses_attnums, attnums); diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 6dfca7a..aea5ada --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -684,3 +684,63 @@ SELECT * FROM check_estimated_rows('SELE 1 | 0 (1 row) +-- Permission tests. Users should not be able to see specific data values in +-- the extended statistics, if they lack permission to see those values in +-- the underlying table. +-- +-- Currently this is only relevant for MCV stats. +CREATE TABLE priv_test_tbl ( + a int, + b int +); +INSERT INTO priv_test_tbl + SELECT mod(i,5), mod(i,10) FROM generate_series(1,100) s(i); +CREATE STATISTICS priv_test_stats (mcv) ON a, b + FROM priv_test_tbl; +ANALYZE priv_test_tbl; +-- User with no access +CREATE USER regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM priv_test_tbl; -- Permission denied +ERROR: permission denied for table priv_test_tbl +-- Attempt to gain access using a leaky operator +CREATE FUNCTION op_leak(int, int) RETURNS bool + AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END' + LANGUAGE plpgsql; +CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int, + restrict = scalarltsel); +SELECT * FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_tbl +DELETE FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +ERROR: permission denied for table priv_test_tbl +-- Grant access via a security barrier view, but hide all data +RESET SESSION AUTHORIZATION; +CREATE VIEW priv_test_view WITH (security_barrier=true) + AS SELECT * FROM priv_test_tbl WHERE false; +GRANT SELECT, DELETE ON priv_test_view TO regress_stats_user1; +-- Should now have access via the view, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +DELETE FROM priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak +-- Grant table access, but hide all data with RLS +RESET SESSION AUTHORIZATION; +ALTER TABLE priv_test_tbl ENABLE ROW LEVEL SECURITY; +GRANT SELECT, DELETE ON priv_test_tbl TO regress_stats_user1; +-- Should now have direct table access, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak + a | b +---+--- +(0 rows) + +DELETE FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak +-- Tidy up +DROP OPERATOR <<< (int, int); +DROP FUNCTION op_leak(int, int); +RESET SESSION AUTHORIZATION; +DROP VIEW priv_test_view; +DROP TABLE priv_test_tbl; diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql new file mode 100644 index c6a5776..2ac1d27 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -434,3 +434,63 @@ SELECT * FROM check_estimated_rows('SELE SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c'); + +-- Permission tests. Users should not be able to see specific data values in +-- the extended statistics, if they lack permission to see those values in +-- the underlying table. +-- +-- Currently this is only relevant for MCV stats. +CREATE TABLE priv_test_tbl ( + a int, + b int +); + +INSERT INTO priv_test_tbl + SELECT mod(i,5), mod(i,10) FROM generate_series(1,100) s(i); + +CREATE STATISTICS priv_test_stats (mcv) ON a, b + FROM priv_test_tbl; + +ANALYZE priv_test_tbl; + +-- User with no access +CREATE USER regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM priv_test_tbl; -- Permission denied + +-- Attempt to gain access using a leaky operator +CREATE FUNCTION op_leak(int, int) RETURNS bool + AS 'BEGIN RAISE NOTICE ''op_leak => %, %'', $1, $2; RETURN $1 < $2; END' + LANGUAGE plpgsql; +CREATE OPERATOR <<< (procedure = op_leak, leftarg = int, rightarg = int, + restrict = scalarltsel); +SELECT * FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied +DELETE FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Permission denied + +-- Grant access via a security barrier view, but hide all data +RESET SESSION AUTHORIZATION; +CREATE VIEW priv_test_view WITH (security_barrier=true) + AS SELECT * FROM priv_test_tbl WHERE false; +GRANT SELECT, DELETE ON priv_test_view TO regress_stats_user1; + +-- Should now have access via the view, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak +DELETE FROM priv_test_view WHERE a <<< 0 AND b <<< 0; -- Should not leak + +-- Grant table access, but hide all data with RLS +RESET SESSION AUTHORIZATION; +ALTER TABLE priv_test_tbl ENABLE ROW LEVEL SECURITY; +GRANT SELECT, DELETE ON priv_test_tbl TO regress_stats_user1; + +-- Should now have direct table access, but see nothing and leak nothing +SET SESSION AUTHORIZATION regress_stats_user1; +SELECT * FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak +DELETE FROM priv_test_tbl WHERE a <<< 0 AND b <<< 0; -- Should not leak + +-- Tidy up +DROP OPERATOR <<< (int, int); +DROP FUNCTION op_leak(int, int); +RESET SESSION AUTHORIZATION; +DROP VIEW priv_test_view; +DROP TABLE priv_test_tbl;