This is an automated email from the ASF dual-hosted git repository. hahao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 69645c198766499e257eacd1cdf44f7856d093b8 Author: Andrew Wong <[email protected]> AuthorDate: Sat Jun 8 11:07:54 2019 -0700 sentry: avoid authorizing every table in ListTables Currently ListTables will call into Sentry for every table in Kudu's catalog, checking whether the user has metadata privileges on the table, and adding it to the ListTablesResponsePB if so. This is expensive, particularly when there are thousands of tables in Kudu. This patch updates the authorization behavior to check whether the user has METADATA privileges on the table's database for each table. If no such privilege exists for the database, each table within the database is checked. A benchmark is added to gauge the performance in various scenarios: Iterating through many databases: $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=true sentry_authz_provider-test.cc:418] Time spent Listing tables: real 25.243s user 0.015s sys 0.001s Iterating through many databases and many tables. This is the worst case since listing a given table will require two lookups in Sentry -- one for the database and one for the table: $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=false sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.543s user 0.040s sys 0.013s This above worst case is actually less performant than without this patch. A follow-up patch will make this more performant by caching privileges from requests at the database scope. Iterating through one database and no tables since the user has database-level privileges: $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=1 --num_tables_per_db=600 --has-db-privileges=true sentry_authz_provider-test.cc:418] Time spent Listing tables: real 0.271s user 0.000s sys 0.000s Iterating through one database and many tables: $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=1 --num_tables_per_db=600 --has-db-privileges=false sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.441s user 0.031s sys 0.013s Change-Id: I7c495c635fbd2a661709b6d7f04973b7864b5527 Reviewed-on: http://gerrit.cloudera.org:8080/13549 Reviewed-by: Adar Dembo <[email protected]> Reviewed-by: Hao Hao <[email protected]> Tested-by: Andrew Wong <[email protected]> --- src/kudu/master/sentry_authz_provider-test.cc | 83 +++++++++++++++++++++++++++ src/kudu/master/sentry_authz_provider.cc | 42 +++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/src/kudu/master/sentry_authz_provider-test.cc b/src/kudu/master/sentry_authz_provider-test.cc index 21a8588..3a281df 100644 --- a/src/kudu/master/sentry_authz_provider-test.cc +++ b/src/kudu/master/sentry_authz_provider-test.cc @@ -71,6 +71,18 @@ DEFINE_int32(num_table_privileges, 100, "Number of table privileges to use in testing"); TAG_FLAG(num_table_privileges, hidden); +DEFINE_int32(num_databases, 10, + "Number of databases to use in testing"); +TAG_FLAG(num_databases, hidden); + +DEFINE_int32(num_tables_per_db, 10, + "Number of tables to use per database to use in testing"); +TAG_FLAG(num_tables_per_db, hidden); + +DEFINE_bool(has_db_privileges, true, + "Whether the user should have db-level privileges in testing"); +TAG_FLAG(has_db_privileges, hidden); + DECLARE_int32(sentry_service_recv_timeout_seconds); DECLARE_int32(sentry_service_send_timeout_seconds); DECLARE_uint32(sentry_privileges_cache_capacity_mb); @@ -193,6 +205,7 @@ TEST(SentryPrivilegesFetcherStaticTest, TestPrivilegesWellFormed) { class SentryAuthzProviderTest : public SentryTestBase { public: static const char* const kTestUser; + static const char* const kTrustedUser; static const char* const kUserGroup; static const char* const kRoleName; @@ -205,6 +218,7 @@ class SentryAuthzProviderTest : public SentryTestBase { // Configure the SentryAuthzProvider flags. FLAGS_sentry_privileges_cache_capacity_mb = CachingEnabled() ? 1 : 0; FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString(); + FLAGS_trusted_user_acl = kTrustedUser; sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_)); ASSERT_OK(sentry_authz_provider_->Start()); } @@ -292,6 +306,7 @@ class SentryAuthzProviderTest : public SentryTestBase { }; const char* const SentryAuthzProviderTest::kTestUser = "test-user"; +const char* const SentryAuthzProviderTest::kTrustedUser = "trusted-user"; const char* const SentryAuthzProviderTest::kUserGroup = "user"; const char* const SentryAuthzProviderTest::kRoleName = "developer"; @@ -372,6 +387,74 @@ TEST_F(SentryAuthzProviderTest, BroadAuthzScopeBenchmark) { ASSERT_TRUE(s.IsNotAuthorized()); } +// Benchmark to test the time it takes to evaluate privileges when listing +// tables. +TEST_F(SentryAuthzProviderTest, ListTablesBenchmark) { + ASSERT_OK(CreateRoleAndAddToGroups()); + unordered_set<string> tables; + for (int d = 0; d < FLAGS_num_databases; d++) { + const string db_name = Substitute("$0_$1", kDb, d); + // Regardless of whether the user has database-level privileges on the + // database, make sure there's at least one privilege for a database to + // keep the benchmark consistent when toggling this flag. + const string dummy_name = Substitute("$0_$1", "foo", d); + ASSERT_OK(AlterRoleGrantPrivilege( + GetDatabasePrivilege(FLAGS_has_db_privileges ? db_name : dummy_name, "METADATA"))); + for (int t = 0; t < FLAGS_num_tables_per_db; t++) { + const string table_name = Substitute("$0_$1", kTable, t); + const string table_ident = Substitute("$0.$1", db_name, table_name); + + KLOG_EVERY_N_SECS(INFO, 3) << "Granted privilege on table: " << table_ident; + ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, "METADATA"))); + EmplaceOrDie(&tables, table_ident); + } + } + bool checked_table_names = false; + LOG_TIMING(INFO, "Listing tables") { + ASSERT_OK(sentry_authz_provider_->AuthorizeListTables( + kTestUser, &tables, &checked_table_names)); + } + ASSERT_TRUE(checked_table_names); + ASSERT_EQ(FLAGS_num_databases * FLAGS_num_tables_per_db, tables.size()); +} + +TEST_F(SentryAuthzProviderTest, TestListTables) { + ASSERT_OK(CreateRoleAndAddToGroups()); + const int kNumDbs = 2; + const int kNumTablesPerDb = 5; + const int kNumNonHiveTables = 3; + unordered_set<string> tables; + for (int d = 0; d < kNumDbs; d++) { + const string db_name = Substitute("$0_$1", kDb, d); + for (int t = 0; t < kNumTablesPerDb; t++) { + const string table_name = Substitute("$0_$1", kTable, t); + // To test the absence of privileges, only grant privileges on one table + // per database. + if (t == 0) { + ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, "METADATA"))); + } + EmplaceOrDie(&tables, Substitute("$0.$1", db_name, table_name)); + } + } + // Add some tables that don't conform to Hive's naming convention. + for (int i = 0; i < kNumNonHiveTables; i++) { + EmplaceOrDie(&tables, Substitute("badname_$0!", i)); + } + bool checked_table_names = false; + // List tables as a trusted user. All tables, including non-Hive-conformant + // ones, should be visible. + ASSERT_OK(sentry_authz_provider_->AuthorizeListTables( + kTrustedUser, &tables, &checked_table_names)); + ASSERT_FALSE(checked_table_names); + ASSERT_EQ(kNumDbs * kNumTablesPerDb + kNumNonHiveTables, tables.size()); + + // Now try as a regular user. Only the tables with Hive-conformant names that + // the user has privileges on should be visible. + ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(kTestUser, &tables, &checked_table_names)); + ASSERT_TRUE(checked_table_names); + ASSERT_EQ(kNumDbs, tables.size()); +} + class SentryAuthzProviderFilterPrivilegesTest : public SentryAuthzProviderTest { public: SentryAuthzProviderFilterPrivilegesTest() diff --git a/src/kudu/master/sentry_authz_provider.cc b/src/kudu/master/sentry_authz_provider.cc index feba6a2..80b8074 100644 --- a/src/kudu/master/sentry_authz_provider.cc +++ b/src/kudu/master/sentry_authz_provider.cc @@ -17,19 +17,23 @@ #include "kudu/master/sentry_authz_provider.h" +#include <unordered_map> #include <unordered_set> #include <utility> +#include <vector> #include <gflags/gflags_declare.h> #include <glog/logging.h> #include "kudu/common/common.pb.h" +#include "kudu/common/table_util.h" #include "kudu/gutil/map-util.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/master/sentry_privileges_fetcher.h" #include "kudu/security/token.pb.h" #include "kudu/sentry/sentry_action.h" #include "kudu/sentry/sentry_authorizable_scope.h" +#include "kudu/util/slice.h" #include "kudu/util/trace.h" DECLARE_string(sentry_service_rpc_addresses); @@ -39,7 +43,9 @@ using kudu::security::TablePrivilegePB; using kudu::sentry::SentryAction; using kudu::sentry::SentryAuthorizableScope; using std::string; +using std::unordered_map; using std::unordered_set; +using std::vector; using strings::Substitute; namespace kudu { @@ -194,11 +200,43 @@ Status SentryAuthzProvider::AuthorizeGetTableMetadata(const string& table_name, Status SentryAuthzProvider::AuthorizeListTables(const string& user, unordered_set<string>* table_names, bool* checked_table_names) { + if (IsTrustedUser(user)) { + *checked_table_names = false; + return Status::OK(); + } unordered_set<string> authorized_tables; + unordered_map<string, vector<string>> tables_by_db; for (auto table_name : *table_names) { - Status s = AuthorizeGetTableMetadata(table_name, user); + Slice db_slice; + Slice unused_table_slice; + Status s = ParseHiveTableIdentifier(table_name, &db_slice, &unused_table_slice); + if (!s.ok()) { + continue; + } + LookupOrInsert(&tables_by_db, db_slice.ToString(), {}).emplace_back(std::move(table_name)); + } + for (auto db_and_tables : tables_by_db) { + auto tables_in_db = db_and_tables.second; + DCHECK(!tables_in_db.empty()); + // Authorize database-level privileges first in case the user has + // database-level privileges. This would allow us to avoid authorizing each + // indiviudual table. + // Note: the exact table isn't particularly important, as long as we pass + // in a table within the database we're interested in. + const string& first_table_name_in_db = tables_in_db[0]; + Status s = Authorize(SentryAuthorizableScope::Scope::DATABASE, SentryAction::METADATA, + first_table_name_in_db, user); if (s.ok()) { - EmplaceOrDie(&authorized_tables, std::move(table_name)); + for (auto table_name : tables_in_db) { + EmplaceOrDie(&authorized_tables, std::move(table_name)); + } + } else { + for (auto table_name : tables_in_db) { + s = AuthorizeGetTableMetadata(table_name, user); + if (s.ok()) { + EmplaceOrDie(&authorized_tables, std::move(table_name)); + } + } } } *table_names = authorized_tables;
