This is an automated email from the ASF dual-hosted git repository.

hahao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 69645c198766499e257eacd1cdf44f7856d093b8
Author: Andrew Wong <[email protected]>
AuthorDate: Sat Jun 8 11:07:54 2019 -0700

    sentry: avoid authorizing every table in ListTables
    
    Currently ListTables will call into Sentry for every table in Kudu's
    catalog, checking whether the user has metadata privileges on the table,
    and adding it to the ListTablesResponsePB if so. This is expensive,
    particularly when there are thousands of tables in Kudu.
    
    This patch updates the authorization behavior to check whether the user
    has METADATA privileges on the table's database for each table. If no
    such privilege exists for the database, each table within the database
    is checked.
    
    A benchmark is added to gauge the performance in various scenarios:
    
    Iterating through many databases:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* 
--num_databases=300 --num_tables_per_db=1 --has-db-privileges=true
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 25.243s 
user 0.015s     sys 0.001s
    
    Iterating through many databases and many tables. This is the worst case
    since listing a given table will require two lookups in Sentry -- one
    for the database and one for the table:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* 
--num_databases=300 --num_tables_per_db=1 --has-db-privileges=false
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.543s  
user 0.040s     sys 0.013s
    
    This above worst case is actually less performant than without this
    patch. A follow-up patch will make this more performant by caching
    privileges from requests at the database scope.
    
    Iterating through one database and no tables since the user has
    database-level privileges:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* 
--num_databases=1 --num_tables_per_db=600 --has-db-privileges=true
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 0.271s  
user 0.000s     sys 0.000s
    
    Iterating through one database and many tables:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* 
--num_databases=1 --num_tables_per_db=600 --has-db-privileges=false
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.441s  
     user 0.031s     sys 0.013s
    
    Change-Id: I7c495c635fbd2a661709b6d7f04973b7864b5527
    Reviewed-on: http://gerrit.cloudera.org:8080/13549
    Reviewed-by: Adar Dembo <[email protected]>
    Reviewed-by: Hao Hao <[email protected]>
    Tested-by: Andrew Wong <[email protected]>
---
 src/kudu/master/sentry_authz_provider-test.cc | 83 +++++++++++++++++++++++++++
 src/kudu/master/sentry_authz_provider.cc      | 42 +++++++++++++-
 2 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/kudu/master/sentry_authz_provider-test.cc 
b/src/kudu/master/sentry_authz_provider-test.cc
index 21a8588..3a281df 100644
--- a/src/kudu/master/sentry_authz_provider-test.cc
+++ b/src/kudu/master/sentry_authz_provider-test.cc
@@ -71,6 +71,18 @@ DEFINE_int32(num_table_privileges, 100,
     "Number of table privileges to use in testing");
 TAG_FLAG(num_table_privileges, hidden);
 
+DEFINE_int32(num_databases, 10,
+    "Number of databases to use in testing");
+TAG_FLAG(num_databases, hidden);
+
+DEFINE_int32(num_tables_per_db, 10,
+    "Number of tables to use per database to use in testing");
+TAG_FLAG(num_tables_per_db, hidden);
+
+DEFINE_bool(has_db_privileges, true,
+    "Whether the user should have db-level privileges in testing");
+TAG_FLAG(has_db_privileges, hidden);
+
 DECLARE_int32(sentry_service_recv_timeout_seconds);
 DECLARE_int32(sentry_service_send_timeout_seconds);
 DECLARE_uint32(sentry_privileges_cache_capacity_mb);
@@ -193,6 +205,7 @@ TEST(SentryPrivilegesFetcherStaticTest, 
TestPrivilegesWellFormed) {
 class SentryAuthzProviderTest : public SentryTestBase {
  public:
   static const char* const kTestUser;
+  static const char* const kTrustedUser;
   static const char* const kUserGroup;
   static const char* const kRoleName;
 
@@ -205,6 +218,7 @@ class SentryAuthzProviderTest : public SentryTestBase {
     // Configure the SentryAuthzProvider flags.
     FLAGS_sentry_privileges_cache_capacity_mb = CachingEnabled() ? 1 : 0;
     FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
+    FLAGS_trusted_user_acl = kTrustedUser;
     sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
     ASSERT_OK(sentry_authz_provider_->Start());
   }
@@ -292,6 +306,7 @@ class SentryAuthzProviderTest : public SentryTestBase {
 };
 
 const char* const SentryAuthzProviderTest::kTestUser = "test-user";
+const char* const SentryAuthzProviderTest::kTrustedUser = "trusted-user";
 const char* const SentryAuthzProviderTest::kUserGroup = "user";
 const char* const SentryAuthzProviderTest::kRoleName = "developer";
 
@@ -372,6 +387,74 @@ TEST_F(SentryAuthzProviderTest, BroadAuthzScopeBenchmark) {
   ASSERT_TRUE(s.IsNotAuthorized());
 }
 
+// Benchmark to test the time it takes to evaluate privileges when listing
+// tables.
+TEST_F(SentryAuthzProviderTest, ListTablesBenchmark) {
+  ASSERT_OK(CreateRoleAndAddToGroups());
+  unordered_set<string> tables;
+  for (int d = 0; d < FLAGS_num_databases; d++) {
+    const string db_name = Substitute("$0_$1", kDb, d);
+    // Regardless of whether the user has database-level privileges on the
+    // database, make sure there's at least one privilege for a database to
+    // keep the benchmark consistent when toggling this flag.
+    const string dummy_name = Substitute("$0_$1", "foo", d);
+    ASSERT_OK(AlterRoleGrantPrivilege(
+        GetDatabasePrivilege(FLAGS_has_db_privileges ? db_name : dummy_name, 
"METADATA")));
+    for (int t = 0; t < FLAGS_num_tables_per_db; t++) {
+      const string table_name = Substitute("$0_$1", kTable, t);
+      const string table_ident = Substitute("$0.$1", db_name, table_name);
+
+      KLOG_EVERY_N_SECS(INFO, 3) << "Granted privilege on table: " << 
table_ident;
+      ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, 
"METADATA")));
+      EmplaceOrDie(&tables, table_ident);
+    }
+  }
+  bool checked_table_names = false;
+  LOG_TIMING(INFO, "Listing tables") {
+    ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(
+        kTestUser, &tables, &checked_table_names));
+  }
+  ASSERT_TRUE(checked_table_names);
+  ASSERT_EQ(FLAGS_num_databases * FLAGS_num_tables_per_db, tables.size());
+}
+
+TEST_F(SentryAuthzProviderTest, TestListTables) {
+  ASSERT_OK(CreateRoleAndAddToGroups());
+  const int kNumDbs = 2;
+  const int kNumTablesPerDb = 5;
+  const int kNumNonHiveTables = 3;
+  unordered_set<string> tables;
+  for (int d = 0; d < kNumDbs; d++) {
+    const string db_name = Substitute("$0_$1", kDb, d);
+    for (int t = 0; t < kNumTablesPerDb; t++) {
+      const string table_name = Substitute("$0_$1", kTable, t);
+      // To test the absence of privileges, only grant privileges on one table
+      // per database.
+      if (t == 0) {
+        ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, 
table_name, "METADATA")));
+      }
+      EmplaceOrDie(&tables, Substitute("$0.$1", db_name, table_name));
+    }
+  }
+  // Add some tables that don't conform to Hive's naming convention.
+  for (int i = 0; i < kNumNonHiveTables; i++) {
+    EmplaceOrDie(&tables, Substitute("badname_$0!", i));
+  }
+  bool checked_table_names = false;
+  // List tables as a trusted user. All tables, including non-Hive-conformant
+  // ones, should be visible.
+  ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(
+      kTrustedUser, &tables, &checked_table_names));
+  ASSERT_FALSE(checked_table_names);
+  ASSERT_EQ(kNumDbs * kNumTablesPerDb + kNumNonHiveTables, tables.size());
+
+  // Now try as a regular user. Only the tables with Hive-conformant names that
+  // the user has privileges on should be visible.
+  ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(kTestUser, &tables, 
&checked_table_names));
+  ASSERT_TRUE(checked_table_names);
+  ASSERT_EQ(kNumDbs, tables.size());
+}
+
 class SentryAuthzProviderFilterPrivilegesTest : public SentryAuthzProviderTest 
{
  public:
   SentryAuthzProviderFilterPrivilegesTest()
diff --git a/src/kudu/master/sentry_authz_provider.cc 
b/src/kudu/master/sentry_authz_provider.cc
index feba6a2..80b8074 100644
--- a/src/kudu/master/sentry_authz_provider.cc
+++ b/src/kudu/master/sentry_authz_provider.cc
@@ -17,19 +17,23 @@
 
 #include "kudu/master/sentry_authz_provider.h"
 
+#include <unordered_map>
 #include <unordered_set>
 #include <utility>
+#include <vector>
 
 #include <gflags/gflags_declare.h>
 #include <glog/logging.h>
 
 #include "kudu/common/common.pb.h"
+#include "kudu/common/table_util.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/sentry_privileges_fetcher.h"
 #include "kudu/security/token.pb.h"
 #include "kudu/sentry/sentry_action.h"
 #include "kudu/sentry/sentry_authorizable_scope.h"
+#include "kudu/util/slice.h"
 #include "kudu/util/trace.h"
 
 DECLARE_string(sentry_service_rpc_addresses);
@@ -39,7 +43,9 @@ using kudu::security::TablePrivilegePB;
 using kudu::sentry::SentryAction;
 using kudu::sentry::SentryAuthorizableScope;
 using std::string;
+using std::unordered_map;
 using std::unordered_set;
+using std::vector;
 using strings::Substitute;
 
 namespace kudu {
@@ -194,11 +200,43 @@ Status 
SentryAuthzProvider::AuthorizeGetTableMetadata(const string& table_name,
 Status SentryAuthzProvider::AuthorizeListTables(const string& user,
                                                 unordered_set<string>* 
table_names,
                                                 bool* checked_table_names) {
+  if (IsTrustedUser(user)) {
+    *checked_table_names = false;
+    return Status::OK();
+  }
   unordered_set<string> authorized_tables;
+  unordered_map<string, vector<string>> tables_by_db;
   for (auto table_name : *table_names) {
-    Status s = AuthorizeGetTableMetadata(table_name, user);
+    Slice db_slice;
+    Slice unused_table_slice;
+    Status s = ParseHiveTableIdentifier(table_name, &db_slice, 
&unused_table_slice);
+    if (!s.ok()) {
+      continue;
+    }
+    LookupOrInsert(&tables_by_db, db_slice.ToString(), 
{}).emplace_back(std::move(table_name));
+  }
+  for (auto db_and_tables : tables_by_db) {
+    auto tables_in_db = db_and_tables.second;
+    DCHECK(!tables_in_db.empty());
+    // Authorize database-level privileges first in case the user has
+    // database-level privileges. This would allow us to avoid authorizing each
+    // indiviudual table.
+    // Note: the exact table isn't particularly important, as long as we pass
+    // in a table within the database we're interested in.
+    const string& first_table_name_in_db = tables_in_db[0];
+    Status s = Authorize(SentryAuthorizableScope::Scope::DATABASE, 
SentryAction::METADATA,
+                         first_table_name_in_db, user);
     if (s.ok()) {
-      EmplaceOrDie(&authorized_tables, std::move(table_name));
+      for (auto table_name : tables_in_db) {
+        EmplaceOrDie(&authorized_tables, std::move(table_name));
+      }
+    } else {
+      for (auto table_name : tables_in_db) {
+        s = AuthorizeGetTableMetadata(table_name, user);
+        if (s.ok()) {
+          EmplaceOrDie(&authorized_tables, std::move(table_name));
+        }
+      }
     }
   }
   *table_names = authorized_tables;

Reply via email to