This is an automated email from the ASF dual-hosted git repository.
ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new dc9d1ad0965 HIVE-27595: replaced cartesian-product table filtering by
sort + binary search (#4601)
dc9d1ad0965 is described below
commit dc9d1ad0965ece6a09abdb6481ca113dafce7c75
Author: Henrib <[email protected]>
AuthorDate: Thu Aug 24 15:33:16 2023 +0200
HIVE-27595: replaced cartesian-product table filtering by sort + binary
search (#4601)
* HIVE-27595: replaced cartesian-product filtering by sort + binary search
lookup reducing complexity from m*n to (m+n)*logn; (Henri Biestro reviewed by
John Sherman, Saihemanth)
---
.../plugin/AuthorizationMetaStoreFilterHook.java | 23 +-
.../plugin/HivePrivilegeObjectUtils.java | 125 ++++++++-
.../plugin/metastore/HiveMetaStoreAuthorizer.java | 49 +---
.../metastore/TestHiveMetaStoreAuthorizer.java | 2 +-
.../metastore/TestTablePermissionFilterAlgos.java | 282 +++++++++++++++++++++
5 files changed, 425 insertions(+), 56 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
index ad218af9a05..5723022f62b 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/AuthorizationMetaStoreFilterHook.java
@@ -19,10 +19,6 @@ package
org.apache.hadoop.hive.ql.security.authorization.plugin;
import java.util.ArrayList;
import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private;
import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
@@ -31,6 +27,7 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.TableMeta;
import
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;
+import static
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils.TablePrivilegeLookup;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.slf4j.Logger;
@@ -162,20 +159,18 @@ public class AuthorizationMetaStoreFilterHook extends
DefaultMetaStoreFilterHook
return objs;
}
- private ImmutablePair<String, String> tableMetaKey(String dbName, String
tableName) {
- return new ImmutablePair(dbName, tableName);
- }
-
@Override
public List<TableMeta> filterTableMetas(List<TableMeta> tableMetas) throws
MetaException {
List<HivePrivilegeObject> listObjs = tableMetasToPrivilegeObjs(tableMetas);
List<HivePrivilegeObject> filteredList = getFilteredObjects(listObjs);
- Set<ImmutablePair<String, String>> filteredNames = filteredList.stream()
- .map(e -> tableMetaKey(e.getDbname(), e.getObjectName()))
- .collect(Collectors.toSet());
- return tableMetas.stream()
- .filter(e -> filteredNames.contains(tableMetaKey(e.getDbName(),
e.getTableName())))
- .collect(Collectors.toList());
+ final List<TableMeta> ret = new ArrayList<>();
+ final TablePrivilegeLookup index = new TablePrivilegeLookup(filteredList);
+ for(TableMeta table : tableMetas) {
+ if (index.lookup(table.getDbName(), table.getTableName()) != null) {
+ ret.add(table);
+ }
+ }
+ return ret;
}
@Override
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
index 4ab7870e7b3..479dce09ab7 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObjectUtils.java
@@ -19,8 +19,6 @@ package
org.apache.hadoop.hive.ql.security.authorization.plugin;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
@@ -59,7 +57,130 @@ public class HivePrivilegeObjectUtils {
objs.add(new HivePrivilegeObject(HivePrivilegeObjectType.DATACONNECTOR,
null, dcname));
}
return objs;
+ }
+
+ /**
+ * A helper enabling efficient lookup of a table in a list by database name,
table name.
+ * <p>When filtering a source list of tables by checking their presence in
another permission list
+ * using the database name, table name, we need to avoid performing a
cartesian product. This
+ * product stems from checking each table from the source (n tables) by
comparing it to each
+ * table in the permission list (m permissions), thus an n * m
complexity.</p>
+ * <p>This class reduces the complexity of the lookup in the permission by
sorting them and
+ * using a binary search; the sort cost is m*log(m) and each lookup is
log(m), the overall
+ * complexity of a check for all tables is in the order of m*log(m) +
n*log(m) = (m + n)*log(n),
+ * close to m * log(n).</p>
+ *
+ */
+ public abstract static class TableLookup<T> {
+ /** The container. */
+ private final T[] index;
+
+ /**
+ * @param table the table
+ * @return the database name of the table
+ */
+ protected abstract String getDbName(T table);
+
+ /**
+ * @param table the table
+ * @return the table name of the table
+ */
+ protected abstract String getTableName(T table);
+
+ /**
+ * Compares a table to another by names.
+ * @param table the table
+ * @param arg the argument table
+ * @return < 0, 0, > 0
+ */
+ private int compareNames(final T table, final T arg) {
+ int cmp = getDbName(table).compareTo(getDbName(arg));
+ if (cmp == 0) {
+ String argTableName = getTableName(arg);
+ if (argTableName != null) {
+ cmp = getTableName(table).compareTo(argTableName);
+ }
+ }
+ return cmp;
+ }
+
+ /**
+ * Compares a table to names.
+ * @param table the table
+ * @param dbName the argument database name
+ * @param dbName the argument table name
+ * @return < 0, 0, > 0
+ */
+ private int compareNames(final T table, final String dbName, final String
tableName) {
+ int cmp = getDbName(table).compareTo(dbName);
+ if (cmp == 0 && tableName != null) {
+ cmp = getTableName(table).compareTo(tableName);
+ }
+ return cmp;
+ }
+
+ /**
+ * Creates a ByName
+ * @param tables the tables to consider as a clique
+ */
+ protected TableLookup(List<T> tables) {
+ if (tables != null) {
+ index = tables.toArray((T[]) new Object[0]);
+ // sort them by names
+ Arrays.sort(index, this::compareNames);
+ } else {
+ index = (T[]) new Object[0];
+ }
+ }
+ /**
+ * Lookup using dichotomy using order described by database name, table
name.
+ * @param dbName the database name
+ * @param tableName the table name
+ * @return the table if found in the index, null otherwise
+ */
+ public final T lookup(final String dbName, final String tableName) {
+ int low = 0;
+ int high = index.length - 1;
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ T item = index[mid];
+ int cmp = compareNames(item, dbName, tableName);
+ if (cmp < 0) {
+ low = mid + 1;
+ } else if (cmp > 0) {
+ high = mid - 1;
+ } else {
+ return item; // key found
+ }
+ }
+ return null; // key not found.
+ }
+
+ /**
+ * Checks whether a given table is present in this set.
+ * @param tt the table to check
+ * @return true if the set contains an item having the same database and
table name
+ */
+ public final boolean contains(T tt) {
+ return lookup(getDbName(tt), getTableName(tt)) != null;
+ }
}
+ /**
+ * Specialized lookup for table privilege (HivePrivilegeObject)..
+ */
+ public static class TablePrivilegeLookup extends
TableLookup<HivePrivilegeObject> {
+ public TablePrivilegeLookup(List<HivePrivilegeObject> tables) {
+ super(tables);
+ }
+
+ @Override protected String getDbName(HivePrivilegeObject o) {
+ return o.getDbname();
+ }
+
+ @Override protected String getTableName(HivePrivilegeObject o) {
+ return o.getObjectName();
+ }
+ }
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
index 55a980bc092..7f4cda8d922 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/HiveMetaStoreAuthorizer.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.metastore.api.TableMeta;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.security.HiveMetastoreAuthenticationProvider;
+import static
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils.TablePrivilegeLookup;
import
org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.events.*;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer;
import
org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory;
@@ -351,30 +352,16 @@ public class HiveMetaStoreAuthorizer extends
MetaStorePreEventListener implement
}
private List<Table> getFilteredTableList(List<HivePrivilegeObject>
hivePrivilegeObjects, List<Table> tableList) {
- List<Table> ret = new ArrayList<>();
- for (HivePrivilegeObject hivePrivilegeObject : hivePrivilegeObjects) {
- String dbName = hivePrivilegeObject.getDbname();
- String tblName = hivePrivilegeObject.getObjectName();
- Table table = getFilteredTable(dbName, tblName, tableList);
- if (table != null) {
+ final List<Table> ret = new ArrayList<>();
+ final TablePrivilegeLookup index = new
TablePrivilegeLookup(hivePrivilegeObjects);
+ for(Table table : tableList) {
+ if (index.lookup(table.getDbName(), table.getTableName()) != null) {
ret.add(table);
}
}
return ret;
}
- private Table getFilteredTable(String dbName, String tblName, List<Table>
tableList) {
- Table ret = null;
- for (Table table: tableList) {
- String databaseName = table.getDbName();
- String tableName = table.getTableName();
- if (dbName.equals(databaseName) && tblName.equals(tableName)) {
- ret = table;
- break;
- }
- }
- return ret;
- }
private List<String> filterTableNames(HiveMetaStoreAuthzInfo
hiveMetaStoreAuthzInfo, String dbName,
List<String> tableNames) throws MetaException {
@@ -396,32 +383,16 @@ public class HiveMetaStoreAuthorizer extends
MetaStorePreEventListener implement
}
return ret;
}
-
- private List<String> getFilteredTableNames(List<HivePrivilegeObject>
hivePrivilegeObjects, String databaseName,
- List<String> tableNames) {
+ private List<String> getFilteredTableNames(List<HivePrivilegeObject>
hivePrivilegeObjects, String databaseName, List<String> tableNames) {
List<String> ret = new ArrayList<>();
- for (HivePrivilegeObject hivePrivilegeObject : hivePrivilegeObjects) {
- String dbName = hivePrivilegeObject.getDbname();
- String tblName = hivePrivilegeObject.getObjectName();
- String table = getFilteredTableNames(dbName, tblName, databaseName,
tableNames);
- if (table != null) {
- ret.add(table);
- }
- }
- return ret;
- }
-
- private String getFilteredTableNames(String dbName, String tblName, String
databaseName, List<String> tableNames) {
- String ret = null;
- for (String tableName : tableNames) {
- if (dbName.equals(databaseName) && tblName.equals(tableName)) {
- ret = tableName;
- break;
+ final TablePrivilegeLookup index = new
TablePrivilegeLookup(hivePrivilegeObjects);
+ for(String tableName : tableNames) {
+ if (index.lookup(databaseName, tableName) != null) {
+ ret.add(tableName);
}
}
return ret;
}
-
private String getDBName(String str) {
return (str != null) ? str.substring(str.indexOf("#")+1) : null;
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
index bb191ba8605..ec9a93c0837 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestHiveMetaStoreAuthorizer.java
@@ -36,8 +36,8 @@ import org.junit.FixMethodOrder;
import org.junit.runners.MethodSorters;
import org.junit.Before;
import org.junit.Test;
-import java.util.Map;
+import java.util.Map;
import java.io.File;
import static org.junit.Assert.assertEquals;
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestTablePermissionFilterAlgos.java
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestTablePermissionFilterAlgos.java
new file mode 100644
index 00000000000..0f0a2ff996e
--- /dev/null
+++
b/ql/src/test/org/apache/hadoop/hive/ql/security/authorization/plugin/metastore/TestTablePermissionFilterAlgos.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.security.authorization.plugin.metastore;
+
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import
org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * This test tries different methods of lookup to try and determine the most
efficient in
+ * the particular usage case related to checking tables against permissions.
+ * Note that a meaningful performance test requires at least 20 dbs, 10000
tables per db and 40 loops
+ * on a 2022 laptop.
+ */
+public class TestTablePermissionFilterAlgos {
+ private static final Logger LOGGER =
LogManager.getLogger(TestHiveMetaStoreAuthorizer.class);
+ // reduce constants to speed up tests when true, set to false for
benchmarking
+ private static final boolean TEST = true;
+ // Whether we shuffle table names or not: this has an important effect on
sort speed - mostly sorted
+ // arrays sort faster than completely unsorted ones.
+ // A hashset of concatenated strings seem to be faster than a sort in that
case.
+ // I suspect something fishy in the cost of string creation (intern()?)
since a hashset of pairs is much slower.
+ // We'll make the semi-educated assumption that most of the time, tablenames
come back mostly ordered.
+ private static final boolean SHUFFLE_TABLENAMES = !TEST;
+ private static final int NUM_DB = TEST? 2 : 20;
+ private static final int NUM_TBL = TEST? 100 : 10000;
+ private static final int NLOOPS = TEST? 1 : 40;
+
+ private final List<TestTable> tables = createTables(NUM_DB, NUM_TBL);
+ // lookup some
+ private final List<TestTable> filtered = tables.stream()
+ .filter(t -> t.getTableName().endsWith("0"))
+ .collect(Collectors.toList());
+
+ /**
+ * Fake tables to test.
+ */
+ private static class TestTable {
+ private int hashCode = -1;
+ private final String dbName;
+ private final String tableName;
+
+ private TestTable(String dbName, String tableName) {
+ this.dbName = dbName;
+ this.tableName = tableName;
+ }
+
+ @Override
+ public String toString() {
+ return tableFullName(this);
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o instanceof TestTable) {
+ TestTable tt = (TestTable) o;
+ return dbName.equals(tt.dbName) && tableName.equals(tt.tableName);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int h = hashCode;
+ if (h < 0) {
+ hashCode = h = Math.abs((31 * dbName.hashCode()) +
tableName.hashCode());
+ }
+ return h;
+ }
+ }
+
+ /**
+ * Creates a list of tables.
+ * @param dbs number of databases
+ * @param tbls number of tables per database
+ * @return a list of tables
+ */
+ private static List<TestTable> createTables(int dbs, int tbls) {
+ Random rnd = new Random(20230815L);
+ List<Integer> dbis = new ArrayList<>(dbs);
+ for(int i = 0; i < dbs; ++i) {
+ dbis.add(i);
+ }
+ Collections.shuffle(dbis, rnd);
+ List<Integer> tblis = new ArrayList<>(tbls);
+ for(int i = 0; i < tbls; ++i) {
+ tblis.add(i);
+ }
+ if (SHUFFLE_TABLENAMES) {
+ Collections.shuffle(tblis, rnd);
+ }
+
+ List<TestTable> tables = new ArrayList<>(dbs * tbls);
+ for(int d : dbis) {
+ for(int t : tblis) {
+ String dbname = String.format("db%05x", d);
+ String tblName = String.format("tbl%05x", t);
+ tables.add(new TestTable(dbname, tblName));
+ }
+ }
+ return tables;
+ }
+
+ /**
+ * Allows timing code blocks execution precisely.
+ */
+ static class Chrono {
+ long start;
+ long end;
+
+ void start() {
+ start = System.currentTimeMillis();
+ }
+ void stop() {
+ end = System.currentTimeMillis();
+ }
+ String elapse() {
+ return String.format("%.3f", (double) (end - start) / 1000d);
+ }
+ }
+
+ /**
+ * Specialized for TestTable purpose.
+ */
+ private static class TestLookup extends
HivePrivilegeObjectUtils.TableLookup<TestTable> {
+ protected TestLookup(List<TestTable> tables) {
+ super(tables);
+ }
+
+ @Override protected String getDbName(TestTable o) {
+ return o.getDbName();
+ }
+
+ @Override protected String getTableName(TestTable o) {
+ return o.getTableName();
+ }
+ };
+
+ @Test
+ public void testIndexedLookup() {
+ System.gc();
+ Chrono chrono = new Chrono();
+ chrono.start();
+ int cnt = 0;
+ for(int l = 0; l < NLOOPS; ++l) {
+ TestLookup index = new TestLookup(tables);
+ List<TestTable> tts = l % 2 == 1
+ ? tables
+ : filtered;
+ cnt = 0;
+ // lookup all
+ for (TestTable tt : tts) {
+ if (index.contains(tt)) {
+ cnt += 1;
+ }
+ }
+ Assert.assertEquals(cnt, tts.size());
+ }
+ chrono.stop();
+ LOGGER.info("lookup elapse " + chrono.elapse());
+ }
+
+ private static ImmutablePair<String,String> tableKey(TestTable tt) {
+ return new ImmutablePair<>(tt.getDbName(), tt.getTableName());
+ }
+
+ @Test
+ public void testKeyPairLookup() {
+ System.gc();
+ Chrono chrono = new Chrono();
+ chrono.start();
+ for(int l = 0; l < NLOOPS; ++l) {
+ Set<ImmutablePair<String, String>> index = tables.stream()
+ .map(TestTablePermissionFilterAlgos::tableKey)
+ .collect(Collectors.toSet());
+ List<TestTable> tts = l % 2 == 1
+ ? tables
+ : filtered;
+ int cnt = 0;
+ // lookup all
+ for (TestTable tt : tts) {
+ if (index.contains(tableKey(tt))) {
+ cnt += 1;
+ }
+ }
+ Assert.assertEquals(cnt, tts.size());
+ }
+ chrono.stop();
+ LOGGER.info("set pair elapse " + chrono.elapse());
+ }
+
+ @Test
+ public void testTestTableLookup() {
+ System.gc();
+ Chrono chrono = new Chrono();
+ chrono.start();
+ for(int l = 0; l < NLOOPS; ++l) {
+ Set<TestTable> index = new HashSet<>(tables);
+ List<TestTable> tts = l % 2 == 1
+ ? tables
+ : filtered;
+ int cnt = 0;
+ // lookup all
+ for (TestTable tt : tts) {
+ if (index.contains(tt)) {
+ cnt += 1;
+ }
+ }
+ Assert.assertEquals(cnt, tts.size());
+ }
+ chrono.stop();
+ LOGGER.info("set table elapse " + chrono.elapse());
+ }
+
+
+ private static String tableFullName(TestTable tt) {
+ return tt.getDbName() + "." + tt.getTableName();
+ }
+
+ @Test
+ public void testFullNameLookup() {
+ System.gc();
+ Chrono chrono = new Chrono();
+ chrono.start();
+ for(int l = 0; l < NLOOPS; ++l) {
+ Set<String> index = tables.stream()
+ .map(TestTablePermissionFilterAlgos::tableFullName)
+ .collect(Collectors.toSet());
+ List<TestTable> tts = l % 2 == 1
+ ? tables
+ : filtered;
+ int cnt = 0;
+ // lookup all
+ for (TestTable tt : tts) {
+ if (index.contains(tableFullName(tt))) {
+ cnt += 1;
+ }
+ }
+ Assert.assertEquals(cnt, tts.size());
+ }
+ chrono.stop();
+ LOGGER.info("set fullname elapse " + chrono.elapse());
+ }
+}