Repository: sentry

Updated Branches:
  refs/heads/sentry-ha-redesign 2811311ea -> 7b3878cf3
SENTRY-1497: create a sentry scale test tool to add various objects and privileges into Sentry and HMS. (Anne Yu, reviewed by Haohao, Lili and Sravya Tirukkovalur)

Project: http://git-wip-us.apache.org/repos/asf/sentry/repo
Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/7b3878cf
Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/7b3878cf
Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/7b3878cf

Branch: refs/heads/sentry-ha-redesign
Commit: 7b3878cf321b31dcaf4e96cb9f6a3dbe72c980ae
Parents: 2811311
Author: Alexander Kolbasov <[email protected]>
Authored: Fri Mar 31 10:02:39 2017 -0700
Committer: Alexander Kolbasov <[email protected]>
Committed: Fri Mar 31 10:02:39 2017 -0700

----------------------------------------------------------------------
 pom.xml                                         |   3 +-
 sentry-tests/sentry-tests-hive/pom.xml          |  31 +-
 .../e2e/hive/hiveserver/HiveServerFactory.java  |   1 +
 .../hive/hiveserver/UnmanagedHiveServer.java    |  10 +-
 .../e2e/tools/CreateSentryTestScaleData.java    | 542 +++++++++++++++++++
 .../sentry/tests/e2e/tools/TestTools.java       |  81 +++
 .../e2e/tools/sentry_scale_test_config.xml      | 119 ++++
 .../scale-test/create-many-dbs-tables.sh        | 277 ----------
 8 files changed, 781 insertions(+), 283 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index a52e5ac..94e3360 100644 --- a/pom.xml +++ b/pom.xml @@ -96,6 +96,7 @@ limitations under the License. <kafka.version>0.9.0.0</kafka.version> <commons-io.version>1.3.2</commons-io.version> <hadoop-aws.version>2.7.0</hadoop-aws.version> + <maven.jar.plugin.version>3.0.2</maven.jar.plugin.version> </properties> <dependencyManagement> @@ -789,7 +790,7 @@ limitations under the License. <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> - <version>2.4</version> + <version>${maven.jar.plugin.version}</version> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/pom.xml ---------------------------------------------------------------------- diff --git a/sentry-tests/sentry-tests-hive/pom.xml b/sentry-tests/sentry-tests-hive/pom.xml index d2608d7..9d5b8b5 100644 --- a/sentry-tests/sentry-tests-hive/pom.xml +++ b/sentry-tests/sentry-tests-hive/pom.xml @@ -459,7 +459,7 @@ limitations under the License. </execution> </executions> </plugin> - <plugin> + <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-surefire-plugin</artifactId> <configuration> @@ -505,5 +505,34 @@ limitations under the License. </plugins> </build> </profile> + <!--This profile builds an executable test-jar that bundles tools to help tests, for example scale and longevity tests; enable it with -Ptest-tools.-->
+ <profile> + <id>test-tools</id> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <version>${maven.jar.plugin.version}</version> + <configuration> + <archive> + <manifest> + <addClasspath>true</addClasspath> + <classpathPrefix>lib/</classpathPrefix> + <mainClass>org.apache.sentry.tests.e2e.tools.TestTools</mainClass> + </manifest> + </archive> + </configuration> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> </profiles> </project> http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/HiveServerFactory.java ---------------------------------------------------------------------- diff --git a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/HiveServerFactory.java b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/HiveServerFactory.java index 20db286..0ca7704 100644 --- a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/HiveServerFactory.java +++ b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/HiveServerFactory.java @@ -70,6 +70,7 @@ public class HiveServerFactory { public static final String METASTORE_RAW_STORE_IMPL = HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL.varname; static { + // load the correct HS2 JDBC drivers try { Assert.assertNotNull(DERBY_DRIVER_NAME + " is null", Class.forName(DERBY_DRIVER_NAME)); Assert.assertNotNull(HIVE_DRIVER_NAME + " is null", Class.forName(HIVE_DRIVER_NAME)); http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/UnmanagedHiveServer.java ---------------------------------------------------------------------- diff --git a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/UnmanagedHiveServer.java b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/UnmanagedHiveServer.java index 90713b1..2b4fd12 100644 --- a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/UnmanagedHiveServer.java +++ b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/hive/hiveserver/UnmanagedHiveServer.java @@ -21,6 +21,7 @@ import java.sql.DriverManager; import java.util.Properties; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,14 +57,14 @@ public class UnmanagedHiveServer implements HiveServer { if(val == null || val.trim().equals("")){ LOGGER.warn(hiveVar + " not found in the client hive-site.xml"); if(defaultVal == null) { - val = System.getProperty(hiveVar); - }else { + val = System.getProperty(hiveVar, new Configuration().get(hiveVar)); + } else { val = System.getProperty(hiveVar, defaultVal); } Preconditions.checkNotNull(val, "Required system property is missing; provide it using -D"+ hiveVar); - LOGGER.info("Using from system property" + hiveVar + " = " + val ); + LOGGER.info("Using from system property: " + hiveVar + " = " + val ); }else { - LOGGER.info("Using from hive-site.xml" + hiveVar + " = " + val ); + LOGGER.info("Using 
from hive-site.xml: " + hiveVar + " = " + val ); } return val; } @@ -110,6 +111,7 @@ public class UnmanagedHiveServer implements HiveServer { LOGGER.info("url: " + url); return DriverManager.getConnection(url, oProps); } + public void kinit(String user) throws Exception{ UserGroupInformation.loginUserFromKeytab(user, KEYTAB_LOCATION + "/" + user + ".keytab"); LOGGER.info("Kinited user: "+ user+" keytab: "+KEYTAB_LOCATION+"/"+user+".keytab"); http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/CreateSentryTestScaleData.java ---------------------------------------------------------------------- diff --git a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/CreateSentryTestScaleData.java b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/CreateSentryTestScaleData.java new file mode 100644 index 0000000..cd9fa28 --- /dev/null +++ b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/CreateSentryTestScaleData.java @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sentry.tests.e2e.tools; + +import com.google.common.base.Strings; +import com.google.common.base.Preconditions; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.sentry.tests.e2e.hive.hiveserver.UnmanagedHiveServer; + +import java.io.File; +import java.io.IOException; +import java.security.PrivilegedExceptionAction; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.Executors; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + + +/** + * This code attempts to create Sentry and HMS synthetic test data. 
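+ * Scale dimensions (how many databases, tables, views, partitions, roles, groups and privileges per thread to create) are read from sentry_scale_test_config.xml, which is located via -Dsentry.scale.test.config.path. 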
+ * Before run: + * export HIVE_CONF_DIR=/etc/hive/conf + * export HIVE_LIB=/usr/lib/hive + * export HADOOP_CONF_DIR=/etc/hadoop/conf + * export HADOOP_CLASSPATH=${HIVE_LIB}/lib/*:${HADOOP_CLASSPATH} + * export HADOOP_CLASSPATH=${HIVE_CONF_DIR}/*:${HADOOP_CLASSPATH} + * export HADOOP_CLASSPATH=${HADOOP_CONF_DIR}/*:${HADOOP_CLASSPATH} + * export HADOOP_OPTS="$HADOOP_OPTS -Dhive.server2.thrift.bind.host=hostname + * -Dsentry.e2e.hive.keytabs.location=/keytabs + * -Dsentry.scale.test.config.path=/tmp/conf" + * To run it: + * hadoop jar test-tools.jar --scale + */ +public class CreateSentryTestScaleData { + // This class stores thread test results + public class TestDataStats { + long num_databases; + long num_tables; + long num_views; + long num_partitions; + long num_columns; + int num_uris; + public void addCounts(TestDataStats testDataStats) { + this.num_databases += testDataStats.num_databases; + this.num_tables += testDataStats.num_tables; + this.num_views += testDataStats.num_views; + this.num_partitions += testDataStats.num_partitions; + this.num_columns += testDataStats.num_columns; + this.num_uris += testDataStats.num_uris; + } + } + + public class TestStatus { + TestDataStats testDataStats = new TestDataStats(); // store object counts + TestDataStats privilegeStatus = new TestDataStats(); // store object's privilege counts + int failed = 0; + long elapsed_time = 0L; + @Override + public String toString() { + String objects = String.format("total databases(%d); tables(%d), views(%d), partitions(%d), columns(%d)", + total_num_databases.get(), testDataStats.num_tables, testDataStats.num_views, + testDataStats.num_partitions, testDataStats.num_columns); + String privileges = String.format("database privileges(%d), table privileges(%d), view privileges(%d), " + + "partition privileges(%d), column privileges(%d), uri privileges(%d)", privilegeStatus.num_databases, + privilegeStatus.num_tables, privilegeStatus.num_views, privilegeStatus.num_partitions, + privilegeStatus.num_columns, privilegeStatus.num_uris); + return String.format("Objects status: %s;\nPrivileges status: %s; Total roles(%d) and groups(%d);\nFailed threads(%d), running time(%d secs).", + objects, privileges, NUM_OF_ROLES, NUM_OF_GROUPS, failed, elapsed_time); + } + } + + final static String CONFIG_FILE_NAME = "sentry_scale_test_config.xml"; + final static String CONFIG_PATH = System.getProperty("sentry.scale.test.config.path"); + private static Configuration scaleConfig = new Configuration(); + static { + StringBuilder fullPath = new StringBuilder(); + if (CONFIG_PATH != null && CONFIG_PATH.length() > 0 ) fullPath.append(CONFIG_PATH); + if (fullPath.length() > 0 && fullPath.lastIndexOf("/") != fullPath.length() - 1) { + fullPath.append("/"); + } + fullPath.append(CONFIG_FILE_NAME); + URL url = null; + try { + url = new File(fullPath.toString()).toURI().toURL(); + System.out.println("Reading config file from url: " + url.toString()); + scaleConfig.addResource(url); + } catch (Exception ex) { + System.err.println("Failed to load config file from local file system: " + url.toString()); + throw new RuntimeException(ex); + } + }; + + final static int NUM_OF_THREADS_TO_CREATE_DATA = scaleConfig.getInt("sentry.scale.test.threads", 1); + final static int NUM_OF_DATABASES = scaleConfig.getInt("sentry.scale.test.num.databases", 4); + final static int MAX_TABLES_PER_DATABASE = scaleConfig.getInt("sentry.scale.test.max.tables.per.database", 4); + final static int MED_TABLES_PER_DATABASE = 
scaleConfig.getInt("sentry.scale.test.med.tables.per.database", 2) ; + final static int AVG_VIEWS_PER_DATABASE = scaleConfig.getInt("sentry.scale.test.avg.views.per.database", 1); + final static int MAX_PARTITIONS_PER_TABLE = scaleConfig.getInt("sentry.scale.test.max.partitions.per.table", 2); + final static int MED_PARTITIONS_PER_TABLE = scaleConfig.getInt("sentry.scale.test.med.partitions.per.table", 1); + final static int MAX_COLUMNS_PER_TABLE = scaleConfig.getInt("sentry.scale.test.max.columns.per.table", 2); + final static int MED_COLUMNS_PER_TABLE = scaleConfig.getInt("sentry.scale.test.med.columns.per.table", 1); + final static int EXTERNAL_VS_MANAGED_TBLS = scaleConfig.getInt("sentry.scale.test.external.vs.managed.tables", 2); + + final static int NUM_OF_ROLES = scaleConfig.getInt("sentry.scale.test.num.roles", 10); + final static int NUM_OF_GROUPS = scaleConfig.getInt("sentry.scale.test.num.groups", 5); + final static int MAX_ROLES_PER_GROUP = scaleConfig.getInt("sentry.scale.test.max.roles.per.group", 5); + final static int DATABASE_PRIVILEGES_PER_THREAD = scaleConfig.getInt("sentry.scale.test.database.privileges.per.thread", 2); + final static int TABLE_PRIVILEGES_PER_THREAD = scaleConfig.getInt("sentry.scale.test.table.privileges.per.thread", 2); + final static int VIEW_PRIVILEGES_PER_THREAD = scaleConfig.getInt("sentry.scale.test.view.privileges.per.thread", 1); + final static int URI_PRIVILEGES_PER_THREAD = scaleConfig.getInt("sentry.scale.test.uri.privileges.per.thread", 1); + final static int COLUMN_PRIVILEGES_PER_THREAD = scaleConfig.getInt("sentry.scale.test.column.privileges.per.thread", 1); + + final static String ADMIN = scaleConfig.get("sentry.e2etest.admin.group", "hive"); + final static String KEYTAB_LOCATION = scaleConfig.get("sentry.e2e.hive.keytabs.location"); + final static boolean TEST_DEBUG = scaleConfig.getBoolean("sentry.e2etest.scale.data.debug", false); + final static String EXT_TEST_DATA_PATH = scaleConfig.get("sentry.e2etest.scale.data.dir", "extdata"); + final static long TOOL_MAX_RUNNING_SECS = scaleConfig.getInt("sentry.tests.e2e.tools.scale.max.running.seconds", 300); + final static long THREAD_WAITING_TIME_SECS = scaleConfig.getInt("sentry.tests.e2e.tools.scale.thread.waiting.seconds", 5); + + final String[] COLUMN = {"s", "STRING"}; + final static String TEST_DATA_PREFIX = "scale_test"; + private static FileSystem fileSystem = null; + private static UnmanagedHiveServer hiveServer = null; + private static final String HIVE_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver"; + static { + try { + Class.forName(HIVE_DRIVER_NAME); + hiveServer = new UnmanagedHiveServer(); + UserGroupInformation ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI("hdfs", getKeytabPath("hdfs")); + fileSystem = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() { + @Override + public FileSystem run() throws Exception { + Configuration conf = new Configuration(); + String defaultFs = hiveServer.getProperty("fs.defaultFS"); + if (Strings.isNullOrEmpty(defaultFs)) { + defaultFs = scaleConfig.get("fs.defaultFS"); + } + conf.set("fs.defaultFS", defaultFs); + FileSystem fileSystem = FileSystem.get(conf); + Preconditions.checkNotNull(fileSystem); + return fileSystem; + } + }); + Path extPathRoot = new Path(String.format("/%s", EXT_TEST_DATA_PATH)); + System.out.println("Creating external data root dir:" + extPathRoot.toString()); + if (fileSystem.exists(extPathRoot)) { + fileSystem.delete(extPathRoot, true); + } + fileSystem.mkdirs(extPathRoot, new FsPermission((short) 
0777)); + fileSystem.setOwner(extPathRoot, ADMIN, ADMIN); + } catch (Exception ex) { + throw new RuntimeException("Failed to create FileSystem: ", ex); + } + } + + private AtomicInteger total_num_databases = new AtomicInteger(0); + + // private helper functions start from here + private static String getKeytabPath(String runAsUser) { + // fall back to a keytab on the classpath when no location is configured + if (KEYTAB_LOCATION != null && KEYTAB_LOCATION.length() > 0) + return KEYTAB_LOCATION + "/" + runAsUser + ".keytab"; + else return runAsUser + ".keytab"; //in classpath + } + + private void print(String msg, String level) { + switch (level) { + case "ERROR": + System.err.println(String.format("[%s] [ERROR] %s", Thread.currentThread().getName(), msg)); + break; + case "DEBUG": + if (TEST_DEBUG) { + System.out.println(String.format("[%s] [DEBUG] %s", Thread.currentThread().getName(), msg)); + } + break; + default: + System.out.println(String.format("[%s] [%s] %s", Thread.currentThread().getName(), level, msg)); + break; + } + } + + private int getNumOfObjectsPerDb(int dbSeq, int tbSeq, String objType, TestDataStats testDataStats) { + // the first databases get the max.* counts (db 0: tables; dbs 1 and 2: columns and + // partitions on their first two tables); all others draw a random count up to the med.* caps + int num = 0; + Random rand = new Random(); + switch (objType) { + case "TABLE": + num = dbSeq == 0 ? MAX_TABLES_PER_DATABASE : rand.nextInt(MED_TABLES_PER_DATABASE) + 1; + testDataStats.num_tables += num; + break; + case "COLUMN": + num = (dbSeq == 1 && (tbSeq == 0 || tbSeq == 1)) ? MAX_COLUMNS_PER_TABLE : rand.nextInt(MED_COLUMNS_PER_TABLE) + 1; + testDataStats.num_columns += num; + break; + case "PARTITION": + num = (dbSeq == 2 && (tbSeq == 0 || tbSeq == 1)) ? MAX_PARTITIONS_PER_TABLE : rand.nextInt(MED_PARTITIONS_PER_TABLE) + 1; + testDataStats.num_partitions += num; + break; + default: + break; + } + return num; + } + + private String createManagedTableCmd(String tblName, int numOfCols) { + StringBuilder command = new StringBuilder("CREATE TABLE IF NOT EXISTS "); + command.append(tblName + "("); + for(int i = 0; i < numOfCols; i++) { + command.append(COLUMN[0]); + command.append(i); + command.append(" " + COLUMN[1]); + if (i < numOfCols - 1) { + command.append(", "); + } + } + command.append(")"); + return command.toString(); + } + + private void createExternalTable(String tblName, int numOfPars, String extPath, + Statement statement) throws Exception { + exec(statement, String.format("CREATE EXTERNAL TABLE IF NOT EXISTS %s (num INT) PARTITIONED BY (%s %s) LOCATION '%s'", + tblName, COLUMN[0], COLUMN[1], extPath)); + + for(int i = 0; i < numOfPars; i++) { + String strParPath = String.format("%s/%d", extPath, i); + createExtTablePath(strParPath); + exec(statement, String.format("ALTER TABLE %s ADD PARTITION (%s='%d') LOCATION '%s'", + tblName, COLUMN[0], i, strParPath)); + } + } + + private String createExtTablePath(String strPath) throws IOException { + String strFullPath = String.format("/%s/%s", EXT_TEST_DATA_PATH, strPath); + Path extPath = new Path(strFullPath); + if (fileSystem.exists(extPath)) { + fileSystem.delete(extPath, true); + } + if (fileSystem.mkdirs(extPath, new FsPermission((short) 0777))) { + fileSystem.setOwner(extPath, ADMIN, ADMIN); + } else { + throw new IOException("mkdir failed to create " + strFullPath); + } + return strFullPath; + } + + private void exec(Statement statement, String cmd) throws SQLException { + print("Executing [" + cmd + "]", "DEBUG"); + statement.execute(cmd); + } + + private String getRoleName() { + return getRoleName(new Random().nextInt(NUM_OF_ROLES)); + } + + private String getRoleName(final int seq) { + return String.format("%s_role_%d", TEST_DATA_PREFIX, seq); + } + + private String 
getGroupName() { + return getGroupName(new Random().nextInt(NUM_OF_GROUPS)); + } + + private String getGroupName(final int seq) { + return String.format("%s_group_%d", TEST_DATA_PREFIX, seq); + } + + private void grantPrivileges(Statement statement, String objName, String objType, + TestDataStats privilegeStatus) throws SQLException { + String roleName = getRoleName(); + switch (objType) { + case "DATABASE": + if (privilegeStatus.num_databases < DATABASE_PRIVILEGES_PER_THREAD) { + exec(statement, String.format("GRANT %s ON DATABASE %s TO ROLE %s", + (new Random().nextBoolean() ? "SELECT" : "INSERT"), objName, roleName)); + privilegeStatus.num_databases++; + } + break; + case "TABLE": + if (privilegeStatus.num_tables < TABLE_PRIVILEGES_PER_THREAD) { + exec(statement, String.format("GRANT %s ON TABLE %s TO ROLE %s", + (new Random().nextBoolean() ? "SELECT" : "INSERT"), objName, roleName)); + privilegeStatus.num_tables++; + } + break; + case "VIEW": + if (privilegeStatus.num_views < VIEW_PRIVILEGES_PER_THREAD) { + exec(statement, String.format("GRANT %s ON TABLE %s TO ROLE %s", + (new Random().nextBoolean() ? "SELECT" : "INSERT"), objName, roleName)); + privilegeStatus.num_views++; + } + break; + case "COLUMN": + if (privilegeStatus.num_columns < COLUMN_PRIVILEGES_PER_THREAD) { + // grant on the first column, which every managed table is guaranteed to have + exec(statement, String.format("GRANT SELECT(%s0) ON TABLE %s TO ROLE %s", COLUMN[0], objName, roleName)); + privilegeStatus.num_columns++; + } + break; + case "PARTITION": + // partition grants share the column privilege quota + if (privilegeStatus.num_partitions < COLUMN_PRIVILEGES_PER_THREAD) { + exec(statement, String.format("GRANT SELECT(num) ON TABLE %s TO ROLE %s", objName, roleName)); + privilegeStatus.num_partitions++; + } + break; + case "URI": + if (privilegeStatus.num_uris < URI_PRIVILEGES_PER_THREAD) { + exec(statement, String.format("GRANT ALL ON URI '%s' TO ROLE %s", objName, roleName)); + privilegeStatus.num_uris++; + } + break; + case "MAX_PAR": + exec(statement, String.format("GRANT SELECT(num) ON TABLE %s TO ROLE %s", objName, roleName)); + privilegeStatus.num_partitions += 1; + break; + case "MAX_COL": + StringBuilder grantPars = new StringBuilder("GRANT SELECT("); + for(int i = 0; i < MAX_COLUMNS_PER_TABLE - 1; i++) { + grantPars.append(String.format("%s%d, ", COLUMN[0], i)); + } + grantPars.append(String.format("%s%d", COLUMN[0], MAX_COLUMNS_PER_TABLE - 1)); + grantPars.append(String.format(") ON TABLE %s TO ROLE %s", objName, roleName)); + exec(statement, grantPars.toString()); + break; + default: + break; + } + } + + // create access-controlled objects and their privileges + private void createTestData(TestStatus testStatus) throws Exception { + long startTime = System.currentTimeMillis(); + try (Connection con = hiveServer.createConnection(ADMIN, ADMIN)) { + try (Statement statement = con.createStatement()) { + while (total_num_databases.get() < NUM_OF_DATABASES) { + TestDataStats dbTestDataStats = new TestDataStats(); + TestDataStats dbPrivilegeStatus = new TestDataStats(); + int dbSeq = total_num_databases.getAndIncrement(); + String testDb = String.format("%s_db_%d", TEST_DATA_PREFIX, dbSeq); + print("Creating database " + testDb + " and its objects and privileges.", "INFO"); + exec(statement, "CREATE DATABASE IF NOT EXISTS " + testDb); + exec(statement, "USE " + testDb); + grantPrivileges(statement, testDb, "DATABASE", dbPrivilegeStatus); + int num_of_tables = getNumOfObjectsPerDb(dbSeq, 0, "TABLE", dbTestDataStats); + for (int tb = 0; tb < num_of_tables; tb++) { + String testView = "", extPath = ""; + String testTbl = String.format("%s_tbl_%d", 
testDb, tb); + //external table + if (tb % EXTERNAL_VS_MANAGED_TBLS != 0) { + print("Creating external table " + testTbl + " and its privileges in database " + testDb, "INFO"); + int num_of_pars = getNumOfObjectsPerDb(dbSeq, tb, "PARTITION", dbTestDataStats); + extPath = String.format("/%s/%s/%s", EXT_TEST_DATA_PATH, testDb, testTbl); + createExtTablePath(extPath); + createExternalTable(testTbl, num_of_pars, extPath, statement); + grantPrivileges(statement, testTbl, "TABLE", dbPrivilegeStatus); + if (num_of_pars == MAX_PARTITIONS_PER_TABLE) { + grantPrivileges(statement, testTbl, "MAX_PAR", dbPrivilegeStatus); + } else { + grantPrivileges(statement, testTbl, "PARTITION", dbPrivilegeStatus); + } + grantPrivileges(statement, extPath, "URI", dbPrivilegeStatus); + } else { //managed table + int num_of_columns = getNumOfObjectsPerDb(dbSeq, tb, "COLUMN", dbTestDataStats); + exec(statement, createManagedTableCmd(testTbl, num_of_columns)); + grantPrivileges(statement, testTbl, "TABLE", dbPrivilegeStatus); + if (num_of_columns == MAX_COLUMNS_PER_TABLE) { + grantPrivileges(statement, testTbl, "MAX_COL", dbPrivilegeStatus); + } else { + grantPrivileges(statement, testTbl, "COLUMN", dbPrivilegeStatus); + } + } + //view + if (dbTestDataStats.num_views < AVG_VIEWS_PER_DATABASE) { + testView = String.format("%s_view", testTbl); + exec(statement, "CREATE VIEW " + testView + " AS SELECT * FROM " + testTbl); + dbTestDataStats.num_views++; + grantPrivileges(statement, testView, "VIEW", dbPrivilegeStatus); + } + } + testStatus.testDataStats.addCounts(dbTestDataStats); + testStatus.privilegeStatus.addCounts(dbPrivilegeStatus); + testStatus.elapsed_time = (System.currentTimeMillis() - startTime) / 1000L; + } + } + } + print("Thread done with creating data: " + testStatus.toString(), "INFO"); + } + + /** + * Attempt to create scale data in HMS and Sentry + * @return + * @throws Exception + */ + public TestStatus create() throws Exception { + long createStartTime = System.currentTimeMillis(); + TestStatus testStatus = new TestStatus(); + try (Connection con = hiveServer.createConnection(ADMIN, ADMIN)) { + try (Statement statement = con.createStatement()) { + //1. create roles + print("Creating " + NUM_OF_ROLES + " roles.", "INFO"); + for(int i = 0; i < NUM_OF_ROLES; i++) { + exec(statement, "CREATE ROLE " + getRoleName(i)); + } + //2. map roles to groups + print("Assigning " + NUM_OF_ROLES + " roles to " + NUM_OF_GROUPS + " groups.", "INFO"); + String largestGroup = getGroupName(); + for(int i = 0; i < MAX_ROLES_PER_GROUP; i++) { + exec(statement, String.format("GRANT ROLE %s TO GROUP %s", getRoleName(i), largestGroup)); + } + for(int i = 0; i < NUM_OF_ROLES; i++) { + exec(statement, String.format("GRANT ROLE %s TO GROUP %s", getRoleName(i), getGroupName())); + } + } + } + //3. 
create HMS objects and privileges + ExecutorService pool = Executors.newFixedThreadPool(NUM_OF_THREADS_TO_CREATE_DATA); + List<Future<TestStatus>> jobs = new ArrayList<>(); + for (int i = 0; i < NUM_OF_THREADS_TO_CREATE_DATA; i++) { + Future<TestStatus> job = pool.submit(new Callable<TestStatus>() { + @Override + public TestStatus call() { + TestStatus threadTestStatus = new TestStatus(); + try { + createTestData(threadTestStatus); + } catch (Exception ex) { + threadTestStatus.failed += 1; + print("create() throws exception: " + ex + ", Stacktrace: " + Arrays.deepToString(ex.getStackTrace()), "ERROR"); + } + finally { + return threadTestStatus; + } + } + }); + jobs.add(job); + } + print("Submitted " + jobs.size() + " jobs", "INFO"); + for(Future<TestStatus> job : jobs) { + try { + long jobStartTime = System.currentTimeMillis(), jobElapsedTimeInSecs = 0L; + while (!job.isDone() && jobElapsedTimeInSecs < TOOL_MAX_RUNNING_SECS) { + Thread.sleep(THREAD_WAITING_TIME_SECS * 1000); //millis + jobElapsedTimeInSecs = (System.currentTimeMillis() - jobStartTime) / 1000L; + print("Waiting for all threads to finish, elapsed time = " + jobElapsedTimeInSecs + " seconds.", "INFO"); + } + if (!job.isDone()) { + testStatus.failed += 1; + print("Longest waiting time has passed. Thread fails to return.", "ERROR"); + } else { + TestStatus threadTestStatus = job.get(); + if (threadTestStatus != null) { + testStatus.testDataStats.addCounts(threadTestStatus.testDataStats); + testStatus.privilegeStatus.addCounts(threadTestStatus.privilegeStatus); + testStatus.failed += threadTestStatus.failed; + } else { + print("Thread returns null status.", "ERROR"); + testStatus.failed += 1; + } + } + } catch (Exception ex) { + print("Thread job throws exception: " + ex, "ERROR"); + testStatus.failed += 1; + } + } + pool.shutdown(); + try { + if (!pool.awaitTermination(1, TimeUnit.MINUTES)) { + pool.shutdownNow(); + } + } catch (InterruptedException ex) { + print("Failed to shut down pool: " + ex, "ERROR"); + } finally { + testStatus.elapsed_time = (System.currentTimeMillis() - createStartTime) / 1000L; //secs + return testStatus; + } + } + + /** + * Clean up created scale data + * @throws Exception + */ + public void cleanUpScaleData() throws Exception { + try (Connection con = hiveServer.createConnection(ADMIN, ADMIN)) { + try (Statement statement = con.createStatement()) { + ResultSet resultSet = statement.executeQuery("SHOW DATABASES"); + while (resultSet.next()) { + String dbName = resultSet.getString(1); + if (!dbName.startsWith(TEST_DATA_PREFIX)) { + continue; + } + try (Statement statement1 = con.createStatement()) { + exec(statement1, "DROP DATABASE " + dbName + " CASCADE"); + } catch (Exception ex) { + print("Fails to clean up DATABASE " + dbName + ": " + ex, "ERROR"); + } + } + if (resultSet != null) { + resultSet.close(); + } + resultSet = statement.executeQuery("SHOW ROLES"); + while (resultSet.next()) { + String roleName = resultSet.getString(1); + if (!roleName.startsWith(TEST_DATA_PREFIX)) { + continue; + } + try (Statement statement1 = con.createStatement()) { + exec(statement1, "DROP ROLE " + roleName); + } catch (Exception ex) { + print("Fails to clean up ROLE " + roleName + ": " + ex, "ERROR"); + } + } + if (resultSet != null) { + resultSet.close(); + } + } + } + } +} http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/TestTools.java ---------------------------------------------------------------------- diff --git 
a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/TestTools.java b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/TestTools.java new file mode 100644 index 0000000..a8714c4 --- /dev/null +++ b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/TestTools.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sentry.tests.e2e.tools; + +import com.google.common.collect.ImmutableMap; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.HelpFormatter; + +/** + * This class runs all test tools for sentry-hive integration: + * e.g. scale test tool, long haul tests, stress tests; + * To run it: + * hadoop jar test-tools.jar --help + */ +public class TestTools { + private static ImmutableMap<String, String> COMMANDS = ImmutableMap.of( + "help", "help", + "scale", "scale", + "cleanUpScaleData", "clean-scale" + ); + + private TestTools() { + // Make constructor private to avoid instantiation + } + + public static void main(String[] args) throws Exception { + CommandLineParser parser = new GnuParser(); + Options options = new Options(); + options.addOption(COMMANDS.get("help").substring(0,1), COMMANDS.get("help"), + false, "Print this help text."); + options.addOption(COMMANDS.get("scale").substring(0,1), COMMANDS.get("scale"), + false, "Run scale test tool to create test data."); + options.addOption(COMMANDS.get("cleanUpScaleData").substring(0,1), COMMANDS.get("cleanUpScaleData"), + false, "Clean up scale test data."); + CommandLine commandLine = parser.parse(options, args, true); + if (commandLine.hasOption(COMMANDS.get("help").substring(0,1)) + || commandLine.hasOption(COMMANDS.get("help"))) { + printHelp(options, null); + } else if (commandLine.hasOption(COMMANDS.get("scale").substring(0,1)) + || commandLine.hasOption(COMMANDS.get("scale"))) { + CreateSentryTestScaleData createSentryTestScaleData = new CreateSentryTestScaleData(); + CreateSentryTestScaleData.TestStatus testStatus = createSentryTestScaleData.create(); + if (testStatus != null && testStatus.testDataStats != null && testStatus.privilegeStatus != null) { + System.out.println("Test results:"); + System.out.println(testStatus.toString()); + } + } else if (commandLine.hasOption(COMMANDS.get("cleanUpScaleData").substring(0,1)) + || commandLine.hasOption(COMMANDS.get("cleanUpScaleData"))) { + CreateSentryTestScaleData createSentryTestScaleData = new CreateSentryTestScaleData(); + createSentryTestScaleData.cleanUpScaleData(); + } else { + printHelp(options, null); + } + } + + private static void printHelp(Options 
options, String msg) { + String sentry = "sentry"; + if (msg != null) { + sentry = msg + sentry; + } + (new HelpFormatter()).printHelp(sentry, options); + System.exit(1); + } +} http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/sentry_scale_test_config.xml ---------------------------------------------------------------------- diff --git a/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/sentry_scale_test_config.xml b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/sentry_scale_test_config.xml new file mode 100644 index 0000000..caf1ff0 --- /dev/null +++ b/sentry-tests/sentry-tests-hive/src/test/java/org/apache/sentry/tests/e2e/tools/sentry_scale_test_config.xml @@ -0,0 +1,119 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--> + +<configuration> + <!--scale data --> + <property> + <name>sentry.scale.test.threads</name> + <value>50</value> + </property> + <property> + <name>sentry.scale.test.num.databases</name> + <value>300</value> + </property> + <property> + <name>sentry.scale.test.max.tables.per.database</name> + <value>600</value> + </property> + <property> + <name>sentry.scale.test.med.tables.per.database</name> + <value>3</value> + </property> + <property> + <name>sentry.scale.test.avg.views.per.database</name> + <value>2</value> + </property> + <property> + <name>sentry.scale.test.max.partitions.per.table</name> + <value>600</value> + </property> + <property> + <name>sentry.scale.test.med.partitions.per.table</name> + <value>10</value> + </property> + <property> + <name>sentry.scale.test.max.columns.per.table</name> + <value>100</value> + </property> + <property> + <name>sentry.scale.test.med.columns.per.table</name> + <value>10</value> + </property> + <property> + <name>sentry.scale.test.external.vs.managed.tables</name> + <value>20</value> + </property> + <property> + <name>sentry.scale.test.num.roles</name> + <value>1000</value> + </property> + <property> + <name>sentry.scale.test.num.groups</name> + <value>500</value> + </property> + <property> + <name>sentry.scale.test.max.roles.per.group</name> + <value>10</value> + </property> + <property> + <name>sentry.scale.test.database.privileges.per.thread</name> + <value>200</value> + </property> + <property> + <name>sentry.scale.test.table.privileges.per.thread</name> + <value>200</value> + </property> + <property> + <name>sentry.scale.test.view.privileges.per.thread</name> + <value>2</value> + </property> + <property> + <name>sentry.scale.test.column.privileges.per.thread</name> + <value>2</value> + </property> + <property> + <name>sentry.scale.test.uri.privileges.per.thread</name> + <value>1</value> + </property> + <!--HiveService 
admin user--> + <property> + <name>sentry.e2etest.admin.group</name> + <value>hive</value> + </property> + <property> + <name>sentry.e2e.hive.keytabs.location</name> + <value>/cdep/keytabs</value> + </property> + <!--add it into prefix list --> + <property> + <name>sentry.e2etest.scale.data.dir</name> + <value>extdata</value> + </property> + <property> + <name>sentry.e2etest.scale.data.debug</name> + <value>false</value> + </property> + <property> + <name>sentry.tests.e2e.tools.scale.max.running.seconds</name> + <value>1800</value> + </property> + <property> + <name>sentry.tests.e2e.tools.scale.thread.waiting.seconds</name> + <value>60</value> + </property> +</configuration> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/sentry/blob/7b3878cf/sentry-tests/sentry-tests-hive/src/test/scripts/scale-test/create-many-dbs-tables.sh ---------------------------------------------------------------------- diff --git a/sentry-tests/sentry-tests-hive/src/test/scripts/scale-test/create-many-dbs-tables.sh b/sentry-tests/sentry-tests-hive/src/test/scripts/scale-test/create-many-dbs-tables.sh deleted file mode 100755 index dcdddeb..0000000 --- a/sentry-tests/sentry-tests-hive/src/test/scripts/scale-test/create-many-dbs-tables.sh +++ /dev/null @@ -1,277 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# This script means to create many testing objects (database, tables, -# partitions and a wide table with many partitions). 
The way to run it: -# !/usr/bin/env bash -# export HS2="HOSTNAME" -# export REALM="REALM.NAME" -# bash /root/tests/create-many-dbs-tables.sh & -# bash /root/tests/create-many-dbs-tables.sh & - -if [[ ${HS2} == "" ]]; then - echo "error: need to export HS2=hostname" - exit 1 -fi - -if [[ ${REALM} == "" ]]; then - echo "error: need to export REALM" - exit 1 -fi - -# Define default test scale -NUM_OF_DATABASES=60 -NUM_OF_TABLES_PER_DATABASE=20 -NUM_OF_ROLES_FOR_DATABASES=60 # <= NUM_OF_DATABASES -NUM_OF_ROLES_FOR_TABLES_PER_DATABASE=5 # <= NUM_OF_TABLES_PER_DATABASE -NUM_OF_GROUPS=60 # >= NUM_OF_DATABASES - -# Number of partitions varies between max and min -MAX_NUM_OF_PARTITIONS_PER_TABLE=10 -MIN_NUM_OF_PARTITIONS_PER_TABLE=2 - -BASE_EXTERNAL_DIR="/data" -LOCAL_OUTPUT_DIR="/tmp" -BL="beeline -n hive -p hive --silent=true -u 'jdbc:hive2://${HS2}:10000/default;principal=hive/_HOST@${REALM}'" - -# Number of external partitions wide tables have -declare -a NUM_OF_WIDE_TABLE_PARTITIONS=(10 100 1000) -wLen=${#NUM_OF_WIDE_TABLE_PARTITIONS[@]} - -process_id=$$ - -while getopts "d:t:g:b:l" OPTION -do case "${OPTION}" in - b) BASE_EXTERNAL_DIR="$OPTARG";; - d) NUM_OF_DATABASES="$OPTARG";; - l) LOCAL_OUTPUT_DIR="$OPTARG";; - t) NUM_OF_TABLES_PER_DATABASE="$OPTARG";; - g) NUM_OF_GROUPS="$OPTARG";; - [?]) print >&2 "Usage: $0 [-b BASE_EXTERNAL_DIR] [-d NUM_OF_DATABASES] [-l LOCAL_OUTPUT_DIR] [-t NUM_OF_TABLES_PER_DATABASE] [-g NUM_OF_GROUPS]" - exit 1;; - esac -done - -NUM_OF_PERMISSIONS=$(( NUM_OF_ROLES_FOR_DATABASES + NUM_OF_ROLES_FOR_TABLES_PER_DATABASE * NUM_OF_DATABASES)) -AVG_NUM_OF_PARTITIONS_PER_TABLE=$((( MAX_NUM_OF_PARTITIONS_PER_TABLE + MIN_NUM_OF_PARTITIONS_PER_TABLE) / 2 )) - -echo "[${process_id}] Scale numbers:" -echo "[${process_id}] number of databases: ${NUM_OF_DATABASES}" -echo "[${process_id}] number of tables: $((NUM_OF_TABLES_PER_DATABASE * NUM_OF_DATABASES))" -echo "[${process_id}] number of wide tables: ${wLen}" -echo "[${process_id}] number of partitions per table: ${AVG_NUM_OF_PARTITIONS_PER_TABLE}" -echo "[${process_id}] number of min partitions per wide table: ${NUM_OF_WIDE_TABLE_PARTITIONS[0]}" -echo "[${process_id}] number of max partitions per wide table: ${NUM_OF_WIDE_TABLE_PARTITIONS[${wLen}-1]}" -echo "[${process_id}] number of permissions: ${NUM_OF_PERMISSIONS}" -echo "[${process_id}] number of groups: ${NUM_OF_GROUPS}" - -# Random string as prefix for test databases and tables -prefix_string=$(cat /dev/urandom | tr -dc 'a-z' | fold -w 4 | head -n 1) -prefix_string=${prefix_string}$(date +%s | cut -c1-4) - -DB_NAME=${prefix_string}_db - -function validate_ret () { - ret=$1 - if [[ ${ret} != "" && ${ret} -ne 0 ]]; then - echo "ERROR!! when running query in bulk mode" - exit $ret - fi -} - -function get_group () { - count=$1 - group_name=group_$((count % NUM_OF_GROUPS)) - echo "$group_name" -} - -# Create groups -function create_groups () { - for g in $(seq ${NUM_OF_GROUPS}); do - group_name=$(get_group $g) - getent passwd ${group_name} | grep "${group_name}" 1>&2>/dev/null - if [[ $? 
-ne 0 ]]; then - sudo groupadd ${group_name} - sudo useradd -g ${group_name} ${group_name} - fi - done -} - -# Convenience function to create one table with many external partitons -function create_wide_table () { - db_name=$1 - tbl_name=$2 - num_of_pars=$3 - file_name=$4 - dir_file_name=$5 - echo "-- [${process_id}] Create ${tbl_name} in ${db_name} with ${num_of_pars} external partitions; " >> ${file_name} - echo "CREATE DATABASE IF NOT EXISTS ${db_name}; " >> ${file_name} - echo "USE ${db_name};" >> ${file_name} - table_dir=${BASE_EXTERNAL_DIR}/${db_name}/${tbl_name} - echo "sudo -u hdfs hdfs dfs -rm -R -skipTrash ${table_dir} 2>/dev/null" >> ${dir_file_name} - echo "DROP TABLE IF EXISTS ${tbl_name}; " >> ${file_name} - echo "CREATE TABLE ${tbl_name} (s STRING, i INT) PARTITIONED BY (par INT);" >> ${file_name} - echo "-- create ${num_of_pars} partitions on table ${tbl_name}" >> ${file_name} - for p in $(seq ${num_of_pars}); do - dir=${table_dir}/$p - echo "sudo -u hdfs hdfs dfs -mkdir -p ${dir}" >> ${dir_file_name} - echo "ALTER TABLE ${tbl_name} ADD PARTITION (par=$p) LOCATION '${dir}';" >> ${file_name} - done -} - -# Convenience function to create wide tables with many external partitions -function create_external_par_dirs_bulk_file () { - file_name=$1 - dir_file_name=$2 - echo "-- [${process_id}] Start bulk process to create wide tables" > ${file_name} - echo "# [${process_id}] Start to create external dirs for partitions" > ${dir_file_name} - db_id=$(awk -v n="${NUM_OF_DATABASES}" 'BEGIN{srand();print int(rand()*n+1)}') - db_name=${DB_NAME}_${db_id} - for p in "${!NUM_OF_WIDE_TABLE_PARTITIONS[@]}"; do - tbl_name=${db_name}_wide_tbl_$p - create_wide_table ${db_name} ${tbl_name} ${NUM_OF_WIDE_TABLE_PARTITIONS[p]} ${file_name} ${dir_file_name} - done - chmod a+x ${file_name} - chmod a+x ${dir_file_name} -} - -# Create internal databases and their tables in one bulk file -function create_dbs_tbls_bulk_file () { - file_name=$1 - echo "-- [${process_id}] start bulk load " > ${file_name} - for d in $(seq ${NUM_OF_DATABASES}); do - db_name=${DB_NAME}_${d} - echo "drop database if exists ${db_name}; " >> ${file_name} - echo "create database ${db_name}; " >> ${file_name} - echo "use ${db_name};" >> ${file_name} - NUM_OF_COLS=$(awk -v mn="${MIN_NUM_OF_PARTITIONS_PER_TABLE}" -v mx="${MAX_NUM_OF_PARTITIONS_PER_TABLE}" 'BEGIN{srand();print int(rand()*(mx-mn)+1)}') - NUM_OF_PARS=$(awk -v mn="${MIN_NUM_OF_PARTITIONS_PER_TABLE}" -v mx="${MAX_NUM_OF_PARTITIONS_PER_TABLE}" 'BEGIN{srand();print int(rand()*(mx-mn)+1)}') - - for t in $(seq ${NUM_OF_TABLES_PER_DATABASE}); do - tbl_name=${db_name}_tbl_${t} - # create table - echo "create table ${tbl_name} (col_start INT, " >> ${file_name} - for c in $(seq ${NUM_OF_COLS}); do - echo "col_${c} STRING, " >> ${file_name} - done - echo "col_end INT) partitioned by (par_start STRING, " >> ${file_name} - # create many partitions - for p in $(seq ${NUM_OF_PARS}); do - echo "par_${p} INT, " >> ${file_name} - done - echo "par_end STRING); " >> ${file_name} - done - done - chmod a+x ${file_name} -} - -# Create database roles -function create_dbs_roles () { - db_file_name=$1 - total_db_permissions=0 - echo "-- [${process_id}] Start to create database roles" > ${db_file_name} - for d in $(seq ${NUM_OF_ROLES_FOR_DATABASES}); do - db_name=${DB_NAME}_${d} - role_name=${db_name}_db_role_${d} - group_name=$(get_group $d) - echo "create role ${role_name}; " >> ${db_file_name} - echo "grant all on database ${db_name} to role ${role_name}; " >> ${db_file_name} - echo "grant 
${role_name} to group ${group_name};" >> ${db_file_name} - done - chmod a+x ${db_file_name} -} - -# Create table roles -function create_tbls_roles () { - tbl_file_name=$1 - echo "-- [${process_id}] Start to create table roles;" > ${tbl_file_name} - # create table roles - for d in $(seq ${NUM_OF_DATABASES}); do - db_name=${DB_NAME}_${d} - echo "USE ${db_name};" >> ${tbl_file_name} - for t in $(seq ${NUM_OF_ROLES_FOR_TABLES_PER_DATABASE}); do - tbl_name=${db_name}_tbl_${t} - role_name=${tbl_name}_role_${t} - echo "CREATE ROLE ${role_name};" >> ${tbl_file_name} - rand_number=$(awk 'BEGIN{srand();print int(rand()*3)}') - case "$((rand_number % 3))" in - 0) echo "grant all on table ${tbl_name} to role ${role_name}; " >> ${tbl_file_name} - ;; - 1) echo "grant insert on table ${tbl_name} to role ${role_name}; " >> ${tbl_file_name} - ;; - *) echo "grant select on table ${tbl_name} to role ${role_name}; " >> ${tbl_file_name} - ;; - esac - group_name=$(get_group $d) - echo "grant role ${role_name} to group ${group_name}; " >> ${tbl_file_name} - done - done - chmod a+x ${tbl_file_name} -} - -########################### -# Start from here! -########################### -create_groups -echo "# [${process_id}] Created ${NUM_OF_GROUPS} groups" - -# Use Hive to create the partitions because it supports bulk adding of partitions. -# Hive doesn't allow fully qualified table names in ALTER statements, so start with a -# USE <db>. -create_tables_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_tables.q -create_dbs_tbls_bulk_file ${create_tables_file_name} -echo "# [${process_id}] Created ${create_tables_file_name} to create databases and tables in bulk mode" - -create_wide_tables_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_wide_tables.q -create_wide_tables_dir_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_wide_tables_dirs.sh -create_external_par_dirs_bulk_file ${create_wide_tables_file_name} ${create_wide_tables_dir_file_name} -echo "# [${process_id}] Created ${create_wide_tables_file_name} to create wide tables with external partitions in bulk mode" -echo "# [${process_id}] Created ${create_wide_tables_dir_file_name} to create external dirs for external partitions in bulk mode" - -create_db_role_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_db_roles.q -create_dbs_roles ${create_db_role_file_name} -echo "# [${process_id}] Created ${create_db_role_file_name} to create database roles" - -create_tbl_role_file_name=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}_bulk_tbl_roles.q -create_tbls_roles ${create_tbl_role_file_name} -echo "# [${process_id}] Created ${create_tbl_role_file_name} to create table roles" - -sudo -u hive hive -S -f ${create_tables_file_name} -validate_ret $? -echo "# [${process_id}] Succeessfully ran bulk file ${create_tables_file_name} to create databases and tables" - -. ${create_wide_tables_dir_file_name} -echo "# [${process_id}] Successfully ran ${create_wide_tables_dir_file_name} to create dirs for external partitions" - -sudo -u hive hive -S -f ${create_wide_tables_file_name} -validate_ret $? -echo "# [${process_id}] Successfully ran bulk file ${create_wide_tables_file_name} to create wide tables with external partitions" - -sudo -u hive ${BL} -f ${create_db_role_file_name} 1>/dev/null # to remove white lines after execution -validate_ret $? -echo "# [${process_id}] Successfully created database level roles and privileges" - -sudo -u hive ${BL} -f ${create_tbl_role_file_name} 1>/dev/null # to remove white lines after execution -validate_ret $? 
-echo "# [${process_id}] Successfully created table level roles and privileges" - -res_file=${LOCAL_OUTPUT_DIR}/hive_${prefix_string}.res -echo "-- [${process_id}] List all databases and roles in ${res_file}" > ${res_file} -sudo -u hive ${BL} -e "show databases" 2>/dev/null 1>>${res_file} -sudo -u hive ${BL} -e "show roles" 2>/dev/null 1>>${res_file} -echo "[${process_id}] Successfully listed all databases and roles in ${res_file}"
