HIVE-19902: Provide Metastore micro-benchmarks (Alexander Kolbasov, via Peter Vary)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d05b7cf0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d05b7cf0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d05b7cf0 Branch: refs/heads/master Commit: d05b7cf096cda09dde20c1e2d81e4bf7b1879247 Parents: 14b972e Author: Alexander Kolbasov <[email protected]> Authored: Mon Aug 27 12:24:29 2018 +0200 Committer: Peter Vary <[email protected]> Committed: Mon Aug 27 12:29:32 2018 +0200 ---------------------------------------------------------------------- .../metastore-benchmarks/README.md | 128 +++++ .../metastore-benchmarks/pom.xml | 164 ++++++ .../hadoop/hive/metastore/tools/BenchData.java | 41 ++ .../hive/metastore/tools/BenchmarkTool.java | 255 +++++++++ .../hive/metastore/tools/HMSBenchmarks.java | 447 +++++++++++++++ .../src/main/resources/log4j.properties | 6 + .../src/main/resources/log4j2.xml | 33 ++ standalone-metastore/metastore-tools/pom.xml | 135 +++++ .../metastore-tools/tools-common/pom.xml | 113 ++++ .../hive/metastore/tools/BenchmarkSuite.java | 266 +++++++++ .../hadoop/hive/metastore/tools/Constants.java | 33 ++ .../hadoop/hive/metastore/tools/HMSClient.java | 428 ++++++++++++++ .../hive/metastore/tools/MicroBenchmark.java | 123 ++++ .../hadoop/hive/metastore/tools/Util.java | 554 +++++++++++++++++++ .../hive/metastore/tools/HMSClientTest.java | 206 +++++++ .../hadoop/hive/metastore/tools/UtilTest.java | 81 +++ standalone-metastore/pom.xml | 9 + 17 files changed, 3022 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/README.md ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/README.md b/standalone-metastore/metastore-tools/metastore-benchmarks/README.md new file mode 100644 index 
0000000..a8c0a41 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/README.md @@ -0,0 +1,128 @@ +## Hive Metastore micro-benchmarks + +## Installation + + mvn clean install + +You can run tests as well. Just set `HMS_HOST` environment variable to some HMS instance which is +capable of running your requests (non-kerberised one) and run + + mvn install + +target directory has two mega-jars which have all the dependencies. + +Alternatively you can use [bin/hbench](../bin/hbench) script which uses Maven to run the code. + +## HmsBench usage + + Usage: BenchmarkTool [-ChlV] [--sanitize] [--confdir=<confDir>] + [--params=<nParameters>] [--savedata=<dataSaveDir>] + [--separator=<csvSeparator>] [-d=<dbName>] [-H=URI] + [-L=<spinCount>] [-N=<instances>] [-o=<outputFile>] + [-P=<port>] [-t=<tableName>] [-T=<nThreads>] [-W=<warmup>] + [-E=<exclude>]... [-M=<matches>]... + --confdir=<confDir> configuration directory + --params=<nParameters> number of table/partition parameters + Default: 0 + --sanitize sanitize results (remove outliers) + --savedata=<dataSaveDir> + save raw data in specified dir + --separator=<csvSeparator> + CSV field separator + Default: + -C, --csv produce CSV output + -d, --db=<dbName> database name + -E, --exclude=<exclude> test name patterns to exclude + -h, --help Show this help message and exit. + -H, --host=URI HMS Host + -l, --list list matching benchmarks + -L, --spin=<spinCount> spin count + Default: 100 + -M, --pattern=<matches> test name patterns + -N, --number=<instances> umber of object instances + Default: 100 + -o, --output=<outputFile> output file + -P, --port=<port> HMS Server port + Default: 9083 + -t, --table=<tableName> table name + -T, --threads=<nThreads> number of concurrent threads + Default: 2 + -V, --version Print version information and exit. + -W, --warmup=<warmup> warmup count + Default: 15 + +### Using single jar + + java -jar hmsbench-jar-with-dependencies.jar <options> [test]...
+ +### Using hbench on kerberized cluster + + java -jar hmsbench-jar-with-dependencies.jar -H `hostname` <options> [test]... + +### Examples + +1. Run tests with 500 objects created, 10 times warm-up and exclude concurrent operations and drop operations + + java -jar hmsbench-jar-with-dependencies.jar -H `hostname` -N 500 -W 10 -E 'drop.*' -E 'concurrent.*' + +2. Run tests, produce output in tab-separated format and write individual data points in 'data' directory + + java -jar hmsbench-jar-with-dependencies.jar -H host.com -o result.csv --csv --savedata data + +3. Run tests on localhost + * save raw data in directory /tmp/benchdata + * sanitize results (remove outliers) + * produce tab-separated file + * use database name 'testbench' + * create 100 parameters in partition tests + * run with 100 and 1000 partitions + + + java -jar hmsbench-jar-with-dependencies.jar -H `hostname` \ + --savedata /tmp/benchdata \ + --sanitize \ + -N 100 -N 1000 \ + -o bench_results.csv -C \ + -d testbench \ + --params=100 + +Result: + + Operation Mean Med Min Max Err% + addPartition 16.97 16.89 13.84 27.10 8.849 + addPartitions.100 315.9 313.7 274.2 387.0 6.485 + addPartitions.1000 3016 3017 2854 3226 2.861 + concurrentPartitionAdd#2.100 1289 1289 1158 1434 4.872 + concurrentPartitionAdd#2.1000 1.221e+04 1.226e+04 1.074e+04 1.354e+04 5.077 + createTable 18.21 18.15 14.78 24.17 10.30 + dropDatabase 31.13 30.86 26.46 39.09 8.192 + dropDatabase.100 1436 1435 1165 1637 5.929 + dropDatabase.1000 1.376e+04 1.371e+04 1.272e+04 1.516e+04 3.864 + dropPartition 29.43 28.81 24.79 63.24 13.97 + dropPartitions.100 686.5 680.3 575.1 819.8 6.544 + dropPartitions.1000 6247 6166 5616 7535 6.435 + dropTable 27.53 27.34 23.23 35.35 9.241 + dropTableWithPartitions 36.41 36.19 31.33 50.41 8.310 + dropTableWithPartitions.100 793.3 792.0 687.9 987.4 7.293 + dropTableWithPartitions.1000 6981 6964 6336 9179 5.115 + getNid 0.6760 0.6512 0.4482 1.530 21.93 + getPartition 6.242 6.227 5.155 9.791 11.27 +
getPartitionNames 4.888 4.660 3.842 13.12 22.53 + getPartitionNames.100 5.031 4.957 3.995 7.156 10.77 + getPartitionNames.1000 8.998 8.915 8.016 12.65 7.520 + getPartitions.100 9.717 9.475 7.883 13.08 9.835 + getPartitions.1000 32.60 32.03 28.30 50.02 9.036 + getPartitionsByNames 6.506 6.384 4.810 9.503 15.51 + getPartitionsByNames.100 9.312 9.025 7.955 18.44 14.46 + getPartitionsByNames.1000 38.47 37.49 34.57 62.51 10.23 + getTable 4.092 3.868 3.132 12.20 24.56 + listDatabases 0.6919 0.6835 0.5309 1.053 12.25 + listPartition 5.556 5.465 4.737 7.969 10.00 + listPartitions.100 9.087 8.874 7.630 12.13 10.86 + listPartitions.1000 33.79 32.55 28.63 46.15 11.14 + listTables 0.9851 0.9761 0.7948 1.378 12.07 + listTables.100 1.416 1.374 1.051 3.228 16.68 + listTables.1000 4.327 4.183 3.484 6.604 14.38 + renameTable 46.67 46.09 40.16 62.46 7.536 + renameTable.100 915.8 915.9 831.0 1022 3.833 + renameTable.1000 9015 8972 8073 1.137e+04 4.228 http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml new file mode 100644 index 0000000..079a07b --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/pom.xml @@ -0,0 +1,164 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>hive-metastore-tools</artifactId> + <groupId>org.apache.hive</groupId> + <version>4.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <packaging>jar</packaging> + + <artifactId>hive-metastore-benchmarks</artifactId> + <name>Hive metastore benchmarks</name> + + <dependencies> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>metastore-tools-common</artifactId> + <version>${hive.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.hive.hcatalog</groupId> + <artifactId>hive-hcatalog-server-extensions</artifactId> + </dependency> + <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </dependency> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-slf4j-impl</artifactId> + </dependency> + <!-- https://mvnrepository.com/artifact/org.jetbrains/annotations --> + <dependency> + <groupId>org.jetbrains</groupId> + <artifactId>annotations</artifactId> + </dependency> + <dependency> + <groupId>info.picocli</groupId> + <artifactId>picocli</artifactId> + </dependency> + <!-- https://mvnrepository.com/artifact/org.apache.maven.plugins/maven-jxr-plugin --> + <dependency> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jxr-plugin</artifactId> + <version>2.5</version> + </dependency> + <!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-api --> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-api</artifactId> + <scope>test</scope> + </dependency> + <!-- 
+ https://mvnrepository.com/artifact/org.junit.platform/junit-platform-runner --> + <dependency> + <groupId>org.junit.platform</groupId> + <artifactId>junit-platform-runner</artifactId> + <scope>test</scope> + </dependency> + <!-- https://mvnrepository.com/artifact/org.hamcrest/hamcrest-all --> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-all</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + + <profiles> + <profile> + <!-- + The dist profile generates two full jars with dependencies - one for HMSBenchmarks and + another for HMSTool. + --> + <id>dist</id> + <build> + <plugins> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <executions> + <execution> + <configuration> + <archive> + <manifest> + <mainClass>org.apache.hadoop.hive.metastore.tools.BenchmarkTool</mainClass> + <addClasspath>true</addClasspath> + </manifest> + </archive> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + <finalName>hmsbench</finalName> + </configuration> + <id>make-assembly-hclient</id> <!-- this is used for inheritance merges --> + <phase>package</phase> <!-- bind to the packaging phase --> + <goals> + <goal>single</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>1.8</source> + <target>1.8</target> + <compilerId>javac-with-errorprone</compilerId> + <forceJavacCompilerUse>true</forceJavacCompilerUse> + </configuration> + <!-- + Error Prone integration + --> + <dependencies> + <dependency> + <groupId>org.codehaus.plexus</groupId> + <artifactId>plexus-compiler-javac-errorprone</artifactId> + <version>${javac.errorprone.version}</version> + </dependency> + <dependency> + <groupId>com.google.errorprone</groupId> + <artifactId>error_prone_core</artifactId> +
<version>${errorprone.core.version}</version> + </dependency> + </dependencies> + </plugin> + </plugins> + </build> + + <reporting> + <plugins> + <plugin> + <!-- This is needed for checkstyle --> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jxr-plugin</artifactId> + <version>2.5</version> + </plugin> + </plugins> + </reporting> + +</project> http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java new file mode 100644 index 0000000..db620a2 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchData.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.tools; + +/** + * Container for all data needed for running a benchmark. + */ +final class BenchData { + private HMSClient client; + final String dbName; + final String tableName; + + BenchData(String dbName, String tableName) { + this.dbName = dbName; + this.tableName = tableName; + } + + HMSClient getClient() { + return client; + } + + void setClient(HMSClient client) { + this.client = client; + } +} + http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java new file mode 100644 index 0000000..041cd76 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Formatter; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; + +import static org.apache.hadoop.hive.metastore.tools.Constants.HMS_DEFAULT_PORT; +import static org.apache.hadoop.hive.metastore.tools.HMSBenchmarks.*; +import static org.apache.hadoop.hive.metastore.tools.Util.getServerUri; +import static picocli.CommandLine.Command; +import static picocli.CommandLine.Option; + +/** + * Command-line access to Hive Metastore. 
+ */ +@SuppressWarnings( {"squid:S106", "squid:S1148"}) // Using System.out +@Command(name = "BenchmarkTool", + mixinStandardHelpOptions = true, version = "1.0", + showDefaultValues = true) + +public class BenchmarkTool implements Runnable { + private static final Logger LOG = LoggerFactory.getLogger(BenchmarkTool.class); + private static final TimeUnit scale = TimeUnit.MILLISECONDS; + private static final String CSV_SEPARATOR = "\t"; + private static final String TEST_TABLE = "bench_table"; + + + @Option(names = {"-H", "--host"}, description = "HMS Host", paramLabel = "URI") + private String host; + + @Option(names = {"-P", "--port"}, description = "HMS Server port") + private Integer port = HMS_DEFAULT_PORT; + + @Option(names = {"-d", "--db"}, description = "database name") + private String dbName = "bench_" + System.getProperty("user.name"); + + @Option(names = {"-t", "--table"}, description = "table name") + private String tableName = TEST_TABLE + "_" + System.getProperty("user.name"); + + + @Option(names = {"-N", "--number"}, description = "umber of object instances") + private int[] instances = {100}; + + @Option(names = {"-L", "--spin"}, description = "spin count") + private int spinCount = 100; + + @Option(names = {"-W", "--warmup"}, description = "warmup count") + private int warmup = 15; + + @Option(names = {"-l", "--list"}, description = "list matching benchmarks") + private boolean doList = false; + + @Option(names = {"-o", "--output"}, description = "output file") + private String outputFile; + + @Option(names = {"-T", "--threads"}, description = "number of concurrent threads") + private int nThreads = 2; + + @Option(names = {"--confdir"}, description = "configuration directory") + private String confDir; + + @Option(names = {"--sanitize"}, description = "sanitize results (remove outliers)") + private boolean doSanitize = false; + + @Option(names = {"-C", "--csv"}, description = "produce CSV output") + private boolean doCSV = false; + + @Option(names = 
{"--params"}, description = "number of table/partition parameters") + private int nParameters = 0; + + @Option(names = {"--savedata"}, description = "save raw data in specified dir") + private String dataSaveDir; + + @Option(names = {"--separator"}, description = "CSV field separator") + private String csvSeparator = CSV_SEPARATOR; + + @Option(names = {"-M", "--pattern"}, description = "test name patterns") + private Pattern[] matches; + + @Option(names = {"-E", "--exclude"}, description = "test name patterns to exclude") + private Pattern[] exclude; + + public static void main(String[] args) { + CommandLine.run(new BenchmarkTool(), args); + } + + static void saveData(Map<String, + DescriptiveStatistics> result, String location, TimeUnit scale) throws IOException { + Path dir = Paths.get(location); + if (!dir.toFile().exists()) { + LOG.debug("creating directory {}", location); + Files.createDirectories(dir); + } else if (!dir.toFile().isDirectory()) { + LOG.error("{} should be a directory", location); + } + + // Create a new file for each benchmark and dump raw data to it. 
+ result.forEach((name, data) -> saveDataFile(location, name, data, scale)); + } + + private static void saveDataFile(String location, String name, + DescriptiveStatistics data, TimeUnit scale) { + long conv = scale.toNanos(1); + Path dst = Paths.get(location, name); + try (PrintStream output = new PrintStream(dst.toString())) { + // Print all values one per line + Arrays.stream(data.getValues()).forEach(d -> output.println(d / conv)); + } catch (FileNotFoundException e) { + LOG.error("failed to write to {}", dst); + } + } + + + @Override + public void run() { + LOG.info("Using warmup " + warmup + + " spin " + spinCount + " nparams " + nParameters + " threads " + nThreads); + + StringBuilder sb = new StringBuilder(); + BenchData bData = new BenchData(dbName, tableName); + + MicroBenchmark bench = new MicroBenchmark(warmup, spinCount); + BenchmarkSuite suite = new BenchmarkSuite(); + + suite + .setScale(scale) + .doSanitize(doSanitize) + .add("getNid", () -> benchmarkGetNotificationId(bench, bData)) + .add("listDatabases", () -> benchmarkListDatabases(bench, bData)) + .add("listTables", () -> benchmarkListAllTables(bench, bData)) + .add("getTable", () -> benchmarkGetTable(bench, bData)) + .add("createTable", () -> benchmarkTableCreate(bench, bData)) + .add("dropTable", () -> benchmarkDeleteCreate(bench, bData)) + .add("dropTableWithPartitions", + () -> benchmarkDeleteWithPartitions(bench, bData, 1, nParameters)) + .add("addPartition", () -> benchmarkCreatePartition(bench, bData)) + .add("dropPartition", () -> benchmarkDropPartition(bench, bData)) + .add("listPartition", () -> benchmarkListPartition(bench, bData)) + .add("getPartition", + () -> benchmarkGetPartitions(bench, bData, 1)) + .add("getPartitionNames", + () -> benchmarkGetPartitionNames(bench, bData, 1)) + .add("getPartitionsByNames", + () -> benchmarkGetPartitionsByName(bench, bData, 1)) + .add("renameTable", + () -> benchmarkRenameTable(bench, bData, 1)) + .add("dropDatabase", + () -> 
benchmarkDropDatabase(bench, bData, 1)); + + for (int howMany: instances) { + suite.add("listTables" + '.' + howMany, + () -> benchmarkListTables(bench, bData, howMany)) + .add("dropTableWithPartitions" + '.' + howMany, + () -> benchmarkDeleteWithPartitions(bench, bData, howMany, nParameters)) + .add("listPartitions" + '.' + howMany, + () -> benchmarkListManyPartitions(bench, bData, howMany)) + .add("getPartitions" + '.' + howMany, + () -> benchmarkGetPartitions(bench, bData, howMany)) + .add("getPartitionNames" + '.' + howMany, + () -> benchmarkGetPartitionNames(bench, bData, howMany)) + .add("getPartitionsByNames" + '.' + howMany, + () -> benchmarkGetPartitionsByName(bench, bData, howMany)) + .add("addPartitions" + '.' + howMany, + () -> benchmarkCreatePartitions(bench, bData, howMany)) + .add("dropPartitions" + '.' + howMany, + () -> benchmarkDropPartitions(bench, bData, howMany)) + .add("renameTable" + '.' + howMany, + () -> benchmarkRenameTable(bench, bData, howMany)) + .add("dropDatabase" + '.' 
+ howMany, + () -> benchmarkDropDatabase(bench, bData, howMany)); + } + + if (doList) { + suite.listMatching(matches, exclude).forEach(System.out::println); + return; + } + + LOG.info("Using table '{}.{}", dbName, tableName); + + try (HMSClient client = new HMSClient(getServerUri(host, String.valueOf(port)), confDir)) { + bData.setClient(client); + if (!client.dbExists(dbName)) { + client.createDatabase(dbName); + } + + if (client.tableExists(dbName, tableName)) { + client.dropTable(dbName, tableName); + } + + // Arrange various benchmarks in a suite + BenchmarkSuite result = suite.runMatching(matches, exclude); + + Formatter fmt = new Formatter(sb); + if (doCSV) { + result.displayCSV(fmt, csvSeparator); + } else { + result.display(fmt); + } + + PrintStream output = System.out; + if (outputFile != null) { + output = new PrintStream(outputFile); + } + + if (outputFile != null) { + // Print results to stdout as well + StringBuilder s = new StringBuilder(); + Formatter f = new Formatter(s); + result.display(f); + System.out.print(s); + f.close(); + } + + output.print(sb.toString()); + fmt.close(); + + if (dataSaveDir != null) { + saveData(result.getResult(), dataSaveDir, scale); + } + } catch (Exception e) { + e.printStackTrace(); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java new file mode 100644 index 0000000..f53f2ef --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSBenchmarks.java @@ -0,0 
+1,447 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.stream.IntStream; + +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitions; +import static org.apache.hadoop.hive.metastore.tools.Util.addManyPartitionsNoException; +import static org.apache.hadoop.hive.metastore.tools.Util.createSchema; +import static org.apache.hadoop.hive.metastore.tools.Util.generatePartitionNames; +import static 
java.util.concurrent.Executors.newFixedThreadPool; +import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper; + +/** + * Actual benchmark code. + */ +final class HMSBenchmarks { + private static final Logger LOG = LoggerFactory.getLogger(HMSBenchmarks.class); + + private static final String PARAM_KEY = "parameter_"; + private static final String PARAM_VALUE = "value_"; + + static DescriptiveStatistics benchmarkListDatabases(@NotNull MicroBenchmark benchmark, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + return benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllDatabases(null))); + } + + static DescriptiveStatistics benchmarkListAllTables(@NotNull MicroBenchmark benchmark, + @NotNull BenchData data) { + + final HMSClient client = data.getClient(); + String dbName = data.dbName; + + return benchmark.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); + } + + static DescriptiveStatistics benchmarkTableCreate(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + + return bench.measure(null, + () -> throwingSupplierWrapper(() -> client.createTable(table)), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName))); + } + + static DescriptiveStatistics benchmarkDeleteCreate(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + Table table = Util.TableBuilder.buildDefaultTable(dbName, tableName); + + return bench.measure( + () -> throwingSupplierWrapper(() -> client.createTable(table)), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null); + } + + static DescriptiveStatistics benchmarkDeleteWithPartitions(@NotNull 
MicroBenchmark bench, + @NotNull BenchData data, + int howMany, + int nparams) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + // Create many parameters + Map<String, String> parameters = new HashMap<>(nparams); + for (int i = 0; i < nparams; i++) { + parameters.put(PARAM_KEY + i, PARAM_VALUE + i); + } + + return bench.measure( + () -> throwingSupplierWrapper(() -> { + createPartitionedTable(client, dbName, tableName); + addManyPartitions(client, dbName, tableName, parameters, + Collections.singletonList("d"), howMany); + return true; + }), + () -> throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)), + null); + } + + static DescriptiveStatistics benchmarkGetTable(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getTable(dbName, tableName))); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListTables(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + + // Create a bunch of tables + String format = "tmp_table_%d"; + try { + createManyTables(client, count, dbName, format); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getAllTables(dbName, null))); + } finally { + dropManyTables(client, count, dbName, format); + } + } + + static DescriptiveStatistics benchmarkCreatePartition(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + final List<String> values = 
Collections.singletonList("d1"); + try { + Table t = client.getTable(dbName, tableName); + Partition partition = new Util.PartitionBuilder(t) + .withValues(values) + .build(); + + return bench.measure(null, + () -> throwingSupplierWrapper(() -> client.addPartition(partition)), + () -> throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, values))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListPartition(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, null, + Collections.singletonList("d"), 1); + + return bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkListManyPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int howMany) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany); + LOG.debug("Created {} partitions", howMany); + LOG.debug("started benchmark... 
"); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int howMany) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany); + LOG.debug("Created {} partitions", howMany); + LOG.debug("started benchmark... "); + return bench.measure(() -> + throwingSupplierWrapper(() -> client.getPartitions(dbName, tableName))); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropPartition(@NotNull MicroBenchmark bench, + @NotNull BenchData data) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + final List<String> values = Collections.singletonList("d1"); + try { + Table t = client.getTable(dbName, tableName); + Partition partition = new Util.PartitionBuilder(t) + .withValues(values) + .build(); + + return bench.measure( + () -> throwingSupplierWrapper(() -> client.addPartition(partition)), + () -> throwingSupplierWrapper(() -> client.dropPartition(dbName, tableName, values)), + null); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkCreatePartitions(@NotNull 
MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure( + null, + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropPartitions(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + return bench.measure( + () -> addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count), + () -> throwingSupplierWrapper(() -> + client.dropPartitions(dbName, tableName, null)), + null + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitionNames(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + return bench.measure( + () -> throwingSupplierWrapper(() -> client.getPartitionNames(dbName, tableName)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkGetPartitionsByName(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = 
data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + List<String> partitionNames = throwingSupplierWrapper(() -> + client.getPartitionNames(dbName, tableName)); + return bench.measure( + () -> + throwingSupplierWrapper(() -> + client.getPartitionsByNames(dbName, tableName, partitionNames)) + ); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkRenameTable(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + String tableName = data.tableName; + + createPartitionedTable(client, dbName, tableName); + try { + addManyPartitionsNoException(client, dbName, tableName, null, + Collections.singletonList("d"), count); + Table oldTable = client.getTable(dbName, tableName); + oldTable.getSd().setLocation(""); + Table newTable = oldTable.deepCopy(); + newTable.setTableName(tableName + "_renamed"); + + return bench.measure( + () -> { + // Measuring 2 renames, so the tests are idempotent + throwingSupplierWrapper(() -> + client.alterTable(oldTable.getDbName(), oldTable.getTableName(), newTable)); + throwingSupplierWrapper(() -> + client.alterTable(newTable.getDbName(), newTable.getTableName(), oldTable)); + } + ); + } catch (TException e) { + e.printStackTrace(); + return new DescriptiveStatistics(); + } finally { + throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); + } + } + + static DescriptiveStatistics benchmarkDropDatabase(@NotNull MicroBenchmark bench, + @NotNull BenchData data, + int count) { + final HMSClient client = data.getClient(); + String dbName = data.dbName; + + throwingSupplierWrapper(() -> client.dropDatabase(dbName)); + try { + return bench.measure( + () -> { + 
throwingSupplierWrapper(() -> client.createDatabase(dbName)); + createManyTables(client, count, dbName, "tmp_table_%d"); + }, + () -> throwingSupplierWrapper(() -> client.dropDatabase(dbName)), + null + ); + } finally { + throwingSupplierWrapper(() -> client.createDatabase(dbName)); + } + } + + private static void createManyTables(HMSClient client, int howMany, String dbName, String format) { + List<FieldSchema> columns = createSchema(new ArrayList<>(Arrays.asList("name", "string"))); + List<FieldSchema> partitions = createSchema(new ArrayList<>(Arrays.asList("date", "string"))); + IntStream.range(0, howMany) + .forEach(i -> + throwingSupplierWrapper(() -> client.createTable( + new Util.TableBuilder(dbName, String.format(format, i)) + .withType(TableType.MANAGED_TABLE) + .withColumns(columns) + .withPartitionKeys(partitions) + .build()))); + } + + private static void dropManyTables(HMSClient client, int howMany, String dbName, String format) { + IntStream.range(0, howMany) + .forEach(i -> + throwingSupplierWrapper(() -> client.dropTable(dbName, String.format(format, i)))); + } + + // Create a simple table with a single column and single partition + private static void createPartitionedTable(HMSClient client, String dbName, String tableName) { + throwingSupplierWrapper(() -> client.createTable( + new Util.TableBuilder(dbName, tableName) + .withType(TableType.MANAGED_TABLE) + .withColumns(createSchema(Collections.singletonList("name:string"))) + .withPartitionKeys(createSchema(Collections.singletonList("date"))) + .build())); + } + + static DescriptiveStatistics benchmarkGetNotificationId(@NotNull MicroBenchmark benchmark, + @NotNull BenchData data) { + HMSClient client = data.getClient(); + return benchmark.measure(() -> + throwingSupplierWrapper(client::getCurrentNotificationId)); + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties 
---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties new file mode 100644 index 0000000..3abc887 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j.properties @@ -0,0 +1,6 @@ +log4j.rootLogger=INFO, CA + +log4j.appender.CA=org.apache.log4j.ConsoleAppender + +log4j.appender.CA.layout=org.apache.log4j.PatternLayout +log4j.appender.CA.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml new file mode 100644 index 0000000..dba1392 --- /dev/null +++ b/standalone-metastore/metastore-tools/metastore-benchmarks/src/main/resources/log4j2.xml @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<Configuration status="warn" + strict="true" + name="HMSTools"> + <Appenders> + <Appender type="Console" name="STDOUT"> + <Layout type="PatternLayout" + pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/> + </Appender> + <File name="STDERR" fileName="/dev/stderr"> + <Layout type="PatternLayout" + pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/> + </File> + </Appenders> + <Loggers> + <Root level="info"> + <AppenderRef ref="STDOUT"/> + </Root> + </Loggers> +</Configuration> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/pom.xml ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/pom.xml b/standalone-metastore/metastore-tools/pom.xml new file mode 100644 index 0000000..f6fb6dc --- /dev/null +++ b/standalone-metastore/metastore-tools/pom.xml @@ -0,0 +1,135 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>hive-standalone-metastore</artifactId> + <groupId>org.apache.hive</groupId> + <version>4.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <artifactId>hive-metastore-tools</artifactId> + <name>Hive Metastore Tools</name> + <version>4.0.0-SNAPSHOT</version> + + <packaging>pom</packaging> + + <modules> + <module>metastore-benchmarks</module> + <module>tools-common</module> + </modules> + + <properties> + <hive.version>4.0.0-SNAPSHOT</hive.version> + <maven.surefire.version>2.20.1</maven.surefire.version> + <checkstyle.conf.dir>${basedir}/checkstyle</checkstyle.conf.dir> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> + <javac.errorprone.version>2.8</javac.errorprone.version> + <errorprone.core.version>2.3.1</errorprone.core.version> + <picocli.version>3.1.0</picocli.version> + <junit.platform.runner.version>1.2.0</junit.platform.runner.version> + <junit.jupiter.api.version>5.2.0</junit.jupiter.api.version> + <commons-math3.version>3.6.1</commons-math3.version> + <jetbrain-annotation.version>16.0.2</jetbrain-annotation.version> + </properties> + + <dependencyManagement> + <dependencies> + <dependency> + <groupId>org.apache.hive.hcatalog</groupId> + <artifactId>hive-hcatalog-server-extensions</artifactId> + <version>${hive.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-common</artifactId> + <version>${hive.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-standalone-metastore-common</artifactId> + <version>${hive.version}</version> + </dependency> + <!-- 
https://mvnrepository.com/artifact/org.apache.commons/commons-math3 --> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + <version>${commons-math3.version}</version> + </dependency> + <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + <version>1.7.25</version> + </dependency> + <!-- https://mvnrepository.com/artifact/org.jetbrains/annotations --> + <dependency> + <groupId>org.jetbrains</groupId> + <artifactId>annotations</artifactId> + <version>${jetbrain-annotation.version}</version> + </dependency> + <!-- https://mvnrepository.com/artifact/org.apache.maven.plugins/maven-jxr-plugin --> + <dependency> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jxr-plugin</artifactId> + <version>2.5</version> + </dependency> + <!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-api --> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-api</artifactId> + <version>${junit.jupiter.api.version}</version> + <scope>test</scope> + </dependency> + <!-- https://mvnrepository.com/artifact/org.junit.platform/junit-platform-runner --> + <dependency> + <groupId>org.junit.platform</groupId> + <artifactId>junit-platform-runner</artifactId> + <version>${junit.platform.runner.version}</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + </dependency> + <dependency> + <groupId>info.picocli</groupId> + <artifactId>picocli</artifactId> + <version>${picocli.version}</version> + </dependency> + </dependencies> + </dependencyManagement> + + <build> + <pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.7.0</version> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + 
<artifactId>maven-surefire-plugin</artifactId> + <version>${maven.surefire.version}</version> + </plugin> + </plugins> + </pluginManagement> + </build> + +</project> http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/tools-common/pom.xml ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/tools-common/pom.xml b/standalone-metastore/metastore-tools/tools-common/pom.xml new file mode 100644 index 0000000..6b03dd5 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/pom.xml @@ -0,0 +1,113 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>hive-metastore-tools</artifactId> + <groupId>org.apache.hive</groupId> + <version>4.0.0-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <packaging>jar</packaging> + + <artifactId>metastore-tools-common</artifactId> + <name>Hive Metastore Tools common libraries</name> + + <dependencies> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-standalone-metastore-common</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hive.hcatalog</groupId> + <artifactId>hive-hcatalog-server-extensions</artifactId> + </dependency> + <!-- https://mvnrepository.com/artifact/org.jetbrains/annotations --> + <dependency> + <groupId>org.jetbrains</groupId> + <artifactId>annotations</artifactId> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-common</artifactId> + </dependency> + <!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-api --> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-api</artifactId> + <scope>test</scope> + </dependency> + <!-- https://mvnrepository.com/artifact/org.hamcrest/hamcrest-all --> + <dependency> + <groupId>org.hamcrest</groupId> + <artifactId>hamcrest-all</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>1.8</source> + <target>1.8</target> + <compilerId>javac-with-errorprone</compilerId> + <forceJavacCompilerUse>true</forceJavacCompilerUse> + </configuration> + <!-- + Error Prone integration 
+ --> + <dependencies> + <dependency> + <groupId>org.codehaus.plexus</groupId> + <artifactId>plexus-compiler-javac-errorprone</artifactId> + <version>${javac.errorprone.version}</version> + </dependency> + <dependency> + <groupId>com.google.errorprone</groupId> + <artifactId>error_prone_core</artifactId> + <version>${errorprone.core.version}</version> + </dependency> + </dependencies> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + </plugin> + </plugins> + </build> + + <reporting> + <plugins> + <plugin> + <!-- This is needed for checkstyle --> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jxr-plugin</artifactId> + <version>2.5</version> + </plugin> + </plugins> + </reporting> + + +</project> http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java new file mode 100644 index 0000000..5211082 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkSuite.java @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.commons.math3.stat.descriptive.rank.Median; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Formatter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.regex.Pattern; + +import static org.apache.hadoop.hive.metastore.tools.Util.filterMatches; + +/** + * Group of benchmarks that can be joined together. + * Every benchmark has an associated name and code to run it. + * It is possible to run all benchmarks or only ones matching the filter.<p> + * + * Results can be optionally sanitized - any result that is outside of + * mean +/- margin * delta is removed from the result set. This helps remove random + * outliers. 
+ * + * <h1>Example</h1> + * + * <pre> + * StringBuilder sb = new StringBuilder(); + * Formatter fmt = new Formatter(sb); + * BenchmarkSuite suite = new BenchmarkSuite(); + * // Arrange various benchmarks in a suite + * BenchmarkSuite result = suite + * .setScale(scale) + * .doSanitize(true) + * .add("someBenchmark", someBenchmarkFunc) + * .add("anotherBenchmark", anotherBenchmarkFunc) + * .runMatching(patterns, exclude); + * result.display(fmt); + * </pre> + * + */ +public final class BenchmarkSuite { + private static final Logger LOG = LoggerFactory.getLogger(BenchmarkSuite.class); + // Delta margin for data sanitizing. When sanitizing is enabled, we filter out + // all results that are outside + // mean +/- MARGIN * stddev + private static final double MARGIN = 2; + // Collection of benchmarks + private final Map<String, Supplier<DescriptiveStatistics>> suite = new HashMap<>(); + // List of benchmarks. All benchmarks are executed in the order + // they are inserted + private final List<String> benchmarks = new ArrayList<>(); + // Once benchmarks are executed, results are stored in TreeMap to preserve the order. + private final Map<String, DescriptiveStatistics> result = new TreeMap<>(); + // Whether sanitizing of results is requested + private boolean doSanitize = false; + // Time units - we use milliseconds. + private TimeUnit scale = TimeUnit.MILLISECONDS; + + /** + * Set scaling factor for displaying results. + * When data is reported, all times are divided by scale factor. + * Data is always collected in nanoseconds, so this can be used to present + * data using different time units. + * @param scale: scaling factor + * @return this for chaining + */ + public BenchmarkSuite setScale(TimeUnit scale) { + this.scale = scale; + return this; + } + + /** + * Enable or disable result sanitization. + * This should be done before benchmarks are executed. + * @param sanitize enable sanitization if true, disable if false + * @return this object, allowing chained calls. 
+ */ + public BenchmarkSuite doSanitize(boolean sanitize) { + this.doSanitize = sanitize; + return this; + } + + /** + * Get raw benchmark results + * @return map of benchmark name to the statistics describing the result + */ + public Map<String, DescriptiveStatistics> getResult() { + return result; + } + + /** + * Run all benchmarks in the 'names' list. + * @param names list of benchmarks to run + * @return this to allow chaining + */ + private BenchmarkSuite runAll(List<String> names) { + if (doSanitize) { + names.forEach(name -> { + LOG.info("Running benchmark {}", name); + result.put(name, sanitize(suite.get(name).get())); + }); + } else { + names.forEach(name -> { + LOG.info("Running benchmark {}", name); + result.put(name, suite.get(name).get()); + }); + } + return this; + } + + /** + * Return list of benchmark names that match positive patterns and do not + * match negative patterns. + * @param positive regexp patterns that should match benchmark name + * @param negatve regexp patterns that should be excluded when matches + * @return list of benchmark names + */ + public List<String> listMatching(@Nullable Pattern[] positive, + @Nullable Pattern[] negatve) { + return filterMatches(benchmarks, positive, negatve); + } + + /** + * Run all benchmarks (filtered by positive and negative matches. + * See {@link #listMatching(Pattern[], Pattern[])} for details. + * @param positive regexp patterns that should match benchmark name + * @param negatve regexp patterns that should be excluded when matches + * @return this + */ + public BenchmarkSuite runMatching(@Nullable Pattern[] positive, + @Nullable Pattern[] negatve) { + return runAll(filterMatches(benchmarks, positive, negatve)); + } + + /** + * Add new benchmark to the suite. 
+ * @param name benchmark name + * @param b benchmark corresponding to name + * @return this + */ + public BenchmarkSuite add(@NotNull String name, @NotNull Supplier<DescriptiveStatistics> b) { + suite.put(name, b); + benchmarks.add(name); + return this; + } + + /** + * Get new statistics that excludes values beyond mean +/- 2 * stdev + * + * @param data Source data + * @return new {@link DescriptiveStatistics} object with sanitized data + */ + private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) { + double meanValue = data.getMean(); + double delta = MARGIN * meanValue; + double minVal = meanValue - delta; + double maxVal = meanValue + delta; + return new DescriptiveStatistics(Arrays.stream(data.getValues()) + .filter(x -> x > minVal && x < maxVal) + .toArray()); + } + + /** + * Get median value for given statistics. + * @param data collected datapoints. + * @return median value. + */ + private static double median(@NotNull DescriptiveStatistics data) { + return new Median().evaluate(data.getValues()); + } + + /** + * Produce printable result + * @param fmt text formatter - destination of formatted results. + * @param name benchmark name + * @param stats benchmark data + */ + private void displayStats(@NotNull Formatter fmt, @NotNull String name, + @NotNull DescriptiveStatistics stats) { + double mean = stats.getMean(); + double err = stats.getStandardDeviation() / mean * 100; + long conv = scale.toNanos(1); + + fmt.format("%-30s %-8.4g %-8.4g %-8.4g %-8.4g %-8.4g%n", + name, + mean / conv, + median(stats) / conv, + stats.getMin() / conv, + stats.getMax() / conv, + err); + } + + /** + * Produce results in printable CSV format, separated by separator. + * @param fmt text formatter - destination of formatted results. 
+ * @param name benchmark name + * @param stats benchmark data + * @param separator field separator + */ + private void displayCSV(@NotNull Formatter fmt, @NotNull String name, + @NotNull DescriptiveStatistics stats, @NotNull String separator) { + double mean = stats.getMean(); + double err = stats.getStandardDeviation() / mean * 100; + long conv = scale.toNanos(1); + + fmt.format("%s%s%g%s%g%s%g%s%g%s%g%n", + name, separator, + mean / conv, separator, + median(stats) / conv, separator, + stats.getMin() / conv, separator, + stats.getMax() / conv, separator, + err); + } + + /** + * Format all results + * @param fmt text formatter - destination of formatted results. + * @return this + */ + BenchmarkSuite display(Formatter fmt) { + fmt.format("%-30s %-8s %-8s %-8s %-8s %-8s%n", + "Operation", "Mean", "Med", "Min", "Max", "Err%"); + result.forEach((name, stat) -> displayStats(fmt, name, stat)); + return this; + } + + /** + * Format all results in CSV format + * @param fmt text formatter - destination of formatted results. 
+ * @param separator field separator + * @return this + */ + BenchmarkSuite displayCSV(Formatter fmt, String separator) { + fmt.format("%s%s%s%s%s%s%s%s%s%s%s%n", + "Operation", separator, "Mean", separator, "Med", separator, "Min", + separator, "Max", separator, "Err%"); + result.forEach((name, s) -> displayCSV(fmt, name, s, separator)); + return this; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java new file mode 100644 index 0000000..5a584f6 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/Constants.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools; + +/** + * Common constants for metastore tools. 
+ */ +public final class Constants { + static final String OPT_HOST = "host"; + static final String OPT_PORT = "port"; + static final String OPT_DATABASE = "database"; + static final String OPT_CONF = "conf"; + static final String OPT_VERBOSE = "verbose"; + static final int HMS_DEFAULT_PORT = 9083; + + // Disable object construction + private Constants() {} +} http://git-wip-us.apache.org/repos/asf/hive/blob/d05b7cf0/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java new file mode 100644 index 0000000..7cc1e42 --- /dev/null +++ b/standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java @@ -0,0 +1,428 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.tools; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.DropPartitionsRequest; +import org.apache.hadoop.hive.metastore.api.DropPartitionsResult; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.RequestPartsSpec; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.SecurityUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.protocol.TCompactProtocol; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TFramedTransport; +import org.apache.thrift.transport.TSocket; +import org.apache.thrift.transport.TTransport; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.security.auth.login.LoginException; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +/** + * Wrapper for Thrift HMS interface.
+ *
+ * <p>The constructor opens one Thrift transport to the metastore and
+ * {@link #close()} releases it.
+ */
+final class HMSClient implements AutoCloseable {
+  private static final Logger LOG = LoggerFactory.getLogger(HMSClient.class);
+  // Configuration key holding the comma-separated list of metastore URIs
+  private static final String METASTORE_URI = "hive.metastore.uris";
+  // Directory searched for configuration files when the caller gives none
+  private static final String CONFIG_DIR = "/etc/hive/conf";
+  private static final String HIVE_SITE = "hive-site.xml";
+  private static final String CORE_SITE = "core-site.xml";
+  // When this key is set, the connection is opened as the Hadoop login user
+  private static final String PRINCIPAL_KEY = "hive.metastore.kerberos.principal";
+
+  private final String confDir;
+  private ThriftHiveMetastore.Iface client;
+  private TTransport transport;
+  private URI serverURI;
+
+  /**
+   * @return URI of the metastore server this client connected to
+   */
+  public URI getServerURI() {
+    return serverURI;
+  }
+
+  @Override
+  public String toString() {
+    return serverURI.toString();
+  }
+
+  /**
+   * Connect to the metastore using the default configuration directory.
+   *
+   * @param uri server uri; when null the first configured metastore URI is used
+   */
+  HMSClient(@Nullable URI uri)
+      throws TException, IOException, InterruptedException, LoginException, URISyntaxException {
+    this(uri, CONFIG_DIR);
+  }
+
+  /**
+   * Connect to the metastore.
+   *
+   * @param uri server uri; when null the first configured metastore URI is used
+   * @param confDir directory with hive-site.xml / core-site.xml; null selects the default
+   */
+  HMSClient(@Nullable URI uri, @Nullable String confDir)
+      throws TException, IOException, InterruptedException, LoginException, URISyntaxException {
+    this.confDir = confDir == null ? CONFIG_DIR : confDir;
+    getClient(uri);
+  }
+
+  /**
+   * Add file {@code r} from the configuration directory to {@code conf}.
+   * A missing file (or a directory of that name) is skipped with a debug message.
+   */
+  private void addResource(Configuration conf, @NotNull String r) throws MalformedURLException {
+    File f = new File(confDir + "/" + r);
+    if (f.exists() && !f.isDirectory()) {
+      LOG.debug("Adding configuration resource {}", r);
+      conf.addResource(f.toURI().toURL());
+    } else {
+      LOG.debug("Configuration {} does not exist", r);
+    }
+  }
+
+  /**
+   * Create a client to Hive Metastore.
+   * If principal is specified, create kerberised client.
+   *
+   * @param uri server uri; when null the first URI listed under
+   *            {@code hive.metastore.uris} is used
+   * @throws MetaException if no uri is given and none is configured
+   * @throws TException if the Thrift connection cannot be set up
+   * @throws IOException if fails connecting to metastore
+   * @throws InterruptedException if interrupted during kerberos setup
+   * @throws URISyntaxException if the configured metastore URI is malformed
+   * @throws LoginException if the login user cannot be determined
+   */
+  private void getClient(@Nullable URI uri)
+      throws TException, IOException, InterruptedException, URISyntaxException, LoginException {
+    Configuration conf = new HiveConf();
+    addResource(conf, HIVE_SITE);
+    if (uri != null) {
+      conf.set(METASTORE_URI, uri.toString());
+      serverURI = uri;
+    } else {
+      // Pick up the first URI from the list of available URIs.
+      // split(",", 2) keeps a (possibly empty) first element even for "" or ",",
+      // so a missing setting is reported clearly instead of failing with an
+      // NPE / ArrayIndexOutOfBoundsException or an URI without a host.
+      String uris = conf.get(METASTORE_URI);
+      String first = uris == null ? "" : uris.split(",", 2)[0];
+      if (first.isEmpty()) {
+        throw new MetaException("No metastore URI given and " + METASTORE_URI
+            + " is not configured");
+      }
+      serverURI = new URI(first);
+    }
+
+    String principal = conf.get(PRINCIPAL_KEY);
+
+    if (principal == null) {
+      open(conf, serverURI);
+      return;
+    }
+
+    LOG.debug("Opening kerberos connection to HMS");
+    addResource(conf, CORE_SITE);
+
+    Configuration hadoopConf = new Configuration();
+    addResource(hadoopConf, HIVE_SITE);
+    addResource(hadoopConf, CORE_SITE);
+
+    // Kerberos magic
+    UserGroupInformation.setConfiguration(hadoopConf);
+    UserGroupInformation.getLoginUser()
+        .doAs((PrivilegedExceptionAction<TTransport>)
+            () -> open(conf, serverURI));
+  }
+
+  /**
+   * @return true iff a database named exactly {@code dbName} exists
+   */
+  boolean dbExists(@NotNull String dbName) throws TException {
+    // Exact lookup: the name must not be interpreted as a regexp filter
+    return client.get_all_databases().contains(dbName);
+  }
+
+  /**
+   * @return true iff database {@code dbName} has a table named exactly {@code tableName}
+   */
+  boolean tableExists(@NotNull String dbName, @NotNull String tableName) throws TException {
+    // Exact lookup: the name must not be interpreted as a regexp filter
+    return client.get_all_tables(dbName).contains(tableName);
+  }
+
+  Database getDatabase(@NotNull String dbName) throws TException {
+    return client.get_database(dbName);
+  }
+
+  /**
+   * Return all databases with name matching the filter.
+   *
+   * @param filter Regexp. Can be null or empty in which case everything matches
+   * @return list of database names matching the filter
+   * @throws TException if the metastore call fails
+   */
+  Set<String> getAllDatabases(@Nullable String filter) throws TException {
+    if (filter == null || filter.isEmpty()) {
+      return new HashSet<>(client.get_all_databases());
+    }
+    return client.get_all_databases()
+        .stream()
+        .filter(n -> n.matches(filter))
+        .collect(Collectors.toSet());
+  }
+
+  /**
+   * Return all tables of a database with name matching the filter.
+   *
+   * @param dbName database name
+   * @param filter Regexp. Can be null or empty in which case everything matches
+   * @return set of table names matching the filter
+   * @throws TException if the metastore call fails
+   */
+  Set<String> getAllTables(@NotNull String dbName, @Nullable String filter) throws TException {
+    if (filter == null || filter.isEmpty()) {
+      return new HashSet<>(client.get_all_tables(dbName));
+    }
+    return client.get_all_tables(dbName)
+        .stream()
+        .filter(n -> n.matches(filter))
+        .collect(Collectors.toSet());
+  }
+
+  /**
+   * Create database with the given name.
+   * No existence check is made here; the request is sent as is.
+   *
+   * @param name database name
+   * @return true when the call returns without throwing
+   */
+  boolean createDatabase(@NotNull String name) throws TException {
+    return createDatabase(name, null, null, null);
+  }
+
+  /**
+   * Create database.
+   * No existence check is made here; the request is sent as is.
+   *
+   * @param name Database name
+   * @param description Database description
+   * @param location Database location
+   * @param params Database params
+   * @return true when the call returns without throwing
+   * @throws TException if database exists
+   */
+  boolean createDatabase(@NotNull String name,
+                         @Nullable String description,
+                         @Nullable String location,
+                         @Nullable Map<String, String> params)
+      throws TException {
+    Database db = new Database(name, description, location, params);
+    client.create_database(db);
+    return true;
+  }
+
+  boolean createDatabase(Database db) throws TException {
+    client.create_database(db);
+    return true;
+  }
+
+  boolean dropDatabase(@NotNull String dbName) throws TException {
+    client.drop_database(dbName, true, true);
+    return true;
+  }
+
+  boolean createTable(Table table) throws TException {
+    client.create_table(table);
+    return true;
+  }
+
+  boolean dropTable(@NotNull String dbName, @NotNull String tableName) throws TException {
+    client.drop_table(dbName, tableName, true);
+    return true;
+  }
+
+  Table getTable(@NotNull String dbName, @NotNull String tableName) throws TException {
+    return client.get_table(dbName, tableName);
+  }
+
+  /**
+   * Build a partition of {@code table} with the given values and add it.
+   */
+  Partition createPartition(@NotNull Table table, @NotNull List<String> values) throws TException {
+    return client.add_partition(new Util.PartitionBuilder(table).withValues(values).build());
+  }
+
+  Partition addPartition(@NotNull Partition partition) throws TException {
+    return client.add_partition(partition);
+  }
+
+  void addPartitions(List<Partition> partitions) throws TException {
+    client.add_partitions(partitions);
+  }
+
+
+  // NOTE(review): (short) -1 is presumably the "no limit" value of max_parts - confirm
+  List<Partition> listPartitions(@NotNull String dbName,
+                                 @NotNull String tableName) throws TException {
+    return client.get_partitions(dbName, tableName, (short) -1);
+  }
+
+  Long getCurrentNotificationId() throws TException {
+    return client.get_current_notificationEventId().getEventId();
+  }
+
+  List<String> getPartitionNames(@NotNull String dbName,
+                                 @NotNull String tableName) throws TException {
+    return client.get_partition_names(dbName, tableName, (short) -1);
+  }
+
+  public boolean dropPartition(@NotNull String dbName, @NotNull String tableName,
+                               @NotNull List<String> arguments)
+      throws TException {
+    return client.drop_partition(dbName, tableName, arguments, true);
+  }
+
+  // Same request as listPartitions()
+  List<Partition> getPartitions(@NotNull String dbName, @NotNull String tableName) throws TException {
+    return client.get_partitions(dbName, tableName, (short) -1);
+  }
+
+  /**
+   * Drop partitions by name.
+   *
+   * @param partNames names of partitions to drop; null means every name returned by
+   *                  {@link #getPartitionNames(String, String)}
+   * @return result of the drop request, or null when there is nothing to drop
+   *         (no request is sent for an empty list)
+   */
+  DropPartitionsResult dropPartitions(@NotNull String dbName, @NotNull String tableName,
+                                      @Nullable List<String> partNames) throws TException {
+    if (partNames == null) {
+      return dropPartitions(dbName, tableName, getPartitionNames(dbName, tableName));
+    }
+    if (partNames.isEmpty()) {
+      return null;
+    }
+    return client.drop_partitions_req(new DropPartitionsRequest(dbName,
+        tableName, RequestPartsSpec.names(partNames)));
+  }
+
+  /**
+   * Get partitions by name.
+   *
+   * @param names partition names; null means every name returned by
+   *              {@link #getPartitionNames(String, String)}
+   */
+  List<Partition> getPartitionsByNames(@NotNull String dbName, @NotNull String tableName,
+                                       @Nullable List<String> names) throws TException {
+    if (names == null) {
+      return client.get_partitions_by_names(dbName, tableName,
+          getPartitionNames(dbName, tableName));
+    }
+    return client.get_partitions_by_names(dbName, tableName, names);
+  }
+
+  boolean alterTable(@NotNull String dbName, @NotNull String tableName, @NotNull Table newTable)
+      throws TException {
+    client.alter_table(dbName, tableName, newTable);
+    return true;
+  }
+
+  void alterPartition(@NotNull String dbName, @NotNull String tableName,
+                      @NotNull Partition partition) throws TException {
+    client.alter_partition(dbName, tableName, partition);
+  }
+
+  void alterPartitions(@NotNull String dbName, @NotNull String tableName,
+                       @NotNull List<Partition> partitions) throws TException {
+    client.alter_partitions(dbName, tableName, partitions);
+  }
+
+  void appendPartition(@NotNull String dbName, @NotNull String tableName,
+                       @NotNull List<String> partitionValues) throws TException {
+    // No environment context is passed
+    client.append_partition_with_environment_context(dbName, tableName, partitionValues, null);
+  }
+
+  /**
+   * Open the Thrift connection described by {@code conf} and store the
+   * resulting transport and client in this object.
+   *
+   * <p>The socket is plain or SSL depending on USE_SSL. With USE_THRIFT_SASL
+   * it is wrapped in a SASL transport (DIGEST when a delegation token is
+   * found, KERBEROS otherwise); without SASL it is optionally wrapped in a
+   * framed transport. The protocol is compact or binary depending on
+   * USE_THRIFT_COMPACT_PROTOCOL.
+   *
+   * @param conf configuration to read connection settings from
+   * @param uri server uri; only host and port are used
+   * @return the opened transport
+   * @throws IllegalArgumentException if SSL is requested without a trust store path
+   */
+  private TTransport open(Configuration conf, @NotNull URI uri) throws
+      TException, IOException, LoginException {
+    boolean useSSL = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_SSL);
+    boolean useSasl = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_SASL);
+    boolean useFramedTransport = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_FRAMED_TRANSPORT);
+    boolean useCompactProtocol = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.USE_THRIFT_COMPACT_PROTOCOL);
+    int clientSocketTimeout = (int) MetastoreConf.getTimeVar(conf,
+        MetastoreConf.ConfVars.CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS);
+
+    LOG.debug("Connecting to {}, framedTransport = {}", uri, useFramedTransport);
+
+    String host = uri.getHost();
+    int port = uri.getPort();
+
+    // Sasl/SSL code is copied from HiveMetaStoreClient
+    if (!useSSL) {
+      transport = new TSocket(host, port, clientSocketTimeout);
+    } else {
+      String trustStorePath = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.SSL_TRUSTSTORE_PATH).trim();
+      if (trustStorePath.isEmpty()) {
+        throw new IllegalArgumentException(MetastoreConf.ConfVars.SSL_TRUSTSTORE_PATH.toString()
+            + " Not configured for SSL connection");
+      }
+      String trustStorePassword =
+          MetastoreConf.getPassword(conf, MetastoreConf.ConfVars.SSL_TRUSTSTORE_PASSWORD);
+
+      // Create an SSL socket and connect
+      transport = SecurityUtils.getSSLSocket(host, port, clientSocketTimeout,
+          trustStorePath, trustStorePassword);
+      LOG.info("Opened an SSL connection to metastore, current connections");
+    }
+
+    if (useSasl) {
+      // Wrap thrift connection with SASL for secure connection.
+      HadoopThriftAuthBridge.Client authBridge =
+          HadoopThriftAuthBridge.getBridge().createClient();
+
+      // check if we should use delegation tokens to authenticate
+      // the call below gets hold of the tokens if they are set up by hadoop
+      // this should happen on the map/reduce tasks if the client added the
+      // tokens into hadoop's credential store in the front end during job
+      // submission.
+      String tokenSig = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.TOKEN_SIGNATURE);
+      // tokenSig could be null
+      String tokenStrForm = SecurityUtils.getTokenStrForm(tokenSig);
+
+      if (tokenStrForm != null) {
+        LOG.info("HMSC::open(): Found delegation token. Creating DIGEST-based thrift connection.");
+        // authenticate using delegation tokens via the "DIGEST" mechanism
+        transport = authBridge.createClientTransport(null, host,
+            "DIGEST", tokenStrForm, transport,
+            MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL));
+      } else {
+        LOG.info("HMSC::open(): Could not find delegation token. Creating KERBEROS-based thrift connection.");
+        String principalConfig =
+            MetastoreConf.getVar(conf, MetastoreConf.ConfVars.KERBEROS_PRINCIPAL);
+        transport = authBridge.createClientTransport(
+            principalConfig, host, "KERBEROS", null,
+            transport, MetaStoreUtils.getMetaStoreSaslProperties(conf, useSSL));
+      }
+    } else {
+      if (useFramedTransport) {
+        transport = new TFramedTransport(transport);
+      }
+    }
+
+    final TProtocol protocol;
+    if (useCompactProtocol) {
+      protocol = new TCompactProtocol(transport);
+    } else {
+      protocol = new TBinaryProtocol(transport);
+    }
+    client = new ThriftHiveMetastore.Client(protocol);
+    if (!transport.isOpen()) {
+      transport.open();
+      LOG.info("Opened a connection to metastore, current connections");
+
+      if (!useSasl && MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.EXECUTE_SET_UGI)) {
+        // Call set_ugi, only in unsecure mode.
+        // Best effort: every failure below is logged and the connection is kept.
+        try {
+          UserGroupInformation ugi = SecurityUtils.getUGI();
+          client.set_ugi(ugi.getUserName(), Arrays.asList(ugi.getGroupNames()));
+        } catch (LoginException e) {
+          LOG.warn("Failed to do login. set_ugi() is not successful, " +
+              "Continuing without it.", e);
+        } catch (IOException e) {
+          LOG.warn("Failed to find ugi of client set_ugi() is not successful, " +
+              "Continuing without it.", e);
+        } catch (TException e) {
+          LOG.warn("set_ugi() not successful, Likely cause: new client talking to old server. "
+              + "Continuing without it.", e);
+        }
+      }
+    }
+
+    LOG.debug("Connected to metastore, using compact protocol = {}", useCompactProtocol);
+    return transport;
+  }
+
+  /**
+   * Close the Thrift transport if it is open.
+   */
+  @Override
+  public void close() throws Exception {
+    if (transport != null && transport.isOpen()) {
+      LOG.debug("Closing thrift transport");
+      transport.close();
+    }
+  }
+}
