HIVE-18705: Improve HiveMetaStoreClient.dropDatabase (Adam Szita, reviewed by Peter Vary)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e023546 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e023546 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e023546 Branch: refs/heads/master-txnstats Commit: 3e023546186823e3cc8ea5ce32828a52743ed1fe Parents: 4ab1080 Author: Adam Szita <sz...@cloudera.com> Authored: Tue Jul 17 10:27:10 2018 +0200 Committer: Adam Szita <sz...@cloudera.com> Committed: Tue Jul 17 10:27:10 2018 +0200 ---------------------------------------------------------------------- .../positive/drop_database_table_hooks.q | 57 ++++ .../positive/drop_database_table_hooks.q.out | 258 +++++++++++++++++++ .../hadoop/hive/ql/metadata/TableIterable.java | 104 -------- .../hive/ql/metadata/TestTableIterable.java | 67 ----- .../cli/operation/GetColumnsOperation.java | 2 +- .../hadoop/hive/metastore/HiveMetaStore.java | 1 + .../hive/metastore/HiveMetaStoreClient.java | 117 ++++++++- .../hadoop/hive/metastore/TableIterable.java | 115 +++++++++ .../hive/metastore/TestTableIterable.java | 76 ++++++ 9 files changed, 616 insertions(+), 181 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/hbase-handler/src/test/queries/positive/drop_database_table_hooks.q ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/queries/positive/drop_database_table_hooks.q b/hbase-handler/src/test/queries/positive/drop_database_table_hooks.q new file mode 100644 index 0000000..96263d0 --- /dev/null +++ b/hbase-handler/src/test/queries/positive/drop_database_table_hooks.q @@ -0,0 +1,57 @@ +CREATE DATABASE sometableshavehook; +USE sometableshavehook; + +CREATE TABLE NOHOOK0 (name string, number int); +CREATE TABLE NOHOOK1 (name string, number int); +CREATE TABLE NOHOOK2 (name string, number int); +CREATE TABLE NOHOOK3 (name string, number int); +CREATE 
TABLE NOHOOK4 (name string, number int); + +CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); + +set metastore.batch.retrieve.max=5; +DROP DATABASE sometableshavehook CASCADE; +SHOW DATABASES; + +CREATE DATABASE sometableshavehook; +USE sometableshavehook; + +CREATE TABLE NOHOOK0 (name string, number int); +CREATE TABLE NOHOOK1 (name string, number int); +CREATE TABLE NOHOOK2 (name string, number int); +CREATE TABLE NOHOOK3 (name string, number int); +CREATE TABLE NOHOOK4 (name string, number int); + +CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); +CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +); + +set metastore.batch.retrieve.max=300; +DROP DATABASE sometableshavehook CASCADE; +SHOW DATABASES; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out 
b/hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out new file mode 100644 index 0000000..90713ef --- /dev/null +++ b/hbase-handler/src/test/results/positive/drop_database_table_hooks.q.out @@ -0,0 +1,258 @@ +PREHOOK: query: CREATE DATABASE sometableshavehook +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:sometableshavehook +POSTHOOK: query: CREATE DATABASE sometableshavehook +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:sometableshavehook +PREHOOK: query: USE sometableshavehook +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:sometableshavehook +POSTHOOK: query: USE sometableshavehook +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:sometableshavehook +PREHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK0 +POSTHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK0 +PREHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK1 +POSTHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK1 +PREHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK2 +POSTHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK2 +PREHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: 
sometableshavehook@NOHOOK3 +POSTHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK3 +PREHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK4 +POSTHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK4 +PREHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK0 +POSTHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK0 +PREHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK1 +POSTHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK1 +PREHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + 
WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK2 +POSTHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK2 +PREHOOK: query: DROP DATABASE sometableshavehook CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:sometableshavehook +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@hbasehook0 +PREHOOK: Output: sometableshavehook@hbasehook1 +PREHOOK: Output: sometableshavehook@hbasehook2 +PREHOOK: Output: sometableshavehook@nohook0 +PREHOOK: Output: sometableshavehook@nohook1 +PREHOOK: Output: sometableshavehook@nohook2 +PREHOOK: Output: sometableshavehook@nohook3 +PREHOOK: Output: sometableshavehook@nohook4 +POSTHOOK: query: DROP DATABASE sometableshavehook CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:sometableshavehook +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@hbasehook0 +POSTHOOK: Output: sometableshavehook@hbasehook1 +POSTHOOK: Output: sometableshavehook@hbasehook2 +POSTHOOK: Output: sometableshavehook@nohook0 +POSTHOOK: Output: sometableshavehook@nohook1 +POSTHOOK: Output: sometableshavehook@nohook2 +POSTHOOK: Output: sometableshavehook@nohook3 +POSTHOOK: Output: sometableshavehook@nohook4 +PREHOOK: query: SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default +PREHOOK: query: CREATE DATABASE sometableshavehook +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:sometableshavehook +POSTHOOK: query: CREATE DATABASE sometableshavehook +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: 
database:sometableshavehook +PREHOOK: query: USE sometableshavehook +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:sometableshavehook +POSTHOOK: query: USE sometableshavehook +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:sometableshavehook +PREHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK0 +POSTHOOK: query: CREATE TABLE NOHOOK0 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK0 +PREHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK1 +POSTHOOK: query: CREATE TABLE NOHOOK1 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK1 +PREHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK2 +POSTHOOK: query: CREATE TABLE NOHOOK2 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK2 +PREHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK3 +POSTHOOK: query: CREATE TABLE NOHOOK3 (name string, number int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK3 +PREHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@NOHOOK4 +POSTHOOK: query: CREATE TABLE NOHOOK4 (name string, number int) 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@NOHOOK4 +PREHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK0 +POSTHOOK: query: CREATE TABLE HBASEHOOK0 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK0 +PREHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK1 +POSTHOOK: query: CREATE TABLE HBASEHOOK1 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK1 +PREHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@HBASEHOOK2 +POSTHOOK: query: CREATE TABLE HBASEHOOK2 (key int, val binary) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ( + "hbase.columns.mapping" = ":key,cf:val#b" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:sometableshavehook +POSTHOOK: Output: sometableshavehook@HBASEHOOK2 +PREHOOK: query: DROP DATABASE sometableshavehook CASCADE +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:sometableshavehook +PREHOOK: Output: database:sometableshavehook +PREHOOK: Output: sometableshavehook@hbasehook0 +PREHOOK: Output: sometableshavehook@hbasehook1 +PREHOOK: Output: sometableshavehook@hbasehook2 +PREHOOK: Output: sometableshavehook@nohook0 +PREHOOK: Output: sometableshavehook@nohook1 +PREHOOK: Output: sometableshavehook@nohook2 +PREHOOK: Output: sometableshavehook@nohook3 +PREHOOK: Output: sometableshavehook@nohook4 +POSTHOOK: query: DROP DATABASE sometableshavehook CASCADE +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:sometableshavehook +POSTHOOK: Output: database:sometableshavehook +POSTHOOK: Output: sometableshavehook@hbasehook0 +POSTHOOK: Output: sometableshavehook@hbasehook1 +POSTHOOK: Output: sometableshavehook@hbasehook2 +POSTHOOK: Output: sometableshavehook@nohook0 +POSTHOOK: Output: sometableshavehook@nohook1 +POSTHOOK: Output: sometableshavehook@nohook2 +POSTHOOK: Output: sometableshavehook@nohook3 +POSTHOOK: Output: sometableshavehook@nohook4 +PREHOOK: query: SHOW DATABASES +PREHOOK: type: SHOWDATABASES +POSTHOOK: query: SHOW DATABASES +POSTHOOK: type: SHOWDATABASES +default http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java deleted file mode 100644 index d8e771d..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.metadata; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TException; - -/** - * Use this to get Table objects for a table list. It provides an iterator to - * on the resulting Table objects. It batches the calls to - * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with - * embedded metastore) or MetaStore server (if HS2 is using remote metastore). 
- * - */ -public class TableIterable implements Iterable<Table> { - - @Override - public Iterator<Table> iterator() { - return new Iterator<Table>() { - - private final Iterator<String> tableNamesIter = tableNames.iterator(); - private Iterator<org.apache.hadoop.hive.metastore.api.Table> batchIter = null; - - @Override - public boolean hasNext() { - return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); - } - - @Override - public Table next() { - if ((batchIter == null) || !batchIter.hasNext()) { - getNextBatch(); - } - return batchIter.next(); - } - - private void getNextBatch() { - // get next batch of table names in this list - List<String> nameBatch = new ArrayList<String>(); - int batch_counter = 0; - while (batch_counter < batch_size && tableNamesIter.hasNext()) { - nameBatch.add(tableNamesIter.next()); - batch_counter++; - } - // get the Table objects for this batch of table names and get iterator - // on it - try { - try { - batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); - } catch (TException e) { - throw new HiveException(e); - } - } catch (HiveException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - throw new IllegalStateException( - "TableIterable is a read-only iterable and remove() is unsupported"); - } - }; - } - - private final IMetaStoreClient msc; - private final String dbname; - private final List<String> tableNames; - private final int batch_size; - - /** - * Primary constructor that fetches all tables in a given msc, given a Hive - * object,a db name and a table name list - */ - public TableIterable(IMetaStoreClient msc, String dbname, List<String> tableNames, int batch_size) - throws TException { - this.msc = msc; - this.dbname = dbname; - this.tableNames = tableNames; - this.batch_size = batch_size; - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java 
---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java deleted file mode 100644 index 6637d15..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestTableIterable.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.metadata; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.junit.Test; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.thrift.TException; - -import static org.junit.Assert.*; -import static org.mockito.Mockito.*; -/** - * Unit tests for TableIterable - */ -public class TestTableIterable { - - @Test - public void testNumReturned() throws MetaException, InvalidOperationException, UnknownDBException, TException { - HiveMetaStoreClient msc = mock(HiveMetaStoreClient.class); - - - // create a mocked metastore client that returns 3 table objects every time it is called - // will use same size for TableIterable batch fetch size - List<Table> threeTables = Arrays.asList(new Table(), new Table(), new Table()); - when(msc.getTableObjectsByName(anyString(), anyListOf(String.class))).thenReturn(threeTables); - - List<String> tableNames = Arrays.asList("a", "b", "c", "d", "e", "f"); - TableIterable tIterable = new TableIterable(msc, "dummy", tableNames, threeTables.size()); - tIterable.iterator(); - - Iterator<Table> tIter = tIterable.iterator(); - int size = 0; - while(tIter.hasNext()) { - size++; - tIter.next(); - } - assertEquals("Number of table objects returned", size, tableNames.size()); - - verify(msc).getTableObjectsByName("dummy", Arrays.asList("a","b","c")); - verify(msc).getTableObjectsByName("dummy", Arrays.asList("d","e","f")); - - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java ---------------------------------------------------------------------- diff --git 
a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java index 838dd89..6bbdce5 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.TableIterable; +import org.apache.hadoop.hive.metastore.TableIterable; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index e6f7333..47f819b 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1535,6 +1535,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (tables != null && !tables.isEmpty()) { for (Table table : tables) { + // If the table is not external and it might not be in a subdirectory of the database // add it's locations to the list of paths to 
delete Path tablePath = null; http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index acdb73b..92e2805 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -1039,6 +1039,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { return; } + String dbNameWithCatalog = prependCatalogToDbName(catalogName, dbName, conf); + if (cascade) { // Note that this logic may drop some of the tables of the database // even if the drop database fail for any reason @@ -1048,18 +1050,115 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable { // First we delete the materialized views dropTable(dbName, table, deleteData, true); } - List<String> tableList = getAllTables(dbName); - for (String table : tableList) { - // Now we delete the rest of tables - try { - // Subclasses can override this step (for example, for temporary tables) - dropTable(dbName, table, deleteData, true); - } catch (UnsupportedOperationException e) { - // Ignore Index tables, those will be dropped with parent tables + + /** + * When dropping db cascade, client side hooks have to be called at each table removal. 
+ * If {@link org.apache.hadoop.hive.metastore.conf.MetastoreConf#ConfVars.BATCH_RETRIEVE_MAX + * BATCH_RETRIEVE_MAX} is less than the number of tables in the DB, we'll have to call the + * hooks one by one, each alongside a + * {@link #dropTable(String, String, boolean, boolean, EnvironmentContext) dropTable} call to + * ensure transactionality. + */ + List<String> tableNameList = getAllTables(dbName); + int tableCount = tableNameList.size(); + int maxBatchSize = MetastoreConf.getIntVar(conf, ConfVars.BATCH_RETRIEVE_MAX); + LOG.debug("Selecting dropDatabase method for " + dbName + " (" + tableCount + " tables), " + + ConfVars.BATCH_RETRIEVE_MAX.getVarname() + "=" + maxBatchSize); + + if (tableCount > maxBatchSize) { + LOG.debug("Dropping database in a per table batch manner."); + dropDatabaseCascadePerTable(catalogName, dbName, tableNameList, deleteData, maxBatchSize); + } else { + LOG.debug("Dropping database in a per DB manner."); + dropDatabaseCascadePerDb(catalogName, dbName, tableNameList, deleteData); + } + + } else { + client.drop_database(dbNameWithCatalog, deleteData, cascade); + } + } + + /** + * Handles dropDatabase by invoking drop_table in HMS for each table. + * Useful when table list in DB is too large to fit in memory. It will retrieve tables in + * chunks and for each table with a drop_table hook it will invoke drop_table on both HMS and + * the hook. This is a time-consuming operation so hookless tables are skipped and will be dropped on + * server side when the client invokes drop_database. + * Note that this is 'less transactional' than dropDatabaseCascadePerDb since we're dropping + * table level objects, so the overall outcome of this method might result in a partially dropped DB.
+ * @param catName + * @param dbName + * @param tableList + * @param deleteData + * @param maxBatchSize + * @throws TException + */ + private void dropDatabaseCascadePerTable(String catName, String dbName, List<String> tableList, + boolean deleteData, int maxBatchSize) throws TException { + String dbNameWithCatalog = prependCatalogToDbName(catName, dbName, conf); + for (Table table : new TableIterable(this, catName, dbName, tableList, maxBatchSize)) { + boolean success = false; + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + try { + hook.preDropTable(table); + client.drop_table_with_environment_context(dbNameWithCatalog, table.getTableName(), deleteData, null); + hook.commitDropTable(table, deleteData); + success = true; + } finally { + if (!success) { + hook.rollbackDropTable(table); + } + } + } + client.drop_database(dbNameWithCatalog, deleteData, true); + } + + /** + * Handles dropDatabase by invoking drop_database in HMS. + * Useful when table list in DB can fit in memory, it will retrieve all tables at once and + * call drop_database once. Also handles drop_table hooks. 
+ * @param catName + * @param dbName + * @param tableList + * @param deleteData + * @throws TException + */ + private void dropDatabaseCascadePerDb(String catName, String dbName, List<String> tableList, + boolean deleteData) throws TException { + String dbNameWithCatalog = prependCatalogToDbName(catName, dbName, conf); + List<Table> tables = getTableObjectsByName(catName, dbName, tableList); + boolean success = false; + try { + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.preDropTable(table); + } + client.drop_database(dbNameWithCatalog, deleteData, true); + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.commitDropTable(table, deleteData); + } + success = true; + } finally { + if (!success) { + for (Table table : tables) { + HiveMetaHook hook = getHook(table); + if (hook == null) { + continue; + } + hook.rollbackDropTable(table); } } } - client.drop_database(prependCatalogToDbName(catalogName, dbName, conf), deleteData, cascade); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java new file mode 100644 index 0000000..1a17fe3 --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/TableIterable.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +/** + * Use this to get Table objects for a table list. It provides an iterator + * over the resulting Table objects. It batches the calls to + * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with + * embedded metastore) or MetaStore server (if HS2 is using remote metastore).
+ * + */ +public class TableIterable implements Iterable<Table> { + + @Override + public Iterator<Table> iterator() { + return new Iterator<Table>() { + + private final Iterator<String> tableNamesIter = tableNames.iterator(); + private Iterator<org.apache.hadoop.hive.metastore.api.Table> batchIter = null; + + @Override + public boolean hasNext() { + return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); + } + + @Override + public Table next() { + if ((batchIter == null) || !batchIter.hasNext()) { + getNextBatch(); + } + return batchIter.next(); + } + + private void getNextBatch() { + // get next batch of table names in this list + List<String> nameBatch = new ArrayList<String>(); + int batchCounter = 0; + while (batchCounter < batchSize && tableNamesIter.hasNext()) { + nameBatch.add(tableNamesIter.next()); + batchCounter++; + } + // get the Table objects for this batch of table names and get iterator + // on it + + try { + if (catName != null) { + batchIter = msc.getTableObjectsByName(catName, dbname, nameBatch).iterator(); + } else { + batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); + } + } catch (TException e) { + throw new RuntimeException(e); + } + + } + + @Override + public void remove() { + throw new IllegalStateException( + "TableIterable is a read-only iterable and remove() is unsupported"); + } + }; + } + + private final IMetaStoreClient msc; + private final String dbname; + private final List<String> tableNames; + private final int batchSize; + private final String catName; + + /** + * Primary constructor that fetches all tables in a given msc, given a Hive + * object,a db name and a table name list. 
+ */ + public TableIterable(IMetaStoreClient msc, String dbname, List<String> tableNames, int batchSize) + throws TException { + this.msc = msc; + this.catName = null; + this.dbname = dbname; + this.tableNames = tableNames; + this.batchSize = batchSize; + } + + public TableIterable(IMetaStoreClient msc, String catName, String dbname, List<String> + tableNames, int batchSize) throws TException { + this.msc = msc; + this.catName = catName; + this.dbname = dbname; + this.tableNames = tableNames; + this.batchSize = batchSize; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/3e023546/standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java b/standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java new file mode 100644 index 0000000..f0d4427 --- /dev/null +++ b/standalone-metastore/metastore-common/src/test/java/org/apache/hadoop/hive/metastore/TestTableIterable.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.thrift.TException; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.anyListOf; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Unit tests for TableIterable. + */ +@Category(MetastoreUnitTest.class) +public class TestTableIterable { + + @Test + public void testNumReturned() throws MetaException, InvalidOperationException, + UnknownDBException, TException { + HiveMetaStoreClient msc = mock(HiveMetaStoreClient.class); + + + // create a mocked metastore client that returns 3 table objects every time it is called + // will use same size for TableIterable batch fetch size + List<Table> threeTables = Arrays.asList(new Table(), new Table(), new Table()); + when(msc.getTableObjectsByName(anyString(), anyListOf(String.class))).thenReturn(threeTables); + + List<String> tableNames = Arrays.asList("a", "b", "c", "d", "e", "f"); + TableIterable tIterable = new TableIterable(msc, "dummy", tableNames, + threeTables.size()); + tIterable.iterator(); + + Iterator<Table> tIter = tIterable.iterator(); + int size = 0; + while(tIter.hasNext()) { + size++; + tIter.next(); + } + assertEquals("Number of table objects returned", size, tableNames.size()); + + 
verify(msc).getTableObjectsByName("dummy", Arrays.asList("a", "b", "c")); + verify(msc).getTableObjectsByName("dummy", Arrays.asList("d", "e", "f")); + + } +}