HIVE-11407 : JDBC DatabaseMetaData.getTables call with a large number of tables leads to HS2 OOM (Sushanth Sowmyan via Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/46739a6a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/46739a6a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/46739a6a Branch: refs/heads/llap Commit: 46739a6a2de6bc672fda094d5505060a21a22179 Parents: c7e1d34 Author: Thejas Nair <[email protected]> Authored: Tue Aug 4 13:25:02 2015 -0700 Committer: Thejas Nair <[email protected]> Committed: Tue Aug 4 13:25:02 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/metadata/TableIterable.java | 104 +++++++++++++++++++ .../cli/operation/GetColumnsOperation.java | 10 +- .../cli/operation/GetTablesOperation.java | 7 +- 3 files changed, 118 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java new file mode 100644 index 0000000..f3af39b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/TableIterable.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.thrift.TException; + +/** + * Use this to get Table objects for a table list. It provides an iterator + * over the resulting Table objects. It batches the calls to + * IMetaStoreClient.getTableObjectsByName to avoid OOM issues in HS2 (with + * embedded metastore) or MetaStore server (if HS2 is using remote metastore). 
+ * + */ +public class TableIterable implements Iterable<Table> { + + @Override + public Iterator<Table> iterator() { + return new Iterator<Table>() { + + private final Iterator<String> tableNamesIter = tableNames.iterator(); + private Iterator<org.apache.hadoop.hive.metastore.api.Table> batchIter = null; + + @Override + public boolean hasNext() { + return ((batchIter != null) && batchIter.hasNext()) || tableNamesIter.hasNext(); + } + + @Override + public Table next() { + if ((batchIter == null) || !batchIter.hasNext()) { + getNextBatch(); + } + return batchIter.next(); + } + + private void getNextBatch() { + // get next batch of table names in this list + List<String> nameBatch = new ArrayList<String>(); + int batch_counter = 0; + while (batch_counter < batch_size && tableNamesIter.hasNext()) { + nameBatch.add(tableNamesIter.next()); + batch_counter++; + } + // get the Table objects for this batch of table names and get iterator + // on it + try { + try { + batchIter = msc.getTableObjectsByName(dbname, nameBatch).iterator(); + } catch (TException e) { + throw new HiveException(e); + } + } catch (HiveException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new IllegalStateException( + "TableIterable is a read-only iterable and remove() is unsupported"); + } + }; + } + + private final IMetaStoreClient msc; + private final String dbname; + private final List<String> tableNames; + private final int batch_size; + + /** + * Primary constructor that fetches all tables in a given msc, given a Hive + * object,a db name and a table name list + */ + public TableIterable(IMetaStoreClient msc, String dbname, List<String> tableNames, int batch_size) + throws TException { + this.msc = msc; + this.dbname = dbname; + this.tableNames = tableNames; + this.batch_size = batch_size; + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java 
---------------------------------------------------------------------- diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java index 309f10f..8ecdc2e 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java @@ -30,10 +30,12 @@ import java.util.regex.Pattern; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.metadata.TableIterable; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hive.service.cli.ColumnDescriptor; import org.apache.hive.service.cli.FetchOrientation; import org.apache.hive.service.cli.HiveSQLException; @@ -153,11 +155,15 @@ public class GetColumnsOperation extends MetadataOperation { authorizeMetaGets(HiveOperationType.GET_COLUMNS, privObjs, cmdStr); } + int maxBatchSize = SessionState.get().getConf().getIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX); for (Entry<String, List<String>> dbTabs : db2Tabs.entrySet()) { String dbName = dbTabs.getKey(); List<String> tableNames = dbTabs.getValue(); - for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) { - TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName, table.getTableName())); + + for (Table table : new TableIterable(metastoreClient, dbName, tableNames, maxBatchSize)) { + + TableSchema schema = new 
TableSchema(metastoreClient.getSchema(dbName, + table.getTableName())); for (ColumnDescriptor column : schema.getColumnDescriptors()) { if (columnPattern != null && !columnPattern.matcher(column.getName()).matches()) { continue; http://git-wip-us.apache.org/repos/asf/hive/blob/46739a6a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java ---------------------------------------------------------------------- diff --git a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java index 0e2fdc6..296280f 100644 --- a/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java +++ b/service/src/java/org/apache/hive/service/cli/operation/GetTablesOperation.java @@ -22,11 +22,14 @@ import java.util.ArrayList; import java.util.List; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.metadata.TableIterable; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hive.service.cli.FetchOrientation; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.OperationState; @@ -88,9 +91,11 @@ public class GetTablesOperation extends MetadataOperation { } String tablePattern = convertIdentifierPattern(tableName, true); + int maxBatchSize = SessionState.get().getConf().getIntVar(ConfVars.METASTORE_BATCH_RETRIEVE_MAX); + for (String dbName : metastoreClient.getDatabases(schemaPattern)) { List<String> tableNames = metastoreClient.getTables(dbName, 
tablePattern); - for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) { + for (Table table : new TableIterable(metastoreClient, dbName, tableNames, maxBatchSize)) { Object[] rowData = new Object[] { DEFAULT_HIVE_CATALOG, table.getDbName(),
