This is an automated email from the ASF dual-hosted git repository. tarmstrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 69f7c8e1cd31519febf13637b4c9054ec9ab0423 Author: stiga-huang <huangquanl...@gmail.com> AuthorDate: Sat Aug 24 00:53:36 2019 -0700 IMPALA-8797: Support database and table blacklist Add catalogd and coordinator startup options for database and table blacklist. Blacklisted dbs/tables will be skipped in loading. Users won't see them when getting database/table list, e.g. in SHOW DATABASES/TABLES. Dropping/creating blacklisted databases/tables/views are not allowed too. Implementation: Catalogd and coordinators parses the --blacklisted_dbs and --blacklisted_tables options in startup. Blacklist checks are added in catalogd when loading the metadata and in coordinators when analysing DDL requests for create/drop. Catalogd will also check blacklist when executing DDL requests from coordinators in case their blacklists are inconsistent. Motivation: By default, it's used to blacklist "sys" and "information_schema" databases from Hive. Admin can use them to specify any databases/tables that are not suitable for Impala to query. Tests: - Add java unit tests for blacklist related functions - Add a custom cluster test: test_blacklisted_dbs_and_tables.py - Ran CORE tests Change-Id: I02dbb07f8e08793b57b2a88d09b30fd32cff26dc Reviewed-on: http://gerrit.cloudera.org:8080/14134 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/common/global-flags.cc | 9 + be/src/util/backend-gflag-util.cc | 4 + common/thrift/BackendGflags.thrift | 4 + .../analysis/AlterTableOrViewRenameStmt.java | 2 +- .../org/apache/impala/analysis/CreateDbStmt.java | 2 + .../impala/analysis/CreateTableLikeStmt.java | 2 +- .../org/apache/impala/analysis/CreateViewStmt.java | 2 +- .../java/org/apache/impala/analysis/TableName.java | 36 +++- .../impala/catalog/CatalogServiceCatalog.java | 43 +++++ .../org/apache/impala/service/BackendConfig.java | 8 + .../apache/impala/service/CatalogOpExecutor.java | 64 ++++++-- .../apache/impala/util/CatalogBlacklistUtils.java | 109 +++++++++++++ .../impala/util/CatalogBlacklistUtilsTest.java | 95 +++++++++++ .../test_blacklisted_dbs_and_tables.py | 181 +++++++++++++++++++++ 14 files changed, 542 insertions(+), 19 deletions(-) diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc index 4185b98..1d8d225 100644 --- a/be/src/common/global-flags.cc +++ b/be/src/common/global-flags.cc @@ -249,6 +249,15 @@ DEFINE_int32(hms_event_polling_interval_s, 0, "feature and not recommended to be deployed on production systems until it is " "made generally available."); +DEFINE_string(blacklisted_dbs, "sys,information_schema", + "Comma separated list for blacklisted databases. Configure which databases to be " + "skipped for loading (in startup and global INVALIDATE METADATA). Users can't access," + " create, or drop databases which are blacklisted."); +DEFINE_string(blacklisted_tables, "", + "Comma separated full names (in format: <db>.<table>) of blacklisted tables. " + "Configure which tables to be skipped for loading (in startup and reseting metadata " + "of the table). Users can't access, create, or drop tables which are blacklisted"); + DEFINE_double_hidden(invalidate_tables_gc_old_gen_full_threshold, 0.6, "The threshold " "above which CatalogdTableInvalidator would consider the old generation to be almost " "full and trigger an invalidation on recently unused tables"); diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc index 5bb7d20..ab77cd0 100644 --- a/be/src/util/backend-gflag-util.cc +++ b/be/src/util/backend-gflag-util.cc @@ -77,6 +77,8 @@ DECLARE_string(query_event_hook_classes); DECLARE_int32(query_event_hook_nthreads); DECLARE_bool(is_executor); DECLARE_bool(use_dedicated_coordinator_estimates); +DECLARE_string(blacklisted_dbs); +DECLARE_string(blacklisted_tables); namespace impala { @@ -157,6 +159,8 @@ Status GetThriftBackendGflags(JNIEnv* jni_env, jbyteArray* cfg_bytes) { cfg.__set_is_executor(FLAGS_is_executor); cfg.__set_use_dedicated_coordinator_estimates( FLAGS_use_dedicated_coordinator_estimates); + cfg.__set_blacklisted_dbs(FLAGS_blacklisted_dbs); + cfg.__set_blacklisted_tables(FLAGS_blacklisted_tables); RETURN_IF_ERROR(SerializeThriftMsg(jni_env, &cfg, cfg_bytes)); return Status::OK(); } diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift index 9c5a075..08c072c 100644 --- a/common/thrift/BackendGflags.thrift +++ b/common/thrift/BackendGflags.thrift @@ -135,4 +135,8 @@ struct TBackendGflags { 55: required bool is_executor 56: required bool use_dedicated_coordinator_estimates + + 57: required string blacklisted_dbs + + 58: required string blacklisted_tables } diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableOrViewRenameStmt.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableOrViewRenameStmt.java index 1ba4ae5..ba0c90a 100644 --- a/fe/src/main/java/org/apache/impala/analysis/AlterTableOrViewRenameStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableOrViewRenameStmt.java @@ -69,7 +69,7 @@ public class AlterTableOrViewRenameStmt extends AlterTableStmt { @Override public void analyze(Analyzer analyzer) throws AnalysisException { - newTableName_.analyze(); + analyzer.getFqTableName(newTableName_).analyze(); table_ = analyzer.getTable(tableName_, Privilege.ALL); if (table_ instanceof FeView && renameTable_) { throw new AnalysisException(String.format( diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java index c0e5cdf..aa43ade 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateDbStmt.java @@ -24,6 +24,7 @@ import org.apache.impala.catalog.FeDb; import org.apache.impala.common.AnalysisException; import org.apache.impala.compat.MetastoreShim; import org.apache.impala.thrift.TCreateDbParams; +import org.apache.impala.util.CatalogBlacklistUtils; /** * Represents a CREATE DATABASE statement @@ -91,6 +92,7 @@ public class CreateDbStmt extends StatementBase { if (!MetastoreShim.validateName(dbName_)) { throw new AnalysisException("Invalid database name: " + dbName_); } + CatalogBlacklistUtils.verifyDbName(dbName_); // Note: It is possible that a database with the same name was created external to // this Impala instance. If that happens, the caller will not get an diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java index 4a53e57..888b550 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableLikeStmt.java @@ -173,7 +173,7 @@ public class CreateTableLikeStmt extends StatementBase { "not supported."); } srcDbName_ = srcTable.getDb().getName(); - tableName_.analyze(); + analyzer.getFqTableName(tableName_).analyze(); dbName_ = analyzer.getTargetDbName(tableName_); owner_ = analyzer.getUserShortName(); // Set the servername here if authorization is enabled because analyzer_ is not diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java index 6c38da4..a387022 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java @@ -41,7 +41,7 @@ public class CreateViewStmt extends CreateOrAlterViewStmtBase { public void analyze(Analyzer analyzer) throws AnalysisException { Preconditions.checkState(tableName_ != null && !tableName_.isEmpty()); - tableName_.analyze(); + analyzer.getFqTableName(tableName_).analyze(); // Use a child analyzer to let views have complex-typed columns. Analyzer viewAnalyzerr = new Analyzer(analyzer); // Enforce Hive column labels for view compatibility. diff --git a/fe/src/main/java/org/apache/impala/analysis/TableName.java b/fe/src/main/java/org/apache/impala/analysis/TableName.java index 4ebef4c..ae6426e 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TableName.java +++ b/fe/src/main/java/org/apache/impala/analysis/TableName.java @@ -19,12 +19,15 @@ package org.apache.impala.analysis; import java.util.List; +import org.apache.impala.catalog.Catalog; import org.apache.impala.common.AnalysisException; import org.apache.impala.compat.MetastoreShim; import org.apache.impala.thrift.TTableName; import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; import com.google.common.collect.Lists; +import org.apache.impala.util.CatalogBlacklistUtils; /** * Represents a table/view name that optionally includes its database (a fully qualified @@ -45,23 +48,44 @@ public class TableName { this.tbl_ = tbl; } + /** + * Parse the given full name (in format <db>.<tbl>) and return a TableName object. + * Return null for any failures. Note that we keep table names in lower case so the + * string will be converted to lower case first. + */ + public static TableName parse(String fullName) { + // Avoid "db1." and ".tbl1" being treated as the same. We resolve ".tbl1" as + // "default.tbl1". But we reject "db1." since it only gives the database name. + if (fullName == null || fullName.trim().endsWith(".")) return null; + // TODO: upgrade Guava to 15+ to use splitToList instead + List<String> parts = Lists.newArrayList(Splitter.on('.').trimResults() + .omitEmptyStrings().split(fullName.toLowerCase())); + if (parts.size() == 1) { + return new TableName(Catalog.DEFAULT_DB, parts.get(0)); + } + if (parts.size() == 2) { + return new TableName(parts.get(0), parts.get(1)); + } + return null; + } + public String getDb() { return db_; } public String getTbl() { return tbl_; } public boolean isEmpty() { return tbl_.isEmpty(); } /** - * Checks whether the db and table name meet the Metastore's requirements. + * Checks whether the db and table name meet the Metastore's requirements. and not in + * our blacklist. 'db_' is assumed to be resolved. */ public void analyze() throws AnalysisException { - if (db_ != null) { - if (!MetastoreShim.validateName(db_)) { - throw new AnalysisException("Invalid database name: " + db_); - } + Preconditions.checkNotNull(isFullyQualified()); + if (!MetastoreShim.validateName(db_)) { + throw new AnalysisException("Invalid database name: " + db_); } - Preconditions.checkNotNull(tbl_); if (!MetastoreShim.validateName(tbl_)) { throw new AnalysisException("Invalid table/view name: " + tbl_); } + CatalogBlacklistUtils.verifyTableName(this); } /** diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java index b881b60..1d625f9 100644 --- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java +++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java @@ -84,6 +84,7 @@ import org.apache.impala.thrift.TTableUsage; import org.apache.impala.thrift.TTableUsageMetrics; import org.apache.impala.thrift.TUniqueId; import org.apache.impala.thrift.TUpdateTableUsageRequest; +import org.apache.impala.util.CatalogBlacklistUtils; import org.apache.impala.util.FunctionUtils; import org.apache.impala.util.PatternMatcher; import org.apache.impala.util.TUniqueIdUtil; @@ -266,6 +267,11 @@ public class CatalogServiceCatalog extends Catalog { private AuthorizationManager authzManager_; + // Databases that will be skipped in loading. + private final Set<String> blacklistedDbs_; + // Tables that will be skipped in loading. + private final Set<TableName> blacklistedTables_; + /** * Initialize the CatalogServiceCatalog using a given MetastoreClientPool impl. * @@ -279,6 +285,10 @@ public class CatalogServiceCatalog extends Catalog { MetaStoreClientPool metaStoreClientPool) throws ImpalaException { super(metaStoreClientPool); + blacklistedDbs_ = CatalogBlacklistUtils.parseBlacklistedDbs( + BackendConfig.INSTANCE.getBlacklistedDbs(), LOG); + blacklistedTables_ = CatalogBlacklistUtils.parseBlacklistedTables( + BackendConfig.INSTANCE.getBlacklistedTables(), LOG); catalogServiceId_ = catalogServiceId; tableLoadingMgr_ = new TableLoadingMgr(this, numLoadingThreads); loadInBackground_ = loadInBackground; @@ -317,6 +327,27 @@ public class CatalogServiceCatalog extends Catalog { initialHmsCnxnTimeoutSec)); } + /** + * Check whether the database is in blacklist + */ + public boolean isBlacklistedDb(String dbName) { + return blacklistedDbs_.contains(dbName); + } + + /** + * Check whether the table is in blacklist + */ + public boolean isBlacklistedTable(TableName table) { + return blacklistedTables_.contains(table); + } + + /** + * Check whether the table is in blacklist + */ + public boolean isBlacklistedTable(String db, String table) { + return isBlacklistedTable(new TableName(db, table)); + } + public void setAuthzManager(AuthorizationManager authzManager) { authzManager_ = Preconditions.checkNotNull(authzManager); } @@ -1380,6 +1411,10 @@ public class CatalogServiceCatalog extends Catalog { List<TTableName> tblsToBackgroundLoad = new ArrayList<>(); for (String tableName: msClient.getHiveClient().getAllTables(dbName)) { + if (isBlacklistedTable(dbName, tableName.toLowerCase())) { + LOG.info("skip blacklisted table: " + dbName + "." + tableName); + continue; + } Table incompleteTbl = IncompleteTable.createUninitializedTable(newDb, tableName); incompleteTbl.setCatalogVersion(incrementAndGetCatalogVersion()); newDb.addTable(incompleteTbl); @@ -1486,6 +1521,10 @@ public class CatalogServiceCatalog extends Catalog { List<String> allDbs = msClient.getHiveClient().getAllDatabases(); int numComplete = 0; for (String dbName: allDbs) { + if (isBlacklistedDb(dbName)) { + LOG.info("skip blacklisted db: " + dbName); + continue; + } String annotation = String.format("invalidating metadata - %s/%s dbs complete", numComplete++, allDbs.size()); try (ThreadNameAnnotator tna = new ThreadNameAnnotator(annotation)) { @@ -2042,6 +2081,10 @@ public class CatalogServiceCatalog extends Catalog { dbWasAdded.setRef(false); String dbName = tableName.getDb_name(); String tblName = tableName.getTable_name(); + if (isBlacklistedTable(dbName, tblName)) { + LOG.info("Skip invalidating blacklisted table: " + tableName); + return null; + } LOG.info(String.format("Invalidating table metadata: %s.%s", dbName, tblName)); // Stores whether the table exists in the metastore. Can have three states: diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java index e299c7f..534dc70 100644 --- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java +++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java @@ -169,6 +169,14 @@ public class BackendConfig { return !backendCfg_.is_executor && backendCfg_.use_dedicated_coordinator_estimates; } + public String getBlacklistedDbs() { + return backendCfg_.blacklisted_dbs; + } + + public String getBlacklistedTables() { + return backendCfg_.blacklisted_tables; + } + // Inits the auth_to_local configuration in the static KerberosName class. private static void initAuthToLocal() { // If auth_to_local is enabled, we read the configuration hadoop.security.auth_to_local diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index ed301b0..31e6b5d 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -86,7 +86,6 @@ import org.apache.impala.catalog.KuduTable; import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient; import org.apache.impala.catalog.PartitionNotFoundException; import org.apache.impala.catalog.PartitionStatsUtil; -import org.apache.impala.catalog.PrunablePartition; import org.apache.impala.catalog.RowFormat; import org.apache.impala.catalog.ScalarFunction; import org.apache.impala.catalog.Table; @@ -268,6 +267,12 @@ public class CatalogOpExecutor { // Format string for exceptions returned by Hive Metastore RPCs. private final static String HMS_RPC_ERROR_FORMAT_STR = "Error making '%s' RPC to Hive Metastore: "; + // Error string for inconsistent blacklisted dbs/tables configs between catalogd and + // coordinators. + private final static String BLACKLISTED_DBS_INCONSISTENT_ERR_STR = + "--blacklisted_dbs may be inconsistent between catalogd and coordinators"; + private final static String BLACKLISTED_TABLES_INCONSISTENT_ERR_STR = + "--blacklisted_tables may be inconsistent between catalogd and coordinators"; // Table capabilities property name private static final String CAPABILITIES_KEY = "OBJCAPABILITIES"; @@ -541,6 +546,14 @@ public class CatalogOpExecutor { TableName tableName = TableName.fromThrift(params.getTable_name()); Table tbl = getExistingTable(tableName.getDb(), tableName.getTbl(), "Load for ALTER TABLE"); + if (params.getAlter_type() == TAlterTableType.RENAME_VIEW + || params.getAlter_type() == TAlterTableType.RENAME_TABLE) { + TableName newTableName = TableName.fromThrift( + params.getRename_params().getNew_table_name()); + Preconditions.checkState(!catalog_.isBlacklistedTable(newTableName), + String.format("Can't rename to blacklisted table name: %s. %s", newTableName, + BLACKLISTED_DBS_INCONSISTENT_ERR_STR)); + } tryLock(tbl); // Get a new catalog version to assign to the table being altered. long newCatalogVersion = catalog_.incrementAndGetCatalogVersion(); @@ -1153,6 +1166,9 @@ public class CatalogOpExecutor { String dbName = params.getDb(); Preconditions.checkState(dbName != null && !dbName.isEmpty(), "Null or empty database name passed as argument to Catalog.createDatabase"); + Preconditions.checkState(!catalog_.isBlacklistedDb(dbName), + String.format("Can't create blacklisted database: %s. %s", dbName, + BLACKLISTED_DBS_INCONSISTENT_ERR_STR)); Db existingDb = catalog_.getDb(dbName); if (params.if_not_exists && existingDb != null) { if (LOG.isTraceEnabled()) { @@ -1492,11 +1508,20 @@ public class CatalogOpExecutor { private void dropDatabase(TDropDbParams params, TDdlExecResponse resp) throws ImpalaException { Preconditions.checkNotNull(params); - Preconditions.checkState(params.getDb() != null && !params.getDb().isEmpty(), + String dbName = params.getDb(); + Preconditions.checkState(dbName != null && !dbName.isEmpty(), "Null or empty database name passed as argument to Catalog.dropDatabase"); + Preconditions.checkState(!catalog_.isBlacklistedDb(dbName) || params.if_exists, + String.format("Can't drop blacklisted database: %s. %s", dbName, + BLACKLISTED_DBS_INCONSISTENT_ERR_STR)); + if (catalog_.isBlacklistedDb(dbName)) { + // It's expected to go here if "if_exists" is set to true. + addSummary(resp, "Can't drop blacklisted database: " + dbName); + return; + } - LOG.trace("Dropping database " + params.getDb()); - Db db = catalog_.getDb(params.db); + LOG.trace("Dropping database " + dbName); + Db db = catalog_.getDb(dbName); if (db != null && db.numFunctions() > 0 && !params.cascade) { throw new CatalogException("Database " + db.getName() + " is not empty"); } @@ -1514,7 +1539,7 @@ public class CatalogOpExecutor { // ignoreIfUnknown as false and catch the NoSuchObjectFoundException and // determine if we should throw or not msClient.getHiveClient().dropDatabase( - params.getDb(), /* deleteData */true, /* ignoreIfUnknown */false, + dbName, /* deleteData */true, /* ignoreIfUnknown */false, params.cascade); addSummary(resp, "Database has been dropped."); } catch (TException e) { @@ -1526,7 +1551,7 @@ public class CatalogOpExecutor { String.format(HMS_RPC_ERROR_FORMAT_STR, "dropDatabase"), e); } } - Db removedDb = catalog_.removeDb(params.getDb()); + Db removedDb = catalog_.removeDb(dbName); if (removedDb == null) { // Nothing was removed from the catalogd's cache. @@ -1539,7 +1564,7 @@ public class CatalogOpExecutor { } removedObject = removedDb.toTCatalogObject(); if (authzConfig_.isEnabled()) { - authzManager_.updateDatabaseOwnerPrivilege(params.server_name, db.getName(), + authzManager_.updateDatabaseOwnerPrivilege(params.server_name, dbName, db.getMetaStoreDb().getOwnerName(), db.getMetaStoreDb().getOwnerType(), /* newOwner */ null, /* newOwnerType */ null, resp); } @@ -1615,6 +1640,14 @@ public class CatalogOpExecutor { throws ImpalaException { TableName tableName = TableName.fromThrift(params.getTable_name()); Preconditions.checkState(tableName != null && tableName.isFullyQualified()); + Preconditions.checkState(!catalog_.isBlacklistedTable(tableName) || params.if_exists, + String.format("Can't drop blacklisted table: %s. %s", tableName, + BLACKLISTED_TABLES_INCONSISTENT_ERR_STR)); + if (catalog_.isBlacklistedTable(tableName)) { + // It's expected to go here if "if_exists" is set to true. + addSummary(resp, "Can't drop blacklisted table: " + tableName); + return; + } LOG.trace(String.format("Dropping table/view %s", tableName)); // If the table exists, ensure that it is loaded before we try to operate on it. @@ -1996,6 +2029,9 @@ public class CatalogOpExecutor { Preconditions.checkState(tableName != null && tableName.isFullyQualified()); Preconditions.checkState(params.getColumns() != null, "Null column list given as argument to Catalog.createTable"); + Preconditions.checkState(!catalog_.isBlacklistedTable(tableName), + String.format("Can't create blacklisted table: %s. %s", tableName, + BLACKLISTED_TABLES_INCONSISTENT_ERR_STR)); Table existingTbl = catalog_.getTableNoThrow(tableName.getDb(), tableName.getTbl()); if (params.if_not_exists && existingTbl != null) { @@ -2241,6 +2277,9 @@ public class CatalogOpExecutor { Preconditions.checkState(params.getColumns() != null && params.getColumns().size() > 0, "Null or empty column list given as argument to DdlExecutor.createView"); + Preconditions.checkState(!catalog_.isBlacklistedTable(tableName), + String.format("Can't create view with blacklisted table name: %s. %s", tableName, + BLACKLISTED_TABLES_INCONSISTENT_ERR_STR)); if (params.if_not_exists && catalog_.containsTable(tableName.getDb(), tableName.getTbl())) { LOG.trace(String.format("Skipping view creation because %s already exists and " + @@ -2266,9 +2305,8 @@ public class CatalogOpExecutor { * table's metadata on the next access. * @param syncDdl tells is SYNC_DDL is enabled for this DDL request. */ - private void createTableLike(TCreateTableLikeParams params, TDdlExecResponse response - , boolean syncDdl) - throws ImpalaException { + private void createTableLike(TCreateTableLikeParams params, TDdlExecResponse response, + boolean syncDdl) throws ImpalaException { Preconditions.checkNotNull(params); THdfsFileFormat fileFormat = @@ -2278,6 +2316,9 @@ public class CatalogOpExecutor { TableName srcTblName = TableName.fromThrift(params.getSrc_table_name()); Preconditions.checkState(tblName != null && tblName.isFullyQualified()); Preconditions.checkState(srcTblName != null && srcTblName.isFullyQualified()); + Preconditions.checkState(!catalog_.isBlacklistedTable(tblName), + String.format("Can't create blacklisted table: %s. %s", tblName, + BLACKLISTED_TABLES_INCONSISTENT_ERR_STR)); Table existingTbl = catalog_.getTableNoThrow(tblName.getDb(), tblName.getTbl()); if (params.if_not_exists && existingTbl != null) { @@ -3769,6 +3810,9 @@ public class CatalogOpExecutor { resp.getResult().setCatalog_service_id(JniCatalog.getServiceId()); if (req.isSetDb_name()) { + Preconditions.checkState(!catalog_.isBlacklistedDb(req.getDb_name()), + String.format("Can't refresh functions in blacklisted database: %s. %s", + req.getDb_name(), BLACKLISTED_DBS_INCONSISTENT_ERR_STR)); // This is a "refresh functions" operation. synchronized (metastoreDdlLock_) { try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) { diff --git a/fe/src/main/java/org/apache/impala/util/CatalogBlacklistUtils.java b/fe/src/main/java/org/apache/impala/util/CatalogBlacklistUtils.java new file mode 100644 index 0000000..aedd911 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/util/CatalogBlacklistUtils.java @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.util; + +import org.apache.impala.analysis.TableName; + +import java.util.Set; + +import org.apache.impala.common.AnalysisException; +import org.apache.impala.service.BackendConfig; +import org.slf4j.Logger; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.collect.Sets; + +public class CatalogBlacklistUtils { + private final static Set<String> BLACKLISTED_DBS = + CatalogBlacklistUtils.parseBlacklistedDbsFromConfigs(); + private final static Set<TableName> BLACKLISTED_TABLES = + CatalogBlacklistUtils.parseBlacklistedTablesFromConfigs(); + + /** + * Parse blacklisted databases from backend configs. + */ + public static Set<String> parseBlacklistedDbsFromConfigs() { + return parseBlacklistedDbs( + BackendConfig.INSTANCE == null ? "" : BackendConfig.INSTANCE.getBlacklistedDbs(), + null); + } + + /** + * Prase blacklisted tables from backend configs. + */ + public static Set<TableName> parseBlacklistedTablesFromConfigs() { + return parseBlacklistedTables( + BackendConfig.INSTANCE == null ? "" : + BackendConfig.INSTANCE.getBlacklistedTables(), + null); + } + + /** + * Parse blacklisted databases from given configs string. Pass Logger if logging is + * necessary. + */ + public static Set<String> parseBlacklistedDbs(String blacklistedDbsConfig, + Logger logger) { + Preconditions.checkNotNull(blacklistedDbsConfig); + Set<String> blacklistedDbs = Sets.newHashSet(); + for (String db: Splitter.on(',').trimResults().omitEmptyStrings().split( + blacklistedDbsConfig)) { + blacklistedDbs.add(db.toLowerCase()); + if (logger != null) logger.info("Blacklist db: " + db); + } + return blacklistedDbs; + } + + /** + * Parse blacklisted tables from configs string. Pass Logger if logging is necessary. + */ + public static Set<TableName> parseBlacklistedTables(String blacklistedTablesConfig, + Logger logger) { + Preconditions.checkNotNull(blacklistedTablesConfig); + Set<TableName> blacklistedTables = Sets.newHashSet(); + for (String tblName: Splitter.on(',').trimResults().omitEmptyStrings().split( + blacklistedTablesConfig)) { + TableName tbl = TableName.parse(tblName); + if (tbl == null) { + if (logger != null) { + logger.warn(String.format("Illegal blacklisted table name: '%s'", + tblName)); + } + continue; + } + blacklistedTables.add(tbl); + if (logger != null) logger.info("Blacklist table: " + tbl); + } + return blacklistedTables; + } + + public static void verifyDbName(String dbName) throws AnalysisException { + if (BLACKLISTED_DBS.contains(dbName)) { + throw new AnalysisException("Invalid db name: " + dbName + + ". It has been blacklisted using --blacklisted_dbs"); + } + } + + public static void verifyTableName(TableName table) throws AnalysisException { + if (BLACKLISTED_TABLES.contains(table)) { + throw new AnalysisException("Invalid table/view name: " + table + + ". It has been blacklisted using --blacklisted_tables"); + } + } +} diff --git a/fe/src/test/java/org/apache/impala/util/CatalogBlacklistUtilsTest.java b/fe/src/test/java/org/apache/impala/util/CatalogBlacklistUtilsTest.java new file mode 100644 index 0000000..eb1a28d --- /dev/null +++ b/fe/src/test/java/org/apache/impala/util/CatalogBlacklistUtilsTest.java @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.Set; + +import org.apache.impala.analysis.TableName; +import org.apache.impala.catalog.Catalog; +import org.junit.Test; + +public class CatalogBlacklistUtilsTest { + + @Test + public void testParsingBlacklistedDbs() { + Set<String> blacklistedDbs; + + blacklistedDbs = CatalogBlacklistUtils.parseBlacklistedDbs("db1,db2", null); + assertEquals(blacklistedDbs.size(), 2); + assertTrue(blacklistedDbs.contains("db1")); + assertTrue(blacklistedDbs.contains("db2")); + + // Test spaces + blacklistedDbs = CatalogBlacklistUtils.parseBlacklistedDbs(" db1 , db2 ", null); + assertEquals(blacklistedDbs.size(), 2); + assertTrue(blacklistedDbs.contains("db1")); + assertTrue(blacklistedDbs.contains("db2")); + blacklistedDbs = CatalogBlacklistUtils.parseBlacklistedDbs(" ", null); + assertTrue(blacklistedDbs.isEmpty()); + + // Test lower/upper cases + blacklistedDbs = CatalogBlacklistUtils.parseBlacklistedDbs("DB1,Db2", null); + assertEquals(blacklistedDbs.size(), 2); + assertTrue(blacklistedDbs.contains("db1")); + assertTrue(blacklistedDbs.contains("db2")); + + // Test abnormal inputs + blacklistedDbs = CatalogBlacklistUtils.parseBlacklistedDbs("db1,", null); + assertEquals(blacklistedDbs.size(), 1); + assertTrue(blacklistedDbs.contains("db1")); + } + + @Test + public void testParsingBlacklistedTables() { + Set<TableName> blacklistedTables; + + blacklistedTables = CatalogBlacklistUtils.parseBlacklistedTables( + "db3.foo,db3.bar", null); + assertEquals(blacklistedTables.size(), 2); + assertTrue(blacklistedTables.contains(new TableName("db3", "foo"))); + assertTrue(blacklistedTables.contains(new TableName("db3", "bar"))); + + // Test spaces + blacklistedTables = CatalogBlacklistUtils.parseBlacklistedTables( + " db3 . foo , db3 . bar ", null); + assertEquals(blacklistedTables.size(), 2); + assertTrue(blacklistedTables.contains(new TableName("db3", "foo"))); + assertTrue(blacklistedTables.contains(new TableName("db3", "bar"))); + + // Test defaults + blacklistedTables = CatalogBlacklistUtils.parseBlacklistedTables("foo", null); + assertEquals(blacklistedTables.size(), 1); + assertTrue(blacklistedTables.contains(new TableName(Catalog.DEFAULT_DB, "foo"))); + + // Test lower/upper cases + blacklistedTables = CatalogBlacklistUtils.parseBlacklistedTables( + "DB3.Foo,db3.Bar", null); + assertEquals(blacklistedTables.size(), 2); + assertTrue(blacklistedTables.contains(new TableName("db3", "foo"))); + assertTrue(blacklistedTables.contains(new TableName("db3", "bar"))); + + // Test abnormal inputs + blacklistedTables = CatalogBlacklistUtils.parseBlacklistedTables("db3.,.bar,,", null); + assertEquals(blacklistedTables.size(), 1); + assertTrue(blacklistedTables.contains(new TableName(Catalog.DEFAULT_DB, "bar"))); + } +} diff --git a/tests/custom_cluster/test_blacklisted_dbs_and_tables.py b/tests/custom_cluster/test_blacklisted_dbs_and_tables.py new file mode 100644 index 0000000..bdcb33d --- /dev/null +++ b/tests/custom_cluster/test_blacklisted_dbs_and_tables.py @@ -0,0 +1,181 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from tests.common.custom_cluster_test_suite import CustomClusterTestSuite + + +class TestBlacklistedDbsAndTables(CustomClusterTestSuite): + """Test for db and table blacklist.""" + + @classmethod + def get_workload(self): + return 'functional-query' + + def __expect_error_in_result(self, stmt, expected_err): + # Drop db/table/view statements won't fail if they contains IF EXISTS. Instead, + # the error message is returned as results. + result = self.execute_query(stmt) + assert expected_err in result.get_data() + + def __expect_error_in_query(self, stmt, expected_err): + err = self.execute_query_expect_failure(self.client, stmt) + assert expected_err in str(err) + + def __check_db_not_visible(self, db): + result = self.hive_client.get_database(db) + assert result is not None + self.__expect_error_in_query("describe database %s" % db, + "Database does not exist: %s" % db) + # Check blacklisted dbs are not shown when querying the database list + result = self.execute_query("show databases") + assert db not in result.data + + def __check_table_not_visible(self, db, table): + result = self.hive_client.get_table(db, table) + assert result is not None + self.__expect_error_in_query("describe %s.%s" % (db, table), + "Could not resolve path: '%s.%s'" % (db, table)) + self.__expect_error_in_query("invalidate metadata %s.%s" % (db, table), + "Table not found: %s.%s" % (db, table)) + # Check blacklisted tables are not shown when querying the table list + result = self.execute_query("show tables in %s" % db) + assert table not in result.data + + def __check_create_drop_table(self, use_fully_qualified_table_name=True): + tbl_prefix = "functional." if use_fully_qualified_table_name else "" + # Check creating table/view with blacklisted table name + self.__expect_error_in_query( + "create table {0}alltypes (i int)".format(tbl_prefix), + "Invalid table/view name: functional.alltypes") + self.__expect_error_in_query( + "create table {0}alltypes as select 1".format(tbl_prefix), + "Invalid table/view name: functional.alltypes") + self.__expect_error_in_query( + "create table {0}alltypes like functional.alltypestiny".format(tbl_prefix), + "Invalid table/view name: functional.alltypes") + self.__expect_error_in_query( + "create view {0}alltypes as select 1".format(tbl_prefix), + "Invalid table/view name: functional.alltypes") + + # Check dropping table/view with blacklisted table name + self.__expect_error_in_result( + "drop table if exists {0}alltypes".format(tbl_prefix), + "Can't drop blacklisted table: functional.alltypes") + self.__expect_error_in_result( + "drop view if exists {0}alltypes".format(tbl_prefix), + "Can't drop blacklisted table: functional.alltypes") + + # Check renaming table/view to blacklisted table name + self.__expect_error_in_query( + "alter table functional.alltypestiny rename to functional.alltypes", + "Invalid table/view name: functional.alltypes") + self.__expect_error_in_query( + "alter view functional.alltypes_view rename to functional.alltypes", + "Invalid table/view name: functional.alltypes") + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args( + impalad_args="--blacklisted_dbs=functional_rc,functional_seq " + "--blacklisted_tables=functional.alltypes,functional_parquet.alltypes", + catalogd_args="--blacklisted_dbs=functional_rc,functional_seq " + "--blacklisted_tables=functional.alltypes,functional_parquet.alltypes") + def test_blacklisted_dbs_and_tables(self, vector): + self.__check_db_not_visible("functional_rc") + self.__check_db_not_visible("functional_seq") + self.__check_table_not_visible("functional", "alltypes") + self.__check_table_not_visible("functional_parquet", "alltypes") + + # Check blacklisted dbs/tables not appear after INVALIDATE METADATA + self.execute_query("INVALIDATE METADATA") + self.__check_db_not_visible("functional_rc") + self.__check_db_not_visible("functional_seq") + self.__check_table_not_visible("functional", "alltypes") + self.__check_table_not_visible("functional_parquet", "alltypes") + + # Check creating/dropping blacklisted database + self.__expect_error_in_query( + "create database functional_rc", + "Invalid db name: functional_rc") + self.__expect_error_in_query( + "create database if not exists functional_rc", + "Invalid db name: functional_rc") + self.__expect_error_in_query( + "drop database functional_rc", + "Database does not exist: functional_rc") + self.__expect_error_in_result( + "drop database if exists functional_rc", + "Can't drop blacklisted database: functional_rc") + + self.__check_create_drop_table(use_fully_qualified_table_name=True) + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args( + impalad_args="--blacklisted_tables=alltypes_def,functional.alltypes", + catalogd_args="--blacklisted_tables=alltypes_def,functional.alltypes") + def test_resolving_default_database(self, vector): + # Check that "alltypes_def" is resolved as "default.alltypes_def" + table = self.hive_client.get_table("functional", "alltypes") + table.dbName = "default" + table.tableName = "alltypes_def" + self.hive_client.create_table(table) + self.__check_table_not_visible("default", "alltypes_def") + self.hive_client.drop_table("default", "alltypes_def", True) + + # Check non fully qualified table names are recognized correctly + self.change_database(self.client, db_name="functional") + self.__check_create_drop_table(use_fully_qualified_table_name=False) + + @pytest.mark.execute_serially + @CustomClusterTestSuite.with_args( + catalogd_args="--blacklisted_dbs=functional_rc " + "--blacklisted_tables=functional.alltypes", + impalad_args="--blacklisted_dbs=functional_seq " + "--blacklisted_tables=functional.alltypestiny") + def test_inconsistent_blacklist(self, vector): + """Test the error handling when blacklists are accidentally set differently between + coordinators and the catalogd""" + self.__expect_error_in_query( + "create database functional_rc", + "Can't create blacklisted database: functional_rc") + self.__expect_error_in_query( + "refresh functions functional_rc", + "Can't refresh functions in blacklisted database: functional_rc") + self.__expect_error_in_result( + "drop database if exists functional_rc", + "Can't drop blacklisted database: functional_rc") + self.__expect_error_in_query( + "create table functional.alltypes (i int)", + "Can't create blacklisted table: functional.alltypes") + self.__expect_error_in_query( + "create table if not exists functional.alltypes (i int)", + "Can't create blacklisted table: functional.alltypes") + self.__expect_error_in_query( + "create table functional.alltypes as select 1", + "Can't create blacklisted table: functional.alltypes") + self.__expect_error_in_query( + "create table functional.alltypes like functional.alltypestiny", + "Can't create blacklisted table: functional.alltypes") + self.__expect_error_in_query( + "create view functional.alltypes as select 1", + "Can't create view with blacklisted table name: functional.alltypes") + self.__expect_error_in_query( + "alter table functional.alltypesagg rename to functional.alltypes", + "Can't rename to blacklisted table name: functional.alltypes") + self.__expect_error_in_query( + "alter view functional.alltypes_view rename to functional.alltypes", + "Can't rename to blacklisted table name: functional.alltypes")