This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3dac0135fba0717dd977043e7ecc6b52bf55189f Author: stiga-huang <[email protected]> AuthorDate: Tue Jan 27 22:24:02 2026 +0800 IMPALA-14703: Improves finding column masking policies of a table When Ranger authorization is enabled, if a user wants to update masked data, it should be blocked. This is done by checking if there are any column masking or row filtering policies on the table enabled for the user. Currently we iterate over all the columns of the table and check if a masking policy exists on each column. This is inefficient, especially for wide tables. It also requires the table's metadata to be loaded to get the column list, which introduces a performance regression for INVALIDATE and REFRESH statements that previously didn't trigger metadata loading. This patch improves the check to make the request at the table level with a resource matching scope of SELF_OR_DESCENDANTS. By using this, the Ranger plugin will return the first matching column masking policy in evalDataMaskPolicies(). As we don't need the column list now, the table loading triggered by INVALIDATE and REFRESH statements is also removed. 
Tests - Ran test_block_metadata_update and data masking tests in test_ranger.py Change-Id: Ic8ab88b7cfd4f7e156c4eead53a2ff3086b1daa6 Reviewed-on: http://gerrit.cloudera.org:8080/23908 Reviewed-by: Csaba Ringhofer <[email protected]> Reviewed-by: Fang-Yu Rao <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/analysis/StmtMetadataLoader.java | 14 +----- .../impala/authorization/AuthorizableTable.java | 13 ------ .../authorization/BaseAuthorizationChecker.java | 24 ---------- .../ranger/RangerAuthorizationChecker.java | 52 +++++++++------------- 4 files changed, 22 insertions(+), 81 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java b/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java index e13695367..b055fcecd 100644 --- a/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java +++ b/fe/src/main/java/org/apache/impala/analysis/StmtMetadataLoader.java @@ -485,19 +485,7 @@ public class StmtMetadataLoader { public Set<TableName> collectTableCandidates(StatementBase stmt) { Preconditions.checkNotNull(stmt); List<TableRef> tblRefs = new ArrayList<>(); - // The information about whether table masking is supported is not available to - // ResetMetadataStmt so we collect the TableRef for ResetMetadataStmt whenever - // applicable. Skip this if allow_catalog_cache_op_from_masked_users=true because - // we don't need column info for fetching column-masking policies. 
- if (stmt instanceof ResetMetadataStmt - && fe_.getAuthzFactory().getAuthorizationConfig().isEnabled() - && fe_.getAuthzFactory().supportsTableMasking() - && !BackendConfig.INSTANCE.allowCatalogCacheOpFromMaskedUsers()) { - TableName tableName = ((ResetMetadataStmt) stmt).getTableName(); - if (tableName != null) tblRefs.add(new TableRef(tableName.toPath(), null)); - } else { - stmt.collectTableRefs(tblRefs); - } + stmt.collectTableRefs(tblRefs); Set<TableName> tableNames = new HashSet<>(); for (TableRef ref: tblRefs) { tableNames.addAll(Path.getCandidateTables(ref.getPath(), sessionDb_)); diff --git a/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java b/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java index b670438af..3cfc8ad86 100644 --- a/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java +++ b/fe/src/main/java/org/apache/impala/authorization/AuthorizableTable.java @@ -22,9 +22,6 @@ import javax.annotation.Nullable; import com.google.common.base.Preconditions; import com.google.common.base.Strings; -import java.util.ArrayList; -import java.util.List; - /** * A class to authorize access to a table. */ @@ -33,7 +30,6 @@ public class AuthorizableTable extends Authorizable { private final String tableName_; @Nullable // Is null if the owner is not set. 
private final String ownerUser_; - private final List<String> columns_ = new ArrayList<>(); public AuthorizableTable(String dbName, String tableName, @Nullable String ownerUser) { Preconditions.checkArgument(!Strings.isNullOrEmpty(dbName)); @@ -60,13 +56,4 @@ public class AuthorizableTable extends Authorizable { @Override public String getOwnerUser() { return ownerUser_; } - - public void setColumns(List<String> columns) { - columns_.clear(); - columns_.addAll(columns); - } - - public List<String> getColumns() { - return columns_; - } } diff --git a/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java index aa8123dcc..b2b3980cc 100644 --- a/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java +++ b/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java @@ -216,30 +216,6 @@ public abstract class BaseAuthorizationChecker implements AuthorizationChecker { if (dbName != null && checkSystemDbAccess(catalog, dbName, request.getPrivilege())) { return; } - // Populate column names to check column masking policies in blocking updates. - // No need to do this for REFRESH if allow_catalog_cache_op_from_masked_users=true. - // Note that db.getTable() could be a heavy operation in local catalog mode since it - // triggers metadata loading on the table if it's unloaded in catalogd. Skipping this - // improves the performance of "INVALIDATE METADATA <table>" statements. For REFRESH - // statements, the performance doesn't differ a lot since there are other places that - // use db.getTable() (see IMPALA-12591). 
- if (config_.isEnabled() && request.getAuthorizable() != null - && request.getAuthorizable().getType() == Type.TABLE - && (request.getPrivilege() != Privilege.REFRESH - || !BackendConfig.INSTANCE.allowCatalogCacheOpFromMaskedUsers())) { - Preconditions.checkNotNull(dbName); - AuthorizableTable authorizableTable = (AuthorizableTable) request.getAuthorizable(); - FeDb db = catalog.getDb(dbName); - if (db != null) { - // 'db', 'table' could be null for an unresolved table ref. 'table' could be - // null for target table of a CTAS statement. Don't need to populate column - // names in such cases since no column masking policies will be checked. - FeTable table = db.getTable(authorizableTable.getTableName()); - if (table != null && !(table instanceof FeIncompleteTable)) { - authorizableTable.setColumns(table.getColumnNames()); - } - } - } checkAccess(authzCtx, analysisResult.getAnalyzer().getUser(), request); } diff --git a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java index 4f9bdb18e..f062b6240 100644 --- a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java +++ b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java @@ -491,13 +491,18 @@ public class RangerAuthorizationChecker extends BaseAuthorizationChecker { String tableName, String columnName, RangerBufferAuditHandler auditHandler) throws InternalException { Preconditions.checkNotNull(user); - RangerAccessResourceImpl resource = new RangerImpalaResourceBuilder() - .database(dbName) - .table(tableName) - .column(columnName) - .build(); RangerAccessRequestImpl req = new RangerAccessRequestImpl(); - req.setResource(resource); + RangerImpalaResourceBuilder builder = new RangerImpalaResourceBuilder() + .database(dbName) + .table(tableName); + if (columnName != null) { + builder.column(columnName); + } else { + // If no 
column is given, find any column masking policy on the table. + req.setResourceMatchingScope( + RangerAccessRequest.ResourceMatchingScope.SELF_OR_DESCENDANTS); + } + req.setResource(builder.build()); req.setAccessType(SELECT_ACCESS_TYPE); req.setUser(user.getShortName()); req.setUserGroups(getUserGroups(user)); @@ -714,7 +719,7 @@ public class RangerAuthorizationChecker extends BaseAuthorizationChecker { RangerAccessResult rowFilterResult = plugin_.evalRowFilterPolicies( request, /*resultProcessor*/null); if (rowFilterResult != null && rowFilterResult.isRowFilterEnabled()) { - LOG.trace("Deny {} on {} due to row filtering policy {}", + LOG.info("Deny {} on {} due to row filtering policy {}", privilege, authorizable.getName(), rowFilterResult.getPolicyId()); accessResult.setIsAllowed(false); accessResult.setPolicyId(rowFilterResult.getPolicyId()); @@ -725,30 +730,15 @@ public class RangerAuthorizationChecker extends BaseAuthorizationChecker { } // Check if masking is enabled for any column in the table/view. if (accessResult.getIsAllowed()) { - List<String> columns; - if (authorizable.getType() == Type.TABLE) { - // Check all columns. 
- columns = ((AuthorizableTable) authorizable).getColumns(); - LOG.trace("Checking mask policies on {} columns of table {}", columns.size(), - authorizable.getFullTableName()); - } else { - columns = Lists.newArrayList(authorizable.getColumnName()); - } - for (String column : columns) { - RangerAccessResult columnMaskResult = evalColumnMask(user, - authorizable.getDbName(), authorizable.getTableName(), column, - /*auditHandler*/null); - if (columnMaskResult != null && columnMaskResult.isMaskEnabled()) { - LOG.trace("Deny {} on {} due to column masking policy {}", - privilege, authorizable.getName(), columnMaskResult.getPolicyId()); - accessResult.setIsAllowed(false); - accessResult.setPolicyId(columnMaskResult.getPolicyId()); - accessResult.setReason("User does not have access to unmasked column values"); - break; - } else { - LOG.trace("No column masking policy found on column {} of {}.", column, - authorizable.getFullTableName()); - } + RangerAccessResult columnMaskResult = evalColumnMask(user, + authorizable.getDbName(), authorizable.getTableName(), /*columnName*/null, + /*auditHandler*/null); + if (columnMaskResult != null && columnMaskResult.isMaskEnabled()) { + LOG.info("Deny {} on {} due to column masking policy {}", + privilege, authorizable.getName(), columnMaskResult.getPolicyId()); + accessResult.setIsAllowed(false); + accessResult.setPolicyId(columnMaskResult.getPolicyId()); + accessResult.setReason("User does not have access to unmasked column values"); } } // Set back the original access type. The request object is still referenced by the
