This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0a077fe9921af7c5bac2f9decba371c5eca55d6a
Author: wzhou-code <[email protected]>
AuthorDate: Thu Mar 21 21:58:51 2024 -0700

    IMPALA-12928: Mask JDBC table property dbcp.password for DESC FORMATTED and 
SHOW CREATE TABLE
    
    The 'desc formatted' and 'show create table' commands show all table
    properties in clear text. For external JDBC tables, the dbcp.password
    table property value should be masked in the output of these two commands.
    
    This patch masks the dbcp.password property value in the output of the
    'desc formatted' and 'show create table' commands.
    
    The dbcp.password table property could be written into Impala and HMS
    log files along with JDBC table creation statements. A generic tool is
    available in production environments with which users can set up regular
    expressions to detect and redact sensitive information within SQL
    statement text in log files.
    
    Testing:
     - Added end-to-end test cases.
     - Passed core tests.
    
    Change-Id: I83dc32c8d0fec1cdfdfe06e720561b2ae1adf5df
    Reviewed-on: http://gerrit.cloudera.org:8080/21187
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../java/org/apache/impala/analysis/ToSqlUtils.java     | 11 +++++++++++
 .../java/org/apache/impala/catalog/DataSourceTable.java | 12 ++++++++++++
 .../org/apache/impala/util/HiveMetadataFormatUtils.java | 16 +++++++++++++---
 tests/query_test/test_ext_data_sources.py               | 17 ++++++++++++++++-
 4 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java 
b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
index 66434a184..305905ef4 100755
--- a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java
@@ -23,6 +23,7 @@ import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 
 import org.antlr.runtime.ANTLRStringStream;
 import org.antlr.runtime.RecognitionException;
@@ -34,6 +35,8 @@ import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.ql.parse.HiveLexer;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.catalog.Column;
+import org.apache.impala.catalog.DataSourceTable;
+import org.apache.impala.catalog.FeDataSourceTable;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeHBaseTable;
 import org.apache.impala.catalog.FeIcebergTable;
@@ -468,6 +471,14 @@ public class ToSqlUtils {
       } catch (Exception e) {
         throw new CatalogException("Could not get primary key/foreign keys 
sql.", e);
       }
+    } else if (table instanceof FeDataSourceTable) {
+      // Mask sensitive table properties for external JDBC table.
+      Set<String> keysToBeMasked = 
DataSourceTable.getJdbcTblPropertyMaskKeys();
+      for (String key : properties.keySet()) {
+        if (keysToBeMasked.contains(key.toLowerCase())) {
+          properties.put(key, "******");
+        }
+      }
     }
 
     HdfsUri tableLocation = location == null ? null : new HdfsUri(location);
diff --git a/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java 
b/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java
index 2bddfacc1..4a5f4a05d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/DataSourceTable.java
@@ -17,7 +17,9 @@
 
 package org.apache.impala.catalog;
 
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -370,4 +372,14 @@ public class DataSourceTable extends Table implements 
FeDataSourceTable {
       org.apache.hadoop.hive.metastore.api.Table msTbl) {
     return msTbl.getParameters().containsKey(TBL_PROP_DATA_SRC_NAME);
   }
+
+  /**
+   * Returns the set of keys of external JDBC table properties for which the property
+   * values should be masked in the output of "desc formatted" and "show create table"
+   * commands.
+   */
+  public static Set<String> getJdbcTblPropertyMaskKeys() {
+    return new HashSet<String>(Arrays.asList(
+        JdbcStorageConfig.DBCP_PASSWORD.getPropertyName()));
+  }
 }
diff --git 
a/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java 
b/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java
index 61f33a2b2..4a9e2cea4 100644
--- a/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java
+++ b/fe/src/main/java/org/apache/impala/util/HiveMetadataFormatUtils.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo;
 import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.impala.catalog.DataSourceTable;
 import org.apache.impala.catalog.IcebergTable;
 import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.compat.MetastoreShim;
@@ -557,7 +558,13 @@ public class HiveMetadataFormatUtils {
 
     if (tbl.getParameters().size() > 0) {
       tableInfo.append("Table Parameters:").append(LINE_DELIM);
-      displayAllParameters(tbl.getParameters(), tableInfo, false, 
isOutputPadded);
+      // Mask sensitive table properties for external JDBC table.
+      Set<String> keysToBeMasked = null;
+      if (DataSourceTable.isDataSourceTable(tbl)) {
+        keysToBeMasked = DataSourceTable.getJdbcTblPropertyMaskKeys();
+      }
+      displayAllParameters(
+          tbl.getParameters(), tableInfo, false, isOutputPadded, 
keysToBeMasked);
     }
   }
 
@@ -573,7 +580,8 @@ public class HiveMetadataFormatUtils {
    * escaped.
    */
   private static void displayAllParameters(Map<String, String> params,
-      StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded) {
+      StringBuilder tableInfo, boolean escapeUnicode, boolean isOutputPadded,
+      Set<String> keysToBeMasked) {
     List<String> keys = new ArrayList<String>(params.keySet());
     Collections.sort(keys);
     for (String key : keys) {
@@ -583,6 +591,8 @@ public class HiveMetadataFormatUtils {
         if ("0".equals(value)) {
           continue;
         }
+      } else if (keysToBeMasked != null && 
keysToBeMasked.contains(key.toLowerCase())) {
+        value = "******";
       }
       tableInfo.append(FIELD_DELIM); // Ensures all params are indented.
       formatOutput(key, escapeUnicode ? StringEscapeUtils.escapeJava(value)
@@ -692,7 +702,7 @@ public class HiveMetadataFormatUtils {
     if (storageDesc.getSerdeInfo().getParametersSize() > 0) {
       tableInfo.append("Storage Desc Params:").append(LINE_DELIM);
       displayAllParameters(storageDesc.getSerdeInfo().getParameters(), 
tableInfo, true,
-          false);
+          false, /* keysToBeMasked */ null);
     }
   }
 
diff --git a/tests/query_test/test_ext_data_sources.py 
b/tests/query_test/test_ext_data_sources.py
index ca16578a9..554bb96fa 100644
--- a/tests/query_test/test_ext_data_sources.py
+++ b/tests/query_test/test_ext_data_sources.py
@@ -65,8 +65,23 @@ class TestExtDataSources(ImpalaTestSuite):
   def test_verify_jdbc_table_properties(self, vector):
     jdbc_tbl_name = "functional.alltypes_jdbc_datasource"
     properties = self._get_tbl_properties(jdbc_tbl_name)
-    # Verify data source related table properties
+    # Verify table properties specific for external JDBC table
     assert properties['__IMPALA_DATA_SOURCE_NAME'] == 'impalajdbcdatasource'
+    assert properties['database.type'] == 'POSTGRES'
+    assert properties['jdbc.driver'] == 'org.postgresql.Driver'
+    assert properties['dbcp.username'] == 'hiveuser'
+    assert properties['table'] == 'alltypes'
+    # Verify dbcp.password is masked in the output of DESCRIBE FORMATTED 
command
+    assert properties['dbcp.password'] == '******'
+
+    # Verify dbcp.password is masked in the output of SHOW CREATE TABLE command
+    result = self.client.execute("SHOW CREATE TABLE {0}".format(jdbc_tbl_name))
+    match = False
+    for row in result.data:
+      if "'dbcp.password'='******'" in row:
+        match = True
+        break
+    assert match, result.data
 
   def test_data_source_tables(self, vector, unique_database):
     self.run_test_case('QueryTest/data-source-tables', vector, 
use_db=unique_database)

Reply via email to